import json
from unittest import mock

import pytest

from ceph.deployment.service_spec import PlacementSpec, ServiceSpec
from cephadm import CephadmOrchestrator
from cephadm.upgrade import CephadmUpgrade, UpgradeState
from cephadm.ssh import HostConnectionError
from orchestrator import OrchestratorError, DaemonDescription
from .fixtures import _run_cephadm, wait, with_host, with_service, \
    receive_agent_metadata, async_side_effect

from typing import List, Tuple, Optional


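# Exercise the basic upgrade lifecycle commands: start, status, pause, resume and stop.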
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_upgrade_start(cephadm_module: CephadmOrchestrator):
    with with_host(cephadm_module, 'test'):
        with with_host(cephadm_module, 'test2'):
            with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), status_running=True):
                assert wait(cephadm_module, cephadm_module.upgrade_start(
                    'image_id', None)) == 'Initiating upgrade to image_id'

                assert wait(cephadm_module, cephadm_module.upgrade_status()
                            ).target_image == 'image_id'

                assert wait(cephadm_module, cephadm_module.upgrade_pause()
                            ) == 'Paused upgrade to image_id'

                assert wait(cephadm_module, cephadm_module.upgrade_resume()
                            ) == 'Resumed upgrade to image_id'

                assert wait(cephadm_module, cephadm_module.upgrade_stop()
                            ) == 'Stopped upgrade to image_id'


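# upgrade_start should be aborted while any known host is marked offline.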
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_upgrade_start_offline_hosts(cephadm_module: CephadmOrchestrator):
    with with_host(cephadm_module, 'test'):
        with with_host(cephadm_module, 'test2'):
            cephadm_module.offline_hosts = set(['test2'])
            with pytest.raises(OrchestratorError, match=r"Upgrade aborted - Some host\(s\) are currently offline: {'test2'}"):
                cephadm_module.upgrade_start('image_id', None)
            cephadm_module.offline_hosts = set([])  # so remove_host doesn't fail when leaving the with_host block


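# A connection failure while upgrading a daemon should surface as HostConnectionError.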
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_upgrade_daemons_offline_hosts(cephadm_module: CephadmOrchestrator):
    with with_host(cephadm_module, 'test'):
        with with_host(cephadm_module, 'test2'):
            cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0)
            with mock.patch("cephadm.serve.CephadmServe._run_cephadm", side_effect=HostConnectionError('connection failure reason', 'test2', '192.168.122.1')):
                _to_upgrade = [(DaemonDescription(daemon_type='crash', daemon_id='test2', hostname='test2'), True)]
                with pytest.raises(HostConnectionError, match=r"connection failure reason"):
                    cephadm_module.upgrade._upgrade_daemons(_to_upgrade, 'target_image', ['digest1'])


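# _do_upgrade should raise rather than continue when hosts are marked offline mid-upgrade.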
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_do_upgrade_offline_hosts(cephadm_module: CephadmOrchestrator):
    with with_host(cephadm_module, 'test'):
        with with_host(cephadm_module, 'test2'):
            cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0)
            cephadm_module.offline_hosts = set(['test2'])
            with pytest.raises(HostConnectionError, match=r"Host\(s\) were marked offline: {'test2'}"):
                cephadm_module.upgrade._do_upgrade()
            cephadm_module.offline_hosts = set([])  # so remove_host doesn't fail when leaving the with_host block


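# Resuming a paused upgrade should clear every upgrade-related health warning (one removal per UPGRADE_ERRORS alert id).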
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
@mock.patch("cephadm.module.CephadmOrchestrator.remove_health_warning")
def test_upgrade_resume_clear_health_warnings(_rm_health_warning, cephadm_module: CephadmOrchestrator):
    with with_host(cephadm_module, 'test'):
        with with_host(cephadm_module, 'test2'):
            cephadm_module.upgrade.upgrade_state = UpgradeState('target_image', 0, paused=True)
            _rm_health_warning.return_value = None
            assert wait(cephadm_module, cephadm_module.upgrade_resume()
                        ) == 'Resumed upgrade to target_image'
            calls_list = [mock.call(alert_id) for alert_id in cephadm_module.upgrade.UPGRADE_ERRORS]
            _rm_health_warning.assert_has_calls(calls_list, any_order=True)


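# Simulate a full upgrade run: start the upgrade, feed fake image-pull and 'ls' output so the daemons
# appear upgraded, then check that the global container_image config ends up as the plain tag or the
# repo digest, depending on use_repo_digest.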
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
@pytest.mark.parametrize("use_repo_digest",
                         [
                             False,
                             True
                         ])
def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator):
    with with_host(cephadm_module, 'host1'):
        with with_host(cephadm_module, 'host2'):
            cephadm_module.set_container_image('global', 'from_image')
            cephadm_module.use_repo_digest = use_repo_digest
            with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*', count=2)),
                              CephadmOrchestrator.apply_mgr, '', status_running=True), \
                mock.patch("cephadm.module.CephadmOrchestrator.lookup_release_name",
                           return_value='foo'), \
                mock.patch("cephadm.module.CephadmOrchestrator.version",
                           new_callable=mock.PropertyMock) as version_mock, \
                mock.patch("cephadm.module.CephadmOrchestrator.get",
                           return_value={
                               # capture fields in both mon and osd maps
                               "require_osd_release": "pacific",
                               "min_mon_release": 16,
                           }):
                version_mock.return_value = 'ceph version 18.2.1 (somehash)'
                assert wait(cephadm_module, cephadm_module.upgrade_start(
                    'to_image', None)) == 'Initiating upgrade to to_image'

                assert wait(cephadm_module, cephadm_module.upgrade_status()
                            ).target_image == 'to_image'

                def _versions_mock(cmd):
                    return json.dumps({
                        'mgr': {
                            'ceph version 1.2.3 (asdf) blah': 1
                        }
                    })

                cephadm_module._mon_command_mock_versions = _versions_mock

                with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
                    'image_id': 'image_id',
                    'repo_digests': ['to_image@repo_digest'],
                    'ceph_version': 'ceph version 18.2.3 (hash)',
                }))):

                    cephadm_module.upgrade._do_upgrade()

                assert cephadm_module.upgrade_status is not None

                with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
                    json.dumps([
                        dict(
                            name=list(cephadm_module.cache.daemons['host1'].keys())[0],
                            style='cephadm',
                            fsid='fsid',
                            container_id='container_id',
                            container_image_name='to_image',
                            container_image_id='image_id',
                            container_image_digests=['to_image@repo_digest'],
                            deployed_by=['to_image@repo_digest'],
                            version='version',
                            state='running',
                        )
                    ])
                )):
                    receive_agent_metadata(cephadm_module, 'host1', ['ls'])
                    receive_agent_metadata(cephadm_module, 'host2', ['ls'])

                with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
                    'image_id': 'image_id',
                    'repo_digests': ['to_image@repo_digest'],
                    'ceph_version': 'ceph version 18.2.3 (hash)',
                }))):
                    cephadm_module.upgrade._do_upgrade()

                _, image, _ = cephadm_module.check_mon_command({
                    'prefix': 'config get',
                    'who': 'global',
                    'key': 'container_image',
                })
                if use_repo_digest:
                    assert image == 'to_image@repo_digest'
                else:
                    assert image == 'to_image'


def test_upgrade_state_null(cephadm_module: CephadmOrchestrator):
    # This test validates https://tracker.ceph.com/issues/47580
    cephadm_module.set_store('upgrade_state', 'null')
    CephadmUpgrade(cephadm_module)
    assert CephadmUpgrade(cephadm_module).upgrade_state is None


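# Starting an upgrade with fewer than two mgr daemons should be rejected.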
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_not_enough_mgrs(cephadm_module: CephadmOrchestrator):
    with with_host(cephadm_module, 'host1'):
        with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=1)), CephadmOrchestrator.apply_mgr, ''):
            with pytest.raises(OrchestratorError):
                wait(cephadm_module, cephadm_module.upgrade_start('image_id', None))


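# A mon is only considered ok-to-stop during upgrade when the monmap contains at least three mons.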
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
@mock.patch("cephadm.CephadmOrchestrator.check_mon_command")
def test_enough_mons_for_ok_to_stop(check_mon_command, cephadm_module: CephadmOrchestrator):
    # only 2 monitors, not enough for ok-to-stop to ever pass
    check_mon_command.return_value = (
        0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}]}}', '')
    assert not cephadm_module.upgrade._enough_mons_for_ok_to_stop()

    # 3 monitors, ok-to-stop should work fine
    check_mon_command.return_value = (
        0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}, {"name": "mon.3"}]}}', '')
    assert cephadm_module.upgrade._enough_mons_for_ok_to_stop()


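# An mds is only considered ok-to-stop when its filesystem has more mds daemons running than max_mds requires.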
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
@mock.patch("cephadm.module.HostCache.get_daemons_by_service")
@mock.patch("cephadm.CephadmOrchestrator.get")
def test_enough_mds_for_ok_to_stop(get, get_daemons_by_service, cephadm_module: CephadmOrchestrator):
    get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'test', 'max_mds': 1}}]}]
    get_daemons_by_service.side_effect = [[DaemonDescription()]]
    assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop(
        DaemonDescription(daemon_type='mds', daemon_id='test.host1.gfknd', service_name='mds.test'))

    get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 2}}]}]
    get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]]
    assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop(
        DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test'))

    get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 1}}]}]
    get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]]
    assert cephadm_module.upgrade._enough_mds_for_ok_to_stop(
        DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test'))


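# upgrade_ls filtering: by default only versions newer than the current one are listed (with the 'v'
# prefix stripped); show_all_versions lists every version, and use_tags returns the raw registry tags.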
@pytest.mark.parametrize("current_version, use_tags, show_all_versions, tags, result",
                         [
                             # several candidate versions (from different major versions)
                             (
                                 (16, 1, '16.1.0'),
                                 False,  # use_tags
                                 False,  # show_all_versions
                                 [
                                     'v17.1.0',
                                     'v16.2.7',
                                     'v16.2.6',
                                     'v16.2.5',
                                     'v16.1.4',
                                     'v16.1.3',
                                     'v15.2.0',
                                 ],
                                 ['17.1.0', '16.2.7', '16.2.6', '16.2.5', '16.1.4', '16.1.3']
                             ),
                             # candidate minor versions are available
                             (
                                 (16, 1, '16.1.0'),
                                 False,  # use_tags
                                 False,  # show_all_versions
                                 [
                                     'v16.2.2',
                                     'v16.2.1',
                                     'v16.1.6',
                                 ],
                                 ['16.2.2', '16.2.1', '16.1.6']
                             ),
                             # all versions are less than the current version
                             (
                                 (17, 2, '17.2.0'),
                                 False,  # use_tags
                                 False,  # show_all_versions
                                 [
                                     'v17.1.0',
                                     'v16.2.7',
                                     'v16.2.6',
                                 ],
                                 []
                             ),
                             # show all versions (regardless of the current version)
                             (
                                 (16, 1, '16.1.0'),
                                 False,  # use_tags
                                 True,  # show_all_versions
                                 [
                                     'v17.1.0',
                                     'v16.2.7',
                                     'v16.2.6',
                                     'v15.1.0',
                                     'v14.2.0',
                                 ],
                                 ['17.1.0', '16.2.7', '16.2.6', '15.1.0', '14.2.0']
                             ),
                             # show all tags (regardless of the current version and show_all_versions flag)
                             (
                                 (16, 1, '16.1.0'),
                                 True,  # use_tags
                                 False,  # show_all_versions
                                 [
                                     'v17.1.0',
                                     'v16.2.7',
                                     'v16.2.6',
                                     'v16.2.5',
                                     'v16.1.4',
                                     'v16.1.3',
                                     'v15.2.0',
                                 ],
                                 ['v15.2.0', 'v16.1.3', 'v16.1.4', 'v16.2.5',
                                  'v16.2.6', 'v16.2.7', 'v17.1.0']
                             ),
                         ])
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_upgrade_ls(current_version, use_tags, show_all_versions, tags, result, cephadm_module: CephadmOrchestrator):
    with mock.patch('cephadm.upgrade.Registry.get_tags', return_value=tags):
        with mock.patch('cephadm.upgrade.CephadmUpgrade._get_current_version', return_value=current_version):
            out = cephadm_module.upgrade.upgrade_ls(None, use_tags, show_all_versions)
            if use_tags:
                assert out['tags'] == result
            else:
                assert out['versions'] == result


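# Staggered upgrade filter validation: a daemon_types/hosts/services selection is rejected with an
# OrchestratorError when it would upgrade daemons ahead of not-yet-upgraded daemons that must go first
# (e.g. mons before all mgrs are upgraded); otherwise it passes.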
@pytest.mark.parametrize(
    "upgraded, not_upgraded, daemon_types, hosts, services, should_block",
    # [ ([(type, host, id), ... ], [...], [daemon types], [hosts], [services], True/False), ... ]
    [
        (  # valid, upgrade mgr daemons
            [],
            [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
            ['mgr'],
            None,
            None,
            False
        ),
        (  # invalid, can't upgrade mons until mgr is upgraded
            [],
            [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
            ['mon'],
            None,
            None,
            True
        ),
        (  # invalid, can't upgrade mon service until all mgr daemons are upgraded
            [],
            [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
            None,
            None,
            ['mon'],
            True
        ),
        (  # valid, upgrade mgr service
            [],
            [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
            None,
            None,
            ['mgr'],
            False
        ),
        (  # valid, mgr is already upgraded so can upgrade mons
            [('mgr', 'a', 'a.x')],
            [('mon', 'a', 'a')],
            ['mon'],
            None,
            None,
            False
        ),
        (  # invalid, can't upgrade all daemons on a b/c of the un-upgraded mgr on b
            [],
            [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            None,
            ['a'],
            None,
            True
        ),
        (  # valid, only daemon on b is a mgr
            [],
            [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            None,
            ['b'],
            None,
            False
        ),
        (  # invalid, can't upgrade mon on a while mgr on b is un-upgraded
            [],
            [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            None,
            ['a'],
            None,
            True
        ),
        (  # valid, only upgrading the mgr on a
            [],
            [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            ['mgr'],
            ['a'],
            None,
            False
        ),
        (  # valid, the mgr daemon not on b is already upgraded
            [('mgr', 'a', 'a.x')],
            [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            None,
            ['b'],
            None,
            False
        ),
        (  # valid, all the necessary hosts are covered, mgr on c is already upgraded
            [('mgr', 'c', 'c.z')],
            [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a'), ('osd', 'c', '0')],
            None,
            ['a', 'b'],
            None,
            False
        ),
        (  # invalid, can't upgrade mon on a while mgr on b is un-upgraded
            [],
            [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            ['mgr', 'mon'],
            ['a'],
            None,
            True
        ),
        (  # valid, only mon not on "b" is upgraded already. Case hit while making teuthology test
            [('mon', 'a', 'a')],
            [('mon', 'b', 'x'), ('mon', 'b', 'y'), ('osd', 'a', '1'), ('osd', 'b', '2')],
            ['mon', 'osd'],
            ['b'],
            None,
            False
        ),
    ]
)
@mock.patch("cephadm.module.HostCache.get_daemons")
@mock.patch("cephadm.serve.CephadmServe._get_container_image_info")
@mock.patch('cephadm.module.SpecStore.__getitem__')
def test_staggered_upgrade_validation(
    get_spec,
    get_image_info,
    get_daemons,
    upgraded: List[Tuple[str, str, str]],
    not_upgraded: List[Tuple[str, str, str]],
    daemon_types: Optional[str],
    hosts: Optional[str],
    services: Optional[str],
    should_block: bool,
    cephadm_module: CephadmOrchestrator,
):
    def to_dds(ts: List[Tuple[str, str, str]], upgraded: bool) -> List[DaemonDescription]:
        dds = []
        digest = 'new_image@repo_digest' if upgraded else 'old_image@repo_digest'
        for t in ts:
            dds.append(DaemonDescription(daemon_type=t[0],
                                         hostname=t[1],
                                         daemon_id=t[2],
                                         container_image_digests=[digest],
                                         deployed_by=[digest],))
        return dds
    get_daemons.return_value = to_dds(upgraded, True) + to_dds(not_upgraded, False)
    get_image_info.side_effect = async_side_effect(
        ('new_id', 'ceph version 99.99.99 (hash)', ['new_image@repo_digest']))

    class FakeSpecDesc():
        def __init__(self, spec):
            self.spec = spec

    def _get_spec(s):
        return FakeSpecDesc(ServiceSpec(s))

    get_spec.side_effect = _get_spec
    if should_block:
        with pytest.raises(OrchestratorError):
            cephadm_module.upgrade._validate_upgrade_filters(
                'new_image_name', daemon_types, hosts, services)
    else:
        cephadm_module.upgrade._validate_upgrade_filters(
            'new_image_name', daemon_types, hosts, services)