]>
Commit | Line | Data |
---|---|---|
b3b6e05e TL |
1 | """ |
2 | Rook cluster task | |
3 | """ | |
4 | import argparse | |
5 | import configobj | |
6 | import contextlib | |
7 | import json | |
8 | import logging | |
9 | import os | |
10 | import yaml | |
11 | from io import BytesIO | |
12 | ||
13 | from tarfile import ReadError | |
14 | from tasks.ceph_manager import CephManager | |
15 | from teuthology import misc as teuthology | |
16 | from teuthology.config import config as teuth_config | |
17 | from teuthology.contextutil import safe_while | |
18 | from teuthology.orchestra import run | |
19 | from teuthology import contextutil | |
20 | from tasks.ceph import healthy | |
21 | from tasks.cephadm import update_archive_setting | |
22 | ||
23 | log = logging.getLogger(__name__) | |
24 | ||
def path_to_examples(ctx, cluster_name: str) -> str:
    """
    Return the path (with trailing slash) to the rook example manifests.

    The examples directory moved between rook releases, so probe the known
    layouts by attempting to fetch ``operator.yaml`` from each.

    :param ctx: teuthology run context; ``ctx.rook[cluster_name].remote`` is
                the bootstrap remote holding the rook checkout
    :param cluster_name: name of the ceph cluster being deployed
    :raises RuntimeError: if no known examples layout is present
    """
    for candidate in ['rook/deploy/examples/',
                      'rook/cluster/examples/kubernetes/ceph/']:
        try:
            # probe: get_file raises if the path does not exist on the remote
            ctx.rook[cluster_name].remote.get_file(candidate + 'operator.yaml')
            return candidate
        except Exception:
            # was a bare 'except:', which would also swallow
            # KeyboardInterrupt/SystemExit; any ordinary error just means
            # "this layout is not present"
            pass
    # was 'assert False', which is silently stripped under 'python -O'
    raise RuntimeError('Path to examples not found')
b3b6e05e TL |
33 | |
34 | def _kubectl(ctx, config, args, **kwargs): | |
35 | cluster_name = config.get('cluster', 'ceph') | |
36 | return ctx.rook[cluster_name].remote.run( | |
37 | args=['kubectl'] + args, | |
38 | **kwargs | |
39 | ) | |
40 | ||
41 | ||
def shell(ctx, config):
    """
    Run command(s) inside the rook tools container.

    tasks:
    - kubeadm:
    - rook:
    - rook.shell:
        - ceph -s

    or

    tasks:
    - kubeadm:
    - rook:
    - rook.shell:
        commands:
        - ceph -s

    """
    # a bare list is shorthand for {'commands': [...]}
    if isinstance(config, list):
        config = {'commands': config}
    for cmd in config.get('commands', []):
        # strings are split on single spaces; lists are passed through as-is
        argv = cmd.split(' ') if isinstance(cmd, str) else cmd
        _shell(ctx, config, argv)
69 | ||
70 | ||
def _shell(ctx, config, args, **kwargs):
    """Execute a command inside the rook-ceph toolbox pod via kubectl exec."""
    cluster_name = config.get('cluster', 'ceph')
    exec_prefix = [
        '-n', 'rook-ceph',
        'exec',
        ctx.rook[cluster_name].toolbox, '--',
    ]
    return _kubectl(ctx, config, exec_prefix + args, **kwargs)
82 | ||
83 | ||
@contextlib.contextmanager
def rook_operator(ctx, config):
    """
    Clone the rook repository, deploy the rook operator, and tear it all
    down again on context exit.

    Respected config keys:
      rook_branch  -- git branch to clone (default 'master')
      rook_git_url -- repository to clone (default https://github.com/rook/rook)
      rook_image   -- if set, patch operator.yaml to use this operator image
    """
    cluster_name = config['cluster']
    rook_branch = config.get('rook_branch', 'master')
    rook_git_url = config.get('rook_git_url', 'https://github.com/rook/rook')

    log.info(f'Cloning {rook_git_url} branch {rook_branch}')
    ctx.rook[cluster_name].remote.run(
        args=[
            'rm', '-rf', 'rook',
            run.Raw('&&'),
            'git',
            'clone',
            '--single-branch',
            '--branch', rook_branch,
            rook_git_url,
            'rook',
        ]
    )

    # the examples dir moved between rook releases; resolve it once here
    # (each lookup probes the remote) instead of on every use below
    examples = path_to_examples(ctx, cluster_name)

    # operator.yaml
    operator_yaml = ctx.rook[cluster_name].remote.read_file(
        examples + 'operator.yaml'
    )
    rook_image = config.get('rook_image')
    if rook_image:
        log.info(f'Patching operator to use image {rook_image}')
        crs = list(yaml.load_all(operator_yaml, Loader=yaml.FullLoader))
        # operator.yaml is expected to contain exactly two documents, the
        # second being the operator Deployment whose image we patch
        assert len(crs) == 2
        crs[1]['spec']['template']['spec']['containers'][0]['image'] = rook_image
        operator_yaml = yaml.dump_all(crs)
    ctx.rook[cluster_name].remote.write_file('operator.yaml', operator_yaml)

    op_job = None
    try:
        log.info('Deploying operator')
        _kubectl(ctx, config, [
            'create',
            '-f', examples + 'crds.yaml',
            '-f', examples + 'common.yaml',
            '-f', 'operator.yaml',
        ])

        # on centos the operator needs privileged access to hostpath volumes
        if teuthology.get_distro(ctx) == 'centos':
            _kubectl(ctx, config, [
                '-n', 'rook-ceph',
                'set', 'env', 'deploy/rook-ceph-operator',
                'ROOK_HOSTPATH_REQUIRES_PRIVILEGED=true'
            ])

        # wait for the operator pod to reach Running
        op_name = None
        with safe_while(sleep=10, tries=90, action="wait for operator") as proceed:
            while not op_name and proceed():
                p = _kubectl(
                    ctx, config,
                    ['-n', 'rook-ceph', 'get', 'pods', '-l', 'app=rook-ceph-operator'],
                    stdout=BytesIO(),
                )
                for line in p.stdout.getvalue().decode('utf-8').strip().splitlines():
                    name, ready, status, _ = line.split(None, 3)
                    if status == 'Running':
                        op_name = name
                        break

        # stream the operator log for the duration of the run
        op_job = _kubectl(
            ctx,
            config,
            ['-n', 'rook-ceph', 'logs', '-f', op_name],
            wait=False,
            logger=log.getChild('operator'),
        )

        yield

    except Exception as e:
        log.exception(e)
        raise

    finally:
        log.info('Cleaning up rook operator')
        _kubectl(ctx, config, [
            'delete',
            '-f', 'operator.yaml',
        ])
        if False:
            # don't bother since we'll tear down k8s anyway (and this mysteriously
            # fails sometimes when deleting some of the CRDs... not sure why!)
            # NOTE: these calls previously invoked path_to_examples() with no
            # arguments, which would raise TypeError if ever re-enabled
            _kubectl(ctx, config, [
                'delete',
                '-f', examples + 'common.yaml',
            ])
            _kubectl(ctx, config, [
                'delete',
                '-f', examples + 'crds.yaml',
            ])
        ctx.rook[cluster_name].remote.run(args=['rm', '-rf', 'rook', 'operator.yaml'])
        if op_job:
            op_job.wait()
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo', 'rm', '-rf', '/var/lib/rook'
                ]
            )
        )
196 | ||
197 | ||
@contextlib.contextmanager
def ceph_log(ctx, config):
    """
    Archive rook's ceph logs and scan the cluster log for badness on exit.

    Marks ctx.summary['success'] = False (and records a failure_reason)
    when the cluster log contains ERR/WRN/SEC entries not matched by the
    'log-ignorelist' config option.
    """
    cluster_name = config['cluster']

    log_dir = '/var/lib/rook/rook-ceph/log'
    update_archive_setting(ctx, 'log', log_dir)

    try:
        yield

    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise

    finally:
        log.info('Checking cluster log for badness...')
        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                f'{log_dir}/ceph.log',
            ]
            if excludes:
                for exclude in excludes:
                    args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = ctx.rook[cluster_name].remote.run(
                stdout=BytesIO(),
                args=args,
            )
            stdout = r.stdout.getvalue().decode()
            if stdout:
                return stdout
            return None

        # raw strings: '\[' in a plain literal is an invalid escape sequence
        if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]',
                             config.get('log-ignorelist')) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
                    # .get() here too: 'log-ignorelist' may be absent, and
                    # config['log-ignorelist'] would raise KeyError
                    match = first_in_ceph_log(pattern,
                                              config.get('log-ignorelist'))
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
            # and logs
            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        log_dir,
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            try:
                os.makedirs(path)
            except OSError:
                # directory may already exist
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    pass
                try:
                    teuthology.pull_directory(remote, log_dir,
                                              os.path.join(sub, 'log'))
                except ReadError:
                    # best-effort: a partially-written tar stream is tolerated
                    pass
302 | ||
303 | ||
def build_initial_config(ctx, config):
    """Load the bundled rook-ceph.conf template and apply any [section]
    key/value overrides from the task's 'conf' option; return a ConfigObj."""
    template = os.path.join(os.path.dirname(__file__), 'rook-ceph.conf')
    conf = configobj.ConfigObj(template, file_error=True)

    # overrides
    for section, keys in config.get('conf', {}).items():
        for key, value in keys.items():
            log.info(" override: [%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    return conf
317 | ||
318 | ||
@contextlib.contextmanager
def rook_cluster(ctx, config):
    """
    Create the CephCluster resource (plus the rook-config-override
    ConfigMap), and delete both again on context exit.

    Also records the expected OSD count (one per scratch device) in
    ctx.rook[cluster].num_osds for wait_for_osds().
    """
    cluster_name = config['cluster']

    # count how many OSDs we'll create: one per scratch device per host
    num_devs = 0
    num_hosts = 0
    for remote in ctx.cluster.remotes.keys():
        ls = remote.read_file('/scratch_devs').decode('utf-8').strip().splitlines()
        num_devs += len(ls)
        num_hosts += 1
    ctx.rook[cluster_name].num_osds = num_devs

    # config
    ceph_conf = build_initial_config(ctx, config)
    ceph_conf_fp = BytesIO()
    ceph_conf.write(ceph_conf_fp)
    log.info(f'Config:\n{ceph_conf_fp.getvalue()}')
    # bugfix: pass the task config (not the ConfigObj) so _kubectl resolves
    # the right cluster name; decode the conf bytes so the ConfigMap value
    # is a plain string rather than a yaml !!binary blob (ConfigMap 'data'
    # values must be strings)
    _kubectl(ctx, config, ['create', '-f', '-'], stdin=yaml.dump({
        'apiVersion': 'v1',
        'kind': 'ConfigMap',
        'metadata': {
            'name': 'rook-config-override',
            'namespace': 'rook-ceph'},
        'data': {
            'config': ceph_conf_fp.getvalue().decode('utf-8')
        }
    }))

    # cluster
    cluster = {
        'apiVersion': 'ceph.rook.io/v1',
        'kind': 'CephCluster',
        'metadata': {'name': 'rook-ceph', 'namespace': 'rook-ceph'},
        'spec': {
            'cephVersion': {
                'image': ctx.rook[cluster_name].image,
                'allowUnsupported': True,
            },
            'dataDirHostPath': '/var/lib/rook',
            'skipUpgradeChecks': True,
            'mgr': {
                'count': 1,
                'modules': [
                    { 'name': 'rook', 'enabled': True },
                ],
            },
            'mon': {
                'count': num_hosts,
                'allowMultiplePerNode': True,
            },
        }
    }
    # task-supplied 'spec' overrides are merged on top of the defaults
    teuthology.deep_merge(cluster['spec'], config.get('spec', {}))

    cluster_yaml = yaml.dump(cluster)
    log.info(f'Cluster:\n{cluster_yaml}')
    try:
        ctx.rook[cluster_name].remote.write_file('cluster.yaml', cluster_yaml)
        _kubectl(ctx, config, ['create', '-f', 'cluster.yaml'])
        yield

    except Exception as e:
        log.exception(e)
        raise

    finally:
        _kubectl(ctx, config, ['delete', '-f', 'cluster.yaml'], check_status=False)

        # wait for cluster pods (other than csi/operator/tools) to stop
        log.info('Waiting for cluster to stop')
        running = True
        with safe_while(sleep=5, tries=100, action="wait for teardown") as proceed:
            while running and proceed():
                p = _kubectl(
                    ctx, config,
                    ['-n', 'rook-ceph', 'get', 'pods'],
                    stdout=BytesIO(),
                )
                running = False
                for line in p.stdout.getvalue().decode('utf-8').strip().splitlines():
                    name, ready, status, _ = line.split(None, 3)
                    if (
                            name != 'NAME'
                            and not name.startswith('csi-')
                            and not name.startswith('rook-ceph-operator-')
                            and not name.startswith('rook-ceph-tools-')
                    ):
                        running = True
                        break

        _kubectl(
            ctx, config,
            ['-n', 'rook-ceph', 'delete', 'configmap', 'rook-config-override'],
            check_status=False,
        )
        ctx.rook[cluster_name].remote.run(args=['rm', '-f', 'cluster.yaml'])
416 | ||
417 | ||
@contextlib.contextmanager
def rook_toolbox(ctx, config):
    """Deploy the rook-ceph toolbox pod, record its name in
    ctx.rook[cluster].toolbox for _shell(), and delete it on exit."""
    cluster_name = config['cluster']
    try:
        _kubectl(ctx, config, [
            'create',
            '-f', path_to_examples(ctx, cluster_name) + 'toolbox.yaml',
        ])

        log.info('Waiting for tools container to start')
        toolbox = None
        with safe_while(sleep=5, tries=100, action="wait for toolbox") as proceed:
            while not toolbox and proceed():
                proc = _kubectl(
                    ctx, config,
                    ['-n', 'rook-ceph', 'get', 'pods', '-l', 'app=rook-ceph-tools'],
                    stdout=BytesIO(),
                )
                listing = proc.stdout.getvalue().decode('utf-8').strip()
                for row in listing.splitlines():
                    name, ready, status, _ = row.split(None, 3)
                    if status == 'Running':
                        toolbox = name
                        break
        ctx.rook[cluster_name].toolbox = toolbox
        yield

    except Exception as e:
        log.exception(e)
        raise

    finally:
        _kubectl(ctx, config, [
            'delete',
            '-f', path_to_examples(ctx, cluster_name) + 'toolbox.yaml',
        ], check_status=False)
453 | ||
454 | ||
@contextlib.contextmanager
def wait_for_orch(ctx, config):
    """Block until the mgr/rook orchestrator backend reports available."""
    log.info('Waiting for mgr/rook orchestrator to be available')
    with safe_while(sleep=10, tries=90, action="check orch status") as proceed:
        while proceed():
            proc = _shell(ctx, config, ['ceph', 'orch', 'status', '-f', 'json'],
                          stdout=BytesIO(),
                          check_status=False)
            if proc.exitstatus != 0:
                continue
            status = json.loads(proc.stdout.getvalue().decode('utf-8'))
            if status.get('available') and status.get('backend') == 'rook':
                log.info(' mgr/rook orchestrator is active')
                break

    yield
470 | ||
471 | ||
@contextlib.contextmanager
def rook_post_config(ctx, config):
    """Point mgr/rook at the 'scratch' storage class and ask the
    orchestrator to create OSDs on all available devices."""
    try:
        _shell(ctx, config,
               ['ceph', 'config', 'set', 'mgr',
                'mgr/rook/storage_class', 'scratch'])
        _shell(ctx, config,
               ['ceph', 'orch', 'apply', 'osd', '--all-available-devices'])
        yield
    except Exception as e:
        log.exception(e)
        raise
486 | ||
487 | ||
@contextlib.contextmanager
def wait_for_osds(ctx, config):
    """Wait until the number of up OSDs matches the scratch-device count
    recorded in ctx.rook[cluster].num_osds by rook_cluster()."""
    cluster_name = config.get('cluster', 'ceph')

    want = ctx.rook[cluster_name].num_osds
    log.info(f'Waiting for {want} OSDs')
    with safe_while(sleep=10, tries=90, action="check osd count") as proceed:
        while proceed():
            proc = _shell(ctx, config, ['ceph', 'osd', 'stat', '-f', 'json'],
                          stdout=BytesIO(),
                          check_status=False)
            if proc.exitstatus != 0:
                continue
            stat = json.loads(proc.stdout.getvalue().decode('utf-8'))
            have = stat.get('num_up_osds', 0)
            if have == want:
                break
            log.info(f' have {have}/{want} OSDs')

    yield
507 | ||
@contextlib.contextmanager
def ceph_config_keyring(ctx, config):
    """
    Copy ceph.conf and the admin keyring from the toolbox to every remote
    so host-side ceph commands work, and remove them again on exit.
    """
    # get config and push to hosts
    log.info('Distributing ceph config and client.admin keyring')
    p = _shell(ctx, config, ['cat', '/etc/ceph/ceph.conf'], stdout=BytesIO())
    conf = p.stdout.getvalue()
    p = _shell(ctx, config, ['cat', '/etc/ceph/keyring'], stdout=BytesIO())
    keyring = p.stdout.getvalue()
    ctx.cluster.run(args=['sudo', 'mkdir', '-p', '/etc/ceph'])
    for remote in ctx.cluster.remotes.keys():
        remote.write_file(
            '/etc/ceph/ceph.conf',
            conf,
            sudo=True,
        )
        remote.write_file(
            '/etc/ceph/keyring',
            keyring,
            sudo=True,
        )

    try:
        yield

    except Exception as e:
        log.exception(e)
        raise

    finally:
        log.info('Cleaning up config and client.admin keyring')
        # bugfix: also remove /etc/ceph/keyring, the file actually written
        # above -- the old cleanup list left it behind on every node
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/ceph.conf',
            '/etc/ceph/keyring',
            '/etc/ceph/ceph.client.admin.keyring'
        ])
543 | ||
544 | ||
@contextlib.contextmanager
def ceph_clients(ctx, config):
    """Create a keyring with full caps for each client role and install it
    on the corresponding client node."""
    cluster_name = config['cluster']

    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    for remote, roles_for_host in clients.remotes.items():
        host_client_roles = teuthology.cluster_roles_of_type(
            roles_for_host, 'client', cluster_name)
        for role in host_client_roles:
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(
                cluster_name, name)
            r = _shell(ctx, config,
                       args=[
                           'ceph', 'auth',
                           'get-or-create', name,
                           'mon', 'allow *',
                           'osd', 'allow *',
                           'mds', 'allow *',
                           'mgr', 'allow *',
                       ],
                       stdout=BytesIO(),
                       )
            keyring = r.stdout.getvalue()
            # world-readable so unprivileged client processes can use it
            remote.write_file(client_keyring, keyring, sudo=True, mode='0644')
    yield
571 | ||
572 | ||
573 | @contextlib.contextmanager | |
574 | def task(ctx, config): | |
575 | """ | |
576 | Deploy rook-ceph cluster | |
577 | ||
578 | tasks: | |
579 | - kubeadm: | |
580 | - rook: | |
581 | branch: wip-foo | |
582 | spec: | |
583 | mon: | |
584 | count: 1 | |
585 | ||
586 | The spec item is deep-merged against the cluster.yaml. The branch, sha1, or | |
587 | image items are used to determine the Ceph container image. | |
588 | """ | |
589 | if not config: | |
590 | config = {} | |
591 | assert isinstance(config, dict), \ | |
592 | "task only supports a dictionary for configuration" | |
593 | ||
594 | log.info('Rook start') | |
595 | ||
596 | overrides = ctx.config.get('overrides', {}) | |
597 | teuthology.deep_merge(config, overrides.get('ceph', {})) | |
598 | teuthology.deep_merge(config, overrides.get('rook', {})) | |
599 | log.info('Config: ' + str(config)) | |
600 | ||
601 | # set up cluster context | |
602 | if not hasattr(ctx, 'rook'): | |
603 | ctx.rook = {} | |
604 | if 'cluster' not in config: | |
605 | config['cluster'] = 'ceph' | |
606 | cluster_name = config['cluster'] | |
607 | if cluster_name not in ctx.rook: | |
608 | ctx.rook[cluster_name] = argparse.Namespace() | |
609 | ||
610 | ctx.rook[cluster_name].remote = list(ctx.cluster.remotes.keys())[0] | |
611 | ||
612 | # image | |
613 | teuth_defaults = teuth_config.get('defaults', {}) | |
614 | cephadm_defaults = teuth_defaults.get('cephadm', {}) | |
615 | containers_defaults = cephadm_defaults.get('containers', {}) | |
616 | container_image_name = containers_defaults.get('image', None) | |
617 | if 'image' in config: | |
618 | ctx.rook[cluster_name].image = config.get('image') | |
619 | else: | |
620 | sha1 = config.get('sha1') | |
621 | flavor = config.get('flavor', 'default') | |
622 | if sha1: | |
623 | if flavor == "crimson": | |
624 | ctx.rook[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor | |
625 | else: | |
626 | ctx.rook[cluster_name].image = container_image_name + ':' + sha1 | |
627 | else: | |
628 | # hmm, fall back to branch? | |
629 | branch = config.get('branch', 'master') | |
630 | ctx.rook[cluster_name].image = container_image_name + ':' + branch | |
631 | log.info('Ceph image is %s' % ctx.rook[cluster_name].image) | |
632 | ||
633 | with contextutil.nested( | |
634 | lambda: rook_operator(ctx, config), | |
635 | lambda: ceph_log(ctx, config), | |
636 | lambda: rook_cluster(ctx, config), | |
637 | lambda: rook_toolbox(ctx, config), | |
20effc67 TL |
638 | lambda: wait_for_orch(ctx, config), |
639 | lambda: rook_post_config(ctx, config), | |
b3b6e05e TL |
640 | lambda: wait_for_osds(ctx, config), |
641 | lambda: ceph_config_keyring(ctx, config), | |
642 | lambda: ceph_clients(ctx, config), | |
643 | ): | |
644 | if not hasattr(ctx, 'managers'): | |
645 | ctx.managers = {} | |
646 | ctx.managers[cluster_name] = CephManager( | |
647 | ctx.rook[cluster_name].remote, | |
648 | ctx=ctx, | |
649 | logger=log.getChild('ceph_manager.' + cluster_name), | |
650 | cluster=cluster_name, | |
651 | rook=True, | |
652 | ) | |
653 | try: | |
654 | if config.get('wait-for-healthy', True): | |
655 | healthy(ctx=ctx, config=config) | |
656 | log.info('Rook complete, yielding') | |
657 | yield | |
658 | ||
659 | finally: | |
20effc67 TL |
660 | to_remove = [] |
661 | ret = _shell(ctx, config, ['ceph', 'orch', 'ls', '-f', 'json'], stdout=BytesIO()) | |
662 | if ret.exitstatus == 0: | |
663 | r = json.loads(ret.stdout.getvalue().decode('utf-8')) | |
664 | for service in r: | |
665 | if service['service_type'] in ['rgw', 'mds', 'nfs', 'rbd-mirror']: | |
666 | _shell(ctx, config, ['ceph', 'orch', 'rm', service['service_name']]) | |
667 | to_remove.append(service['service_name']) | |
668 | with safe_while(sleep=10, tries=90, action="waiting for service removal") as proceed: | |
669 | while proceed(): | |
670 | ret = _shell(ctx, config, ['ceph', 'orch', 'ls', '-f', 'json'], stdout=BytesIO()) | |
671 | if ret.exitstatus == 0: | |
672 | r = json.loads(ret.stdout.getvalue().decode('utf-8')) | |
673 | still_up = [service['service_name'] for service in r] | |
674 | matches = set(still_up).intersection(to_remove) | |
675 | if not matches: | |
676 | break | |
b3b6e05e | 677 | log.info('Tearing down rook') |