1 """
2 Ceph cluster task, deployed via cephadm orchestrator
3 """
4 import argparse
5 import configobj
6 import contextlib
7 import logging
8 import os
9 import json
10 import re
11 import uuid
12 import yaml
13
14 from io import BytesIO, StringIO
15 from tarfile import ReadError
16 from tasks.ceph_manager import CephManager
17 from teuthology import misc as teuthology
18 from teuthology import contextutil
19 from teuthology.orchestra import run
20 from teuthology.orchestra.daemon import DaemonGroup
21 from teuthology.config import config as teuth_config
22
23 # these items we use from ceph.py should probably eventually move elsewhere
24 from tasks.ceph import get_mons, healthy
25 from tasks.vip import subst_vip
26
27 CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus']
28
29 log = logging.getLogger(__name__)
30
31
32 def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs):
33 teuthology.get_testdir(ctx)
34 return remote.run(
35 args=[
36 'sudo',
37 ctx.cephadm,
38 '--image', ctx.ceph[cluster_name].image,
39 'shell',
40 '-c', '/etc/ceph/{}.conf'.format(cluster_name),
41 '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
42 '--fsid', ctx.ceph[cluster_name].fsid,
43 ] + extra_cephadm_args + [
44 '--',
45 ] + args,
46 **kwargs
47 )
48
49
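# Illustrative sketch only (not used by the task): the kind of command line
# _shell() composes.  All values below (cluster name, image, fsid, cephadm
# path, command) are hypothetical placeholders.
def _example_shell_cmdline():
    cluster_name = 'ceph'
    image = 'quay.io/ceph-ci/ceph:somesha1'
    fsid = '00000000-0000-0000-0000-000000000000'
    cephadm = '/home/ubuntu/cephtest/cephadm'
    args = ['ceph', 'orch', 'status']
    return [
        'sudo', cephadm,
        '--image', image,
        'shell',
        '-c', '/etc/ceph/{}.conf'.format(cluster_name),
        '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        '--fsid', fsid,
        '--',
    ] + args
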
50 def build_initial_config(ctx, config):
51 cluster_name = config['cluster']
52
53 path = os.path.join(os.path.dirname(__file__), 'cephadm.conf')
54 conf = configobj.ConfigObj(path, file_error=True)
55
56 conf.setdefault('global', {})
57 conf['global']['fsid'] = ctx.ceph[cluster_name].fsid
58
59 # overrides
60 for section, keys in config.get('conf',{}).items():
61 for key, value in keys.items():
62 log.info(" override: [%s] %s = %s" % (section, key, value))
63 if section not in conf:
64 conf[section] = {}
65 conf[section][key] = value
66
67 return conf
68
69
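# Illustrative sketch only: how the 'conf' overrides from the task config are
# merged into the seed ConfigObj, as build_initial_config() does above.  The
# section/key/value shown are hypothetical examples, not task defaults.
def _example_conf_overrides():
    import configobj
    conf = configobj.ConfigObj()
    conf['global'] = {'fsid': '00000000-0000-0000-0000-000000000000'}
    overrides = {'mon': {'mon warn on insecure global id reclaim': 'false'}}
    for section, keys in overrides.items():
        for key, value in keys.items():
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value
    return conf
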
70 def update_archive_setting(ctx, key, value):
71 """
72 Record an artifact directory (key -> value) under 'archive' in the job's info.yaml
73 """
74 if ctx.archive is None:
75 return
76 with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
77 info_yaml = yaml.safe_load(info_file)
78 info_file.seek(0)
79 if 'archive' in info_yaml:
80 info_yaml['archive'][key] = value
81 else:
82 info_yaml['archive'] = {key: value}
83 yaml.safe_dump(info_yaml, info_file, default_flow_style=False)
84
85
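# Illustrative sketch only: the effect of update_archive_setting() on the
# job's info.yaml, recording where a given kind of artifact lives on the
# remote hosts.  The keys and paths are the ones used later in this file.
def _example_info_yaml():
    import yaml
    info_yaml = {}                      # as if info.yaml had no 'archive' yet
    for key, value in (('log', '/var/log/ceph'),
                       ('crash', '/var/lib/ceph/crash')):
        if 'archive' in info_yaml:
            info_yaml['archive'][key] = value
        else:
            info_yaml['archive'] = {key: value}
    return yaml.safe_dump(info_yaml, default_flow_style=False)
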
86 @contextlib.contextmanager
87 def normalize_hostnames(ctx):
88 """
89 Ensure we have short hostnames throughout, for consistency between
90 remote.shortname and socket.gethostname() in cephadm.
91 """
92 log.info('Normalizing hostnames...')
93 ctx.cluster.run(args=[
94 'sudo',
95 'hostname',
96 run.Raw('$(hostname -s)'),
97 ])
98
99 try:
100 yield
101 finally:
102 pass
103
104
105 @contextlib.contextmanager
106 def download_cephadm(ctx, config, ref):
107 cluster_name = config['cluster']
108
109 if config.get('cephadm_mode') != 'cephadm-package':
110 ref = config.get('cephadm_branch', ref)
111 git_url = config.get('cephadm_git_url', teuth_config.get_ceph_git_url())
112 log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref))
113 if ctx.config.get('redhat'):
114 log.info("Install cephadm using RPM")
115 # cephadm already installed from redhat.install task
116 ctx.cluster.run(
117 args=[
118 'cp',
119 run.Raw('$(which cephadm)'),
120 ctx.cephadm,
121 run.Raw('&&'),
122 'ls', '-l',
123 ctx.cephadm,
124 ]
125 )
126 elif git_url.startswith('https://github.com/'):
127 # git archive doesn't like https:// URLs, which we use with github.
128 rest = git_url.split('https://github.com/', 1)[1]
129 rest = re.sub(r'\.git/?$', '', rest).strip() # no .git suffix
130 ctx.cluster.run(
131 args=[
132 'curl', '--silent',
133 'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm',
134 run.Raw('>'),
135 ctx.cephadm,
136 run.Raw('&&'),
137 'ls', '-l',
138 ctx.cephadm,
139 ],
140 )
141 else:
142 ctx.cluster.run(
143 args=[
144 'git', 'archive',
145 '--remote=' + git_url,
146 ref,
147 'src/cephadm/cephadm',
148 run.Raw('|'),
149 'tar', '-xO', 'src/cephadm/cephadm',
150 run.Raw('>'),
151 ctx.cephadm,
152 ],
153 )
154 # sanity-check the resulting file and set executable bit
155 cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm)
156 ctx.cluster.run(
157 args=[
158 'test', '-s', ctx.cephadm,
159 run.Raw('&&'),
160 'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'),
161 run.Raw('&&'),
162 'chmod', '+x', ctx.cephadm,
163 ],
164 )
165
166 try:
167 yield
168 finally:
169 log.info('Removing cluster...')
170 ctx.cluster.run(args=[
171 'sudo',
172 ctx.cephadm,
173 'rm-cluster',
174 '--fsid', ctx.ceph[cluster_name].fsid,
175 '--force',
176 ])
177
178 if config.get('cephadm_mode') == 'root':
179 log.info('Removing cephadm ...')
180 ctx.cluster.run(
181 args=[
182 'rm',
183 '-rf',
184 ctx.cephadm,
185 ],
186 )
187
188
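# Illustrative sketch only: the raw.githubusercontent.com rewrite used above
# for GitHub URLs.  The default repository URL and ref are hypothetical
# examples, not values taken from a real run.
def _example_raw_cephadm_url(git_url='https://github.com/ceph/ceph.git',
                             ref='pacific'):
    import re
    rest = git_url.split('https://github.com/', 1)[1]
    rest = re.sub(r'\.git/?$', '', rest).strip()   # drop any .git suffix
    return ('https://raw.githubusercontent.com/' + rest + '/' + ref +
            '/src/cephadm/cephadm')
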
189 @contextlib.contextmanager
190 def ceph_log(ctx, config):
191 cluster_name = config['cluster']
192 fsid = ctx.ceph[cluster_name].fsid
193
194 update_archive_setting(ctx, 'log', '/var/log/ceph')
195
196
197 try:
198 yield
199
200 except Exception:
201 # we need to know this below
202 ctx.summary['success'] = False
203 raise
204
205 finally:
206 log.info('Checking cluster log for badness...')
207 def first_in_ceph_log(pattern, excludes):
208 """
209 Find the first occurrence of the given pattern in the Ceph cluster log;
210 return None if no line matches.
211
212 :param pattern: Pattern scanned for.
213 :param excludes: Patterns to ignore.
214 :return: First line of text (or None if not found)
215 """
216 args = [
217 'sudo',
218 'egrep', pattern,
219 '/var/log/ceph/{fsid}/ceph.log'.format(
220 fsid=fsid),
221 ]
222 if excludes:
223 for exclude in excludes:
224 args.extend([run.Raw('|'), 'egrep', '-v', exclude])
225 args.extend([
226 run.Raw('|'), 'head', '-n', '1',
227 ])
228 r = ctx.ceph[cluster_name].bootstrap_remote.run(
229 stdout=StringIO(),
230 args=args,
231 )
232 stdout = r.stdout.getvalue()
233 if stdout != '':
234 return stdout
235 return None
236
237 if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]',
238 config.get('log-ignorelist')) is not None:
239 log.warning('Found errors (ERR|WRN|SEC) in cluster log')
240 ctx.summary['success'] = False
241 # use the most severe problem as the failure reason
242 if 'failure_reason' not in ctx.summary:
243 for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
244 match = first_in_ceph_log(pattern, config['log-ignorelist'])
245 if match is not None:
246 ctx.summary['failure_reason'] = \
247 '"{match}" in cluster log'.format(
248 match=match.rstrip('\n'),
249 )
250 break
251
252 if ctx.archive is not None and \
253 not (ctx.config.get('archive-on-error') and ctx.summary['success']):
254 # and logs
255 log.info('Compressing logs...')
256 run.wait(
257 ctx.cluster.run(
258 args=[
259 'sudo',
260 'find',
261 '/var/log/ceph', # all logs, not just for the cluster
262 '/var/log/rbd-target-api', # ceph-iscsi
263 '-name',
264 '*.log',
265 '-print0',
266 run.Raw('|'),
267 'sudo',
268 'xargs',
269 '-0',
270 '--no-run-if-empty',
271 '--',
272 'gzip',
273 '--',
274 ],
275 wait=False,
276 ),
277 )
278
279 log.info('Archiving logs...')
280 path = os.path.join(ctx.archive, 'remote')
281 try:
282 os.makedirs(path)
283 except OSError:
284 pass
285 for remote in ctx.cluster.remotes.keys():
286 sub = os.path.join(path, remote.name)
287 try:
288 os.makedirs(sub)
289 except OSError:
290 pass
291 try:
292 teuthology.pull_directory(remote, '/var/log/ceph', # everything
293 os.path.join(sub, 'log'))
294 except ReadError:
295 pass
296
297
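# Illustrative sketch only: a pure-Python equivalent of the egrep pipeline in
# first_in_ceph_log() above -- return the first log line matching `pattern`
# that is not matched by any of the `excludes` patterns.
def _example_first_match(lines, pattern, excludes=None):
    import re
    for line in lines:
        if not re.search(pattern, line):
            continue
        if excludes and any(re.search(e, line) for e in excludes):
            continue
        return line
    return None
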
298 @contextlib.contextmanager
299 def ceph_crash(ctx, config):
300 """
301 Gather crash dumps from /var/lib/ceph/$fsid/crash
302 """
303 cluster_name = config['cluster']
304 fsid = ctx.ceph[cluster_name].fsid
305
306 update_archive_setting(ctx, 'crash', '/var/lib/ceph/crash')
307
308 try:
309 yield
310
311 finally:
312 if ctx.archive is not None:
313 log.info('Archiving crash dumps...')
314 path = os.path.join(ctx.archive, 'remote')
315 try:
316 os.makedirs(path)
317 except OSError:
318 pass
319 for remote in ctx.cluster.remotes.keys():
320 sub = os.path.join(path, remote.name)
321 try:
322 os.makedirs(sub)
323 except OSError:
324 pass
325 try:
326 teuthology.pull_directory(remote,
327 '/var/lib/ceph/%s/crash' % fsid,
328 os.path.join(sub, 'crash'))
329 except ReadError:
330 pass
331
332
333 @contextlib.contextmanager
334 def ceph_bootstrap(ctx, config):
335 """
336 Bootstrap ceph cluster.
337
338 :param ctx: the argparse.Namespace object
339 :param config: the config dict
340 """
341 cluster_name = config['cluster']
342 testdir = teuthology.get_testdir(ctx)
343 fsid = ctx.ceph[cluster_name].fsid
344
345 bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
346 first_mon = ctx.ceph[cluster_name].first_mon
347 first_mon_role = ctx.ceph[cluster_name].first_mon_role
348 mons = ctx.ceph[cluster_name].mons
349
350 ctx.cluster.run(args=[
351 'sudo', 'mkdir', '-p', '/etc/ceph',
352 ])
353 ctx.cluster.run(args=[
354 'sudo', 'chmod', '777', '/etc/ceph',
355 ])
356 try:
357 # write seed config
358 log.info('Writing seed config...')
359 conf_fp = BytesIO()
360 seed_config = build_initial_config(ctx, config)
361 seed_config.write(conf_fp)
362 bootstrap_remote.write_file(
363 path='{}/seed.{}.conf'.format(testdir, cluster_name),
364 data=conf_fp.getvalue())
365 log.debug('Final config:\n' + conf_fp.getvalue().decode())
366 ctx.ceph[cluster_name].conf = seed_config
367
368 # register initial daemons
369 ctx.daemons.register_daemon(
370 bootstrap_remote, 'mon', first_mon,
371 cluster=cluster_name,
372 fsid=fsid,
373 logger=log.getChild('mon.' + first_mon),
374 wait=False,
375 started=True,
376 )
377 if not ctx.ceph[cluster_name].roleless:
378 first_mgr = ctx.ceph[cluster_name].first_mgr
379 ctx.daemons.register_daemon(
380 bootstrap_remote, 'mgr', first_mgr,
381 cluster=cluster_name,
382 fsid=fsid,
383 logger=log.getChild('mgr.' + first_mgr),
384 wait=False,
385 started=True,
386 )
387
388 # bootstrap
389 log.info('Bootstrapping...')
390 cmd = [
391 'sudo',
392 ctx.cephadm,
393 '--image', ctx.ceph[cluster_name].image,
394 '-v',
395 'bootstrap',
396 '--fsid', fsid,
397 '--config', '{}/seed.{}.conf'.format(testdir, cluster_name),
398 '--output-config', '/etc/ceph/{}.conf'.format(cluster_name),
399 '--output-keyring',
400 '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
401 '--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name),
402 ]
403
404 if config.get('registry-login'):
405 registry = config['registry-login']
406 cmd += [
407 "--registry-url", registry['url'],
408 "--registry-username", registry['username'],
409 "--registry-password", registry['password'],
410 ]
411
412 if not ctx.ceph[cluster_name].roleless:
413 cmd += [
414 '--mon-id', first_mon,
415 '--mgr-id', first_mgr,
416 '--orphan-initial-daemons', # we will do it explicitly!
417 '--skip-monitoring-stack', # we'll provision these explicitly
418 ]
419
420 if mons[first_mon_role].startswith('['):
421 cmd += ['--mon-addrv', mons[first_mon_role]]
422 else:
423 cmd += ['--mon-ip', mons[first_mon_role]]
424 if config.get('skip_dashboard'):
425 cmd += ['--skip-dashboard']
426 if config.get('skip_monitoring_stack'):
427 cmd += ['--skip-monitoring-stack']
428 if config.get('single_host_defaults'):
429 cmd += ['--single-host-defaults']
430 if not config.get('avoid_pacific_features', False):
431 cmd += ['--skip-admin-label']
432 # bootstrap makes the keyring root 0600, so +r it for our purposes
433 cmd += [
434 run.Raw('&&'),
435 'sudo', 'chmod', '+r',
436 '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
437 ]
438 bootstrap_remote.run(args=cmd)
439
440 # fetch keys and configs
441 log.info('Fetching config...')
442 ctx.ceph[cluster_name].config_file = \
443 bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.conf')
444 log.info('Fetching client.admin keyring...')
445 ctx.ceph[cluster_name].admin_keyring = \
446 bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.client.admin.keyring')
447 log.info('Fetching mon keyring...')
448 ctx.ceph[cluster_name].mon_keyring = \
449 bootstrap_remote.read_file(f'/var/lib/ceph/{fsid}/mon.{first_mon}/keyring', sudo=True)
450
451 # fetch ssh key, distribute to additional nodes
452 log.info('Fetching pub ssh key...')
453 ssh_pub_key = bootstrap_remote.read_file(
454 f'{testdir}/{cluster_name}.pub').decode('ascii').strip()
455
456 log.info('Installing pub ssh key for root users...')
457 ctx.cluster.run(args=[
458 'sudo', 'install', '-d', '-m', '0700', '/root/.ssh',
459 run.Raw('&&'),
460 'echo', ssh_pub_key,
461 run.Raw('|'),
462 'sudo', 'tee', '-a', '/root/.ssh/authorized_keys',
463 run.Raw('&&'),
464 'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys',
465 ])
466
467 # set options
468 if config.get('allow_ptrace', True):
469 _shell(ctx, cluster_name, bootstrap_remote,
470 ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true'])
471
472 if not config.get('avoid_pacific_features', False):
473 log.info('Distributing conf and client.admin keyring to all hosts + 0755')
474 _shell(ctx, cluster_name, bootstrap_remote,
475 ['ceph', 'orch', 'client-keyring', 'set', 'client.admin',
476 '*', '--mode', '0755'],
477 check_status=False)
478
479 # add other hosts
480 for remote in ctx.cluster.remotes.keys():
481 if remote == bootstrap_remote:
482 continue
483
484 # note: this may be redundant (see above), but it avoids
485 # us having to wait for cephadm to do it.
486 log.info('Writing (initial) conf and keyring to %s' % remote.shortname)
487 remote.write_file(
488 path='/etc/ceph/{}.conf'.format(cluster_name),
489 data=ctx.ceph[cluster_name].config_file)
490 remote.write_file(
491 path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
492 data=ctx.ceph[cluster_name].admin_keyring)
493
494 log.info('Adding host %s to orchestrator...' % remote.shortname)
495 _shell(ctx, cluster_name, remote, [
496 'ceph', 'orch', 'host', 'add',
497 remote.shortname
498 ])
499 r = _shell(ctx, cluster_name, remote,
500 ['ceph', 'orch', 'host', 'ls', '--format=json'],
501 stdout=StringIO())
502 hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())]
503 assert remote.shortname in hosts
504
505 yield
506
507 finally:
508 log.info('Cleaning up testdir ceph.* files...')
509 ctx.cluster.run(args=[
510 'rm', '-f',
511 '{}/seed.{}.conf'.format(testdir, cluster_name),
512 '{}/{}.pub'.format(testdir, cluster_name),
513 ])
514
515 log.info('Stopping all daemons...')
516
517 # this doesn't block until they are all stopped...
518 #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])
519
520 # stop the daemons we know
521 for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES, True):
522 cluster, type_, id_ = teuthology.split_role(role)
523 try:
524 ctx.daemons.get_daemon(type_, id_, cluster).stop()
525 except Exception:
526 log.exception(f'Failed to stop "{role}"')
527 raise
528
529 # tear down anything left (but leave the logs behind)
530 ctx.cluster.run(
531 args=[
532 'sudo',
533 ctx.cephadm,
534 'rm-cluster',
535 '--fsid', fsid,
536 '--force',
537 '--keep-logs',
538 ],
539 check_status=False, # may fail if upgrading from old cephadm
540 )
541
542 # clean up /etc/ceph
543 ctx.cluster.run(args=[
544 'sudo', 'rm', '-f',
545 '/etc/ceph/{}.conf'.format(cluster_name),
546 '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
547 ])
548
549
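# Illustrative sketch only: the --mon-addrv vs --mon-ip decision made during
# bootstrap above.  When the mon address from get_mons() is an addrvec (it
# starts with '['), bootstrap gets --mon-addrv; otherwise a plain --mon-ip.
# The address below is a hypothetical example.
def _example_mon_addr_args(mon_addr='[v2:10.0.0.1:3300,v1:10.0.0.1:6789]'):
    if mon_addr.startswith('['):
        return ['--mon-addrv', mon_addr]
    return ['--mon-ip', mon_addr]
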
550 @contextlib.contextmanager
551 def ceph_mons(ctx, config):
552 """
553 Deploy any additional mons
554 """
555 cluster_name = config['cluster']
556 fsid = ctx.ceph[cluster_name].fsid
557
558 try:
559 daemons = {}
560 if config.get('add_mons_via_daemon_add'):
561 # This is the old way of adding mons that works with the (early) octopus
562 # cephadm scheduler.
563 num_mons = 1
564 for remote, roles in ctx.cluster.remotes.items():
565 for mon in [r for r in roles
566 if teuthology.is_type('mon', cluster_name)(r)]:
567 c_, _, id_ = teuthology.split_role(mon)
568 if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
569 continue
570 log.info('Adding %s on %s' % (mon, remote.shortname))
571 num_mons += 1
572 _shell(ctx, cluster_name, remote, [
573 'ceph', 'orch', 'daemon', 'add', 'mon',
574 remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
575 ])
576 ctx.daemons.register_daemon(
577 remote, 'mon', id_,
578 cluster=cluster_name,
579 fsid=fsid,
580 logger=log.getChild(mon),
581 wait=False,
582 started=True,
583 )
584 daemons[mon] = (remote, id_)
585
586 with contextutil.safe_while(sleep=1, tries=180) as proceed:
587 while proceed():
588 log.info('Waiting for %d mons in monmap...' % (num_mons))
589 r = _shell(
590 ctx=ctx,
591 cluster_name=cluster_name,
592 remote=remote,
593 args=[
594 'ceph', 'mon', 'dump', '-f', 'json',
595 ],
596 stdout=StringIO(),
597 )
598 j = json.loads(r.stdout.getvalue())
599 if len(j['mons']) == num_mons:
600 break
601 else:
602 nodes = []
603 for remote, roles in ctx.cluster.remotes.items():
604 for mon in [r for r in roles
605 if teuthology.is_type('mon', cluster_name)(r)]:
606 c_, _, id_ = teuthology.split_role(mon)
607 log.info('Adding %s on %s' % (mon, remote.shortname))
608 nodes.append(remote.shortname
609 + ':' + ctx.ceph[cluster_name].mons[mon]
610 + '=' + id_)
611 if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
612 continue
613 daemons[mon] = (remote, id_)
614
615 _shell(ctx, cluster_name, remote, [
616 'ceph', 'orch', 'apply', 'mon',
617 str(len(nodes)) + ';' + ';'.join(nodes)]
618 )
619 for mon, i in daemons.items():
620 remote, id_ = i
621 ctx.daemons.register_daemon(
622 remote, 'mon', id_,
623 cluster=cluster_name,
624 fsid=fsid,
625 logger=log.getChild(mon),
626 wait=False,
627 started=True,
628 )
629
630 with contextutil.safe_while(sleep=1, tries=180) as proceed:
631 while proceed():
632 log.info('Waiting for %d mons in monmap...' % (len(nodes)))
633 r = _shell(
634 ctx=ctx,
635 cluster_name=cluster_name,
636 remote=remote,
637 args=[
638 'ceph', 'mon', 'dump', '-f', 'json',
639 ],
640 stdout=StringIO(),
641 )
642 j = json.loads(r.stdout.getvalue())
643 if len(j['mons']) == len(nodes):
644 break
645
646 # refresh our (final) ceph.conf file
647 bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
648 log.info('Generating final ceph.conf file...')
649 r = _shell(
650 ctx=ctx,
651 cluster_name=cluster_name,
652 remote=bootstrap_remote,
653 args=[
654 'ceph', 'config', 'generate-minimal-conf',
655 ],
656 stdout=StringIO(),
657 )
658 ctx.ceph[cluster_name].config_file = r.stdout.getvalue()
659
660 yield
661
662 finally:
663 pass
664
665
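# Illustrative sketch only: the placement argument passed to
# 'ceph orch apply mon' above -- a count followed by host:addr=mon_id
# entries joined with ';'.  Hostnames and addresses are hypothetical.
def _example_mon_placement():
    nodes = [
        'host1:[v2:10.0.0.1:3300,v1:10.0.0.1:6789]=a',
        'host2:[v2:10.0.0.2:3300,v1:10.0.0.2:6789]=b',
        'host3:[v2:10.0.0.3:3300,v1:10.0.0.3:6789]=c',
    ]
    return str(len(nodes)) + ';' + ';'.join(nodes)
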
666 @contextlib.contextmanager
667 def ceph_mgrs(ctx, config):
668 """
669 Deploy any additional mgrs
670 """
671 cluster_name = config['cluster']
672 fsid = ctx.ceph[cluster_name].fsid
673
674 try:
675 nodes = []
676 daemons = {}
677 for remote, roles in ctx.cluster.remotes.items():
678 for mgr in [r for r in roles
679 if teuthology.is_type('mgr', cluster_name)(r)]:
680 c_, _, id_ = teuthology.split_role(mgr)
681 log.info('Adding %s on %s' % (mgr, remote.shortname))
682 nodes.append(remote.shortname + '=' + id_)
683 if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr:
684 continue
685 daemons[mgr] = (remote, id_)
686 if nodes:
687 _shell(ctx, cluster_name, remote, [
688 'ceph', 'orch', 'apply', 'mgr',
689 str(len(nodes)) + ';' + ';'.join(nodes)]
690 )
691 for mgr, i in daemons.items():
692 remote, id_ = i
693 ctx.daemons.register_daemon(
694 remote, 'mgr', id_,
695 cluster=cluster_name,
696 fsid=fsid,
697 logger=log.getChild(mgr),
698 wait=False,
699 started=True,
700 )
701
702 yield
703
704 finally:
705 pass
706
707
708 @contextlib.contextmanager
709 def ceph_osds(ctx, config):
710 """
711 Deploy OSDs
712 """
713 cluster_name = config['cluster']
714 fsid = ctx.ceph[cluster_name].fsid
715
716 try:
717 log.info('Deploying OSDs...')
718
719 # provision OSDs in numeric order
720 id_to_remote = {}
721 devs_by_remote = {}
722 for remote, roles in ctx.cluster.remotes.items():
723 devs_by_remote[remote] = teuthology.get_scratch_devices(remote)
724 for osd in [r for r in roles
725 if teuthology.is_type('osd', cluster_name)(r)]:
726 _, _, id_ = teuthology.split_role(osd)
727 id_to_remote[int(id_)] = (osd, remote)
728
729 cur = 0
730 for osd_id in sorted(id_to_remote.keys()):
731 osd, remote = id_to_remote[osd_id]
732 _, _, id_ = teuthology.split_role(osd)
733 assert int(id_) == cur
734 devs = devs_by_remote[remote]
735 assert devs ## FIXME ##
736 dev = devs.pop()
737 if all(_ in dev for _ in ('lv', 'vg')):
738 short_dev = dev.replace('/dev/', '')
739 else:
740 short_dev = dev
741 log.info('Deploying %s on %s with %s...' % (
742 osd, remote.shortname, dev))
743 _shell(ctx, cluster_name, remote, [
744 'ceph-volume', 'lvm', 'zap', dev])
745 _shell(ctx, cluster_name, remote, [
746 'ceph', 'orch', 'daemon', 'add', 'osd',
747 remote.shortname + ':' + short_dev
748 ])
749 ctx.daemons.register_daemon(
750 remote, 'osd', id_,
751 cluster=cluster_name,
752 fsid=fsid,
753 logger=log.getChild(osd),
754 wait=False,
755 started=True,
756 )
757 cur += 1
758
759 yield
760 finally:
761 pass
762
763
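# Illustrative sketch only: the device-name handling used above.  LVM-style
# scratch devices keep only the 'vg/lv' part for 'ceph orch daemon add osd',
# while plain block devices are passed through unchanged.  Device names are
# hypothetical, e.g. '/dev/vg_nvme/lv_1' -> 'vg_nvme/lv_1', '/dev/sdb' -> '/dev/sdb'.
def _example_short_dev(dev):
    if all(part in dev for part in ('lv', 'vg')):
        return dev.replace('/dev/', '')
    return dev
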
764 @contextlib.contextmanager
765 def ceph_mdss(ctx, config):
766 """
767 Deploy MDSs
768 """
769 cluster_name = config['cluster']
770 fsid = ctx.ceph[cluster_name].fsid
771
772 nodes = []
773 daemons = {}
774 for remote, roles in ctx.cluster.remotes.items():
775 for role in [r for r in roles
776 if teuthology.is_type('mds', cluster_name)(r)]:
777 c_, _, id_ = teuthology.split_role(role)
778 log.info('Adding %s on %s' % (role, remote.shortname))
779 nodes.append(remote.shortname + '=' + id_)
780 daemons[role] = (remote, id_)
781 if nodes:
782 _shell(ctx, cluster_name, remote, [
783 'ceph', 'orch', 'apply', 'mds',
784 'all',
785 str(len(nodes)) + ';' + ';'.join(nodes)]
786 )
787 for role, i in daemons.items():
788 remote, id_ = i
789 ctx.daemons.register_daemon(
790 remote, 'mds', id_,
791 cluster=cluster_name,
792 fsid=fsid,
793 logger=log.getChild(role),
794 wait=False,
795 started=True,
796 )
797
798 yield
799
800
801 @contextlib.contextmanager
802 def ceph_monitoring(daemon_type, ctx, config):
803 """
804 Deploy prometheus, node-exporter, etc.
805 """
806 cluster_name = config['cluster']
807 fsid = ctx.ceph[cluster_name].fsid
808
809 nodes = []
810 daemons = {}
811 for remote, roles in ctx.cluster.remotes.items():
812 for role in [r for r in roles
813 if teuthology.is_type(daemon_type, cluster_name)(r)]:
814 c_, _, id_ = teuthology.split_role(role)
815 log.info('Adding %s on %s' % (role, remote.shortname))
816 nodes.append(remote.shortname + '=' + id_)
817 daemons[role] = (remote, id_)
818 if nodes:
819 _shell(ctx, cluster_name, remote, [
820 'ceph', 'orch', 'apply', daemon_type,
821 str(len(nodes)) + ';' + ';'.join(nodes)]
822 )
823 for role, i in daemons.items():
824 remote, id_ = i
825 ctx.daemons.register_daemon(
826 remote, daemon_type, id_,
827 cluster=cluster_name,
828 fsid=fsid,
829 logger=log.getChild(role),
830 wait=False,
831 started=True,
832 )
833
834 yield
835
836
837 @contextlib.contextmanager
838 def ceph_rgw(ctx, config):
839 """
840 Deploy rgw
841 """
842 cluster_name = config['cluster']
843 fsid = ctx.ceph[cluster_name].fsid
844
845 nodes = {}
846 daemons = {}
847 for remote, roles in ctx.cluster.remotes.items():
848 for role in [r for r in roles
849 if teuthology.is_type('rgw', cluster_name)(r)]:
850 c_, _, id_ = teuthology.split_role(role)
851 log.info('Adding %s on %s' % (role, remote.shortname))
852 svc = '.'.join(id_.split('.')[0:2])
853 if svc not in nodes:
854 nodes[svc] = []
855 nodes[svc].append(remote.shortname + '=' + id_)
856 daemons[role] = (remote, id_)
857
858 for svc, nodes in nodes.items():
859 _shell(ctx, cluster_name, remote, [
860 'ceph', 'orch', 'apply', 'rgw', svc,
861 '--placement',
862 str(len(nodes)) + ';' + ';'.join(nodes)]
863 )
864 for role, i in daemons.items():
865 remote, id_ = i
866 ctx.daemons.register_daemon(
867 remote, 'rgw', id_,
868 cluster=cluster_name,
869 fsid=fsid,
870 logger=log.getChild(role),
871 wait=False,
872 started=True,
873 )
874
875 yield
876
877
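# Illustrative sketch only: how rgw daemons are grouped into services above.
# The first two dot-separated components of the role id name the service; the
# role ids below are hypothetical examples.
def _example_rgw_services(role_ids=('foo.a.0', 'foo.a.1', 'bar.b.0')):
    nodes = {}
    for id_ in role_ids:
        svc = '.'.join(id_.split('.')[0:2])
        nodes.setdefault(svc, []).append(id_)
    return nodes    # {'foo.a': ['foo.a.0', 'foo.a.1'], 'bar.b': ['bar.b.0']}
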
878 @contextlib.contextmanager
879 def ceph_iscsi(ctx, config):
880 """
881 Deploy iSCSI gateways
882 """
883 cluster_name = config['cluster']
884 fsid = ctx.ceph[cluster_name].fsid
885
886 nodes = []
887 daemons = {}
888 for remote, roles in ctx.cluster.remotes.items():
889 for role in [r for r in roles
890 if teuthology.is_type('iscsi', cluster_name)(r)]:
891 c_, _, id_ = teuthology.split_role(role)
892 log.info('Adding %s on %s' % (role, remote.shortname))
893 nodes.append(remote.shortname + '=' + id_)
894 daemons[role] = (remote, id_)
895 if nodes:
896 poolname = 'iscsi'
897 # ceph osd pool create iscsi 3 3 replicated
898 _shell(ctx, cluster_name, remote, [
899 'ceph', 'osd', 'pool', 'create',
900 poolname, '3', '3', 'replicated']
901 )
902
903 _shell(ctx, cluster_name, remote, [
904 'ceph', 'osd', 'pool', 'application', 'enable',
905 poolname, 'rbd']
906 )
907
908 # ceph orch apply iscsi iscsi user password
909 _shell(ctx, cluster_name, remote, [
910 'ceph', 'orch', 'apply', 'iscsi',
911 poolname, 'user', 'password',
912 '--placement', str(len(nodes)) + ';' + ';'.join(nodes)]
913 )
914 for role, i in daemons.items():
915 remote, id_ = i
916 ctx.daemons.register_daemon(
917 remote, 'iscsi', id_,
918 cluster=cluster_name,
919 fsid=fsid,
920 logger=log.getChild(role),
921 wait=False,
922 started=True,
923 )
924
925 yield
926
927
928 @contextlib.contextmanager
929 def ceph_clients(ctx, config):
930 cluster_name = config['cluster']
931
932 log.info('Setting up client nodes...')
933 clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
934 for remote, roles_for_host in clients.remotes.items():
935 for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
936 cluster_name):
937 name = teuthology.ceph_role(role)
938 client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name,
939 name)
940 r = _shell(
941 ctx=ctx,
942 cluster_name=cluster_name,
943 remote=remote,
944 args=[
945 'ceph', 'auth',
946 'get-or-create', name,
947 'mon', 'allow *',
948 'osd', 'allow *',
949 'mds', 'allow *',
950 'mgr', 'allow *',
951 ],
952 stdout=StringIO(),
953 )
954 keyring = r.stdout.getvalue()
955 remote.sudo_write_file(client_keyring, keyring, mode='0644')
956 yield
957
958
959 @contextlib.contextmanager
960 def ceph_initial():
961 try:
962 yield
963 finally:
964 log.info('Teardown complete')
965
966
967 ## public methods
968 @contextlib.contextmanager
969 def stop(ctx, config):
970 """
971 Stop ceph daemons
972
973 For example::
974 tasks:
975 - ceph.stop: [mds.*]
976
977 tasks:
978 - ceph.stop: [osd.0, osd.2]
979
980 tasks:
981 - ceph.stop:
982 daemons: [osd.0, osd.2]
983
984 """
985 if config is None:
986 config = {}
987 elif isinstance(config, list):
988 config = {'daemons': config}
989
990 daemons = ctx.daemons.resolve_role_list(
991 config.get('daemons', None), CEPH_ROLE_TYPES, True)
992 clusters = set()
993
994 for role in daemons:
995 cluster, type_, id_ = teuthology.split_role(role)
996 ctx.daemons.get_daemon(type_, id_, cluster).stop()
997 clusters.add(cluster)
998
999 # for cluster in clusters:
1000 # ctx.ceph[cluster].watchdog.stop()
1001 # ctx.ceph[cluster].watchdog.join()
1002
1003 yield
1004
1005
1006 def shell(ctx, config):
1007 """
1008 Execute (shell) commands
1009 """
1010 cluster_name = config.get('cluster', 'ceph')
1011
1012 args = []
1013 for k in config.pop('env', []):
1014 args.extend(['-e', k + '=' + ctx.config.get(k, '')])
1015 for k in config.pop('volumes', []):
1016 args.extend(['-v', k])
1017
1018 if 'all-roles' in config and len(config) == 1:
1019 a = config['all-roles']
1020 roles = teuthology.all_roles(ctx.cluster)
1021 config = dict((id_, a) for id_ in roles if not id_.startswith('host.'))
1022 elif 'all-hosts' in config and len(config) == 1:
1023 a = config['all-hosts']
1024 roles = teuthology.all_roles(ctx.cluster)
1025 config = dict((id_, a) for id_ in roles if id_.startswith('host.'))
1026
1027 for role, cmd in config.items():
1028 (remote,) = ctx.cluster.only(role).remotes.keys()
1029 log.info('Running commands on role %s host %s', role, remote.name)
1030 if isinstance(cmd, list):
1031 for c in cmd:
1032 _shell(ctx, cluster_name, remote,
1033 ['bash', '-c', subst_vip(ctx, c)],
1034 extra_cephadm_args=args)
1035 else:
1036 assert isinstance(cmd, str)
1037 _shell(ctx, cluster_name, remote,
1038 ['bash', '-ex', '-c', subst_vip(ctx, cmd)],
1039 extra_cephadm_args=args)
1040
1041
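# Illustrative sketch only: how the optional 'env' and 'volumes' entries of
# the cephadm.shell task config become extra 'cephadm shell' arguments, as in
# shell() above.  The key names and values are hypothetical.
def _example_shell_extra_args(env_keys=('SLEEP_TIME',),
                              volumes=('/mnt/foo:/mnt/foo',),
                              ctx_config=None):
    ctx_config = ctx_config or {'SLEEP_TIME': '60'}
    args = []
    for k in env_keys:
        args.extend(['-e', k + '=' + ctx_config.get(k, '')])
    for v in volumes:
        args.extend(['-v', v])
    return args     # ['-e', 'SLEEP_TIME=60', '-v', '/mnt/foo:/mnt/foo']
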
1042 def apply(ctx, config):
1043 """
1044 Apply spec
1045
1046 tasks:
1047 - cephadm.apply:
1048 specs:
1049 - service_type: rgw
1050 service_id: foo
1051 spec:
1052 rgw_frontend_port: 8000
1053 - service_type: rgw
1054 service_id: bar
1055 spec:
1056 rgw_frontend_port: 9000
1057 zone: bar
1058 realm: asdf
1059
1060 """
1061 cluster_name = config.get('cluster', 'ceph')
1062
1063 specs = config.get('specs', [])
1064 y = subst_vip(ctx, yaml.dump_all(specs))
1065
1066 log.info(f'Applying spec(s):\n{y}')
1067 _shell(
1068 ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
1069 ['ceph', 'orch', 'apply', '-i', '-'],
1070 stdin=y,
1071 )
1072
1073
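# Illustrative sketch only: apply() above turns the list of spec dicts into a
# multi-document YAML stream for 'ceph orch apply -i -'.  The specs here are
# hypothetical examples in the same shape as the docstring above.
def _example_specs_to_yaml():
    import yaml
    specs = [
        {'service_type': 'rgw', 'service_id': 'foo',
         'spec': {'rgw_frontend_port': 8000}},
        {'service_type': 'mds', 'service_id': 'cephfs'},
    ]
    return yaml.dump_all(specs)
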
1074 def wait_for_service(ctx, config):
1075 """
1076 Wait for a service to be fully started
1077
1078 tasks:
1079 - cephadm.wait_for_service:
1080 service: rgw.foo
1081 timeout: 60 # defaults to 300
1082
1083 """
1084 cluster_name = config.get('cluster', 'ceph')
1085 timeout = config.get('timeout', 300)
1086 service = config.get('service')
1087 assert service
1088
1089 log.info(
1090 f'Waiting for {cluster_name} service {service} to start (timeout {timeout})...'
1091 )
1092 with contextutil.safe_while(sleep=1, tries=timeout) as proceed:
1093 while proceed():
1094 r = _shell(
1095 ctx=ctx,
1096 cluster_name=cluster_name,
1097 remote=ctx.ceph[cluster_name].bootstrap_remote,
1098 args=[
1099 'ceph', 'orch', 'ls', '-f', 'json',
1100 ],
1101 stdout=StringIO(),
1102 )
1103 j = json.loads(r.stdout.getvalue())
1104 svc = None
1105 for s in j:
1106 if s['service_name'] == service:
1107 svc = s
1108 break
1109 if svc:
1110 log.info(
1111 f"{service} has {svc['status']['running']}/{svc['status']['size']}"
1112 )
1113 if svc['status']['running'] == svc['status']['size']:
1114 break
1115
1116
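# Illustrative sketch only: the readiness check wait_for_service() performs on
# 'ceph orch ls -f json' output -- a service counts as started once
# running == size.  The JSON below is a hypothetical, trimmed example.
def _example_service_ready(service='rgw.foo'):
    import json
    orch_ls = json.loads('''[
        {"service_name": "rgw.foo", "status": {"running": 2, "size": 2}},
        {"service_name": "mon", "status": {"running": 3, "size": 3}}
    ]''')
    for svc in orch_ls:
        if svc['service_name'] == service:
            return svc['status']['running'] == svc['status']['size']
    return False
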
1117 @contextlib.contextmanager
1118 def tweaked_option(ctx, config):
1119 """
1120 Set an option, then restore it to its original value.
1121
1122 Note: because of the way tasks are executed/nested, it is not recommended
1123 to use this method as a standalone task; otherwise the tweaked option is
1124 likely to be restored only at the /end/ of the 'tasks' block.
1125 """
1126 saved_options = {}
1127 # we can complicate this when necessary
1128 options = ['mon-health-to-clog']
1129 type_, id_ = 'mon', '*'
1130 cluster = config.get('cluster', 'ceph')
1131 manager = ctx.managers[cluster]
1132 if id_ == '*':
1133 get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_))
1134 else:
1135 get_from = id_
1136 for option in options:
1137 if option not in config:
1138 continue
1139 value = 'true' if config[option] else 'false'
1140 option = option.replace('-', '_')
1141 old_value = manager.get_config(type_, get_from, option)
1142 if value != old_value:
1143 saved_options[option] = old_value
1144 manager.inject_args(type_, id_, option, value)
1145 yield
1146 for option, value in saved_options.items():
1147 manager.inject_args(type_, id_, option, value)
1148
1149
1150 @contextlib.contextmanager
1151 def restart(ctx, config):
1152 """
1153 restart ceph daemons
1154
1155 For example::
1156 tasks:
1157 - ceph.restart: [all]
1158
1159 For example::
1160 tasks:
1161 - ceph.restart: [osd.0, mon.1, mds.*]
1162
1163 or::
1164
1165 tasks:
1166 - ceph.restart:
1167 daemons: [osd.0, mon.1]
1168 wait-for-healthy: false
1169 wait-for-osds-up: true
1170
1171 :param ctx: Context
1172 :param config: Configuration
1173 """
1174 if config is None:
1175 config = {}
1176 elif isinstance(config, list):
1177 config = {'daemons': config}
1178
1179 daemons = ctx.daemons.resolve_role_list(
1180 config.get('daemons', None), CEPH_ROLE_TYPES, True)
1181 clusters = set()
1182
1183 log.info('daemons %s' % daemons)
1184 with tweaked_option(ctx, config):
1185 for role in daemons:
1186 cluster, type_, id_ = teuthology.split_role(role)
1187 d = ctx.daemons.get_daemon(type_, id_, cluster)
1188 assert d, 'daemon %s does not exist' % role
1189 d.stop()
1190 if type_ == 'osd':
1191 ctx.managers[cluster].mark_down_osd(id_)
1192 d.restart()
1193 clusters.add(cluster)
1194
1195 if config.get('wait-for-healthy', True):
1196 for cluster in clusters:
1197 healthy(ctx=ctx, config=dict(cluster=cluster))
1198 if config.get('wait-for-osds-up', False):
1199 for cluster in clusters:
1200 ctx.managers[cluster].wait_for_all_osds_up()
1201 yield
1202
1203
1204 @contextlib.contextmanager
1205 def distribute_config_and_admin_keyring(ctx, config):
1206 """
1207 Distribute a minimal ceph.conf and the client.admin keyring to all hosts
1208 """
1209 cluster_name = config['cluster']
1210 log.info('Distributing (final) config and client.admin keyring...')
1211 for remote, roles in ctx.cluster.remotes.items():
1212 remote.write_file(
1213 '/etc/ceph/{}.conf'.format(cluster_name),
1214 ctx.ceph[cluster_name].config_file,
1215 sudo=True)
1216 remote.write_file(
1217 path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
1218 data=ctx.ceph[cluster_name].admin_keyring,
1219 sudo=True)
1220 try:
1221 yield
1222 finally:
1223 ctx.cluster.run(args=[
1224 'sudo', 'rm', '-f',
1225 '/etc/ceph/{}.conf'.format(cluster_name),
1226 '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
1227 ])
1228
1229
1230 @contextlib.contextmanager
1231 def crush_setup(ctx, config):
1232 cluster_name = config['cluster']
1233
1234 profile = config.get('crush_tunables', 'default')
1235 log.info('Setting crush tunables to %s', profile)
1236 _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
1237 args=['ceph', 'osd', 'crush', 'tunables', profile])
1238 yield
1239
1240
1241 @contextlib.contextmanager
1242 def create_rbd_pool(ctx, config):
1243 if config.get('create_rbd_pool', False):
1244 cluster_name = config['cluster']
1245 log.info('Waiting for OSDs to come up')
1246 teuthology.wait_until_osds_up(
1247 ctx,
1248 cluster=ctx.cluster,
1249 remote=ctx.ceph[cluster_name].bootstrap_remote,
1250 ceph_cluster=cluster_name,
1251 )
1252 log.info('Creating RBD pool')
1253 _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
1254 args=['sudo', 'ceph', '--cluster', cluster_name,
1255 'osd', 'pool', 'create', 'rbd', '8'])
1256 _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
1257 args=['sudo', 'ceph', '--cluster', cluster_name,
1258 'osd', 'pool', 'application', 'enable',
1259 'rbd', 'rbd', '--yes-i-really-mean-it'
1260 ])
1261 yield
1262
1263
1264 @contextlib.contextmanager
1265 def _bypass():
1266 yield
1267
1268
1269 @contextlib.contextmanager
1270 def initialize_config(ctx, config):
1271 cluster_name = config['cluster']
1272 testdir = teuthology.get_testdir(ctx)
1273
1274 ctx.ceph[cluster_name].thrashers = []
1275 # fixme: setup watchdog, ala ceph.py
1276
1277 ctx.ceph[cluster_name].roleless = False # see below
1278
1279 first_ceph_cluster = False
1280 if not hasattr(ctx, 'daemons'):
1281 first_ceph_cluster = True
1282
1283 # cephadm mode?
1284 if 'cephadm_mode' not in config:
1285 config['cephadm_mode'] = 'root'
1286 assert config['cephadm_mode'] in ['root', 'cephadm-package']
1287 if config['cephadm_mode'] == 'root':
1288 ctx.cephadm = testdir + '/cephadm'
1289 else:
1290 ctx.cephadm = 'cephadm' # in the path
1291
1292 if first_ceph_cluster:
1293 # FIXME: this is global for all clusters
1294 ctx.daemons = DaemonGroup(
1295 use_cephadm=ctx.cephadm)
1296
1297 # uuid
1298 fsid = str(uuid.uuid1())
1299 log.info('Cluster fsid is %s' % fsid)
1300 ctx.ceph[cluster_name].fsid = fsid
1301
1302 # mon ips
1303 log.info('Choosing monitor IPs and ports...')
1304 remotes_and_roles = ctx.cluster.remotes.items()
1305 ips = [host for (host, port) in
1306 (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
1307
1308 if config.get('roleless', False):
1309 # mons will be named after hosts
1310 first_mon = None
1311 for remote, _ in remotes_and_roles:
1312 ctx.cluster.remotes[remote].append('mon.' + remote.shortname)
1313 if not first_mon:
1314 first_mon = remote.shortname
1315 bootstrap_remote = remote
1316 log.info('No mon roles; fabricating mons')
1317
1318 roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()]
1319
1320 ctx.ceph[cluster_name].mons = get_mons(
1321 roles, ips, cluster_name,
1322 mon_bind_msgr2=config.get('mon_bind_msgr2', True),
1323 mon_bind_addrvec=config.get('mon_bind_addrvec', True),
1324 )
1325 log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)
1326
1327 if config.get('roleless', False):
1328 ctx.ceph[cluster_name].roleless = True
1329 ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
1330 ctx.ceph[cluster_name].first_mon = first_mon
1331 ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
1332 else:
1333 first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
1334 _, _, first_mon = teuthology.split_role(first_mon_role)
1335 (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
1336 log.info('First mon is mon.%s on %s' % (first_mon,
1337 bootstrap_remote.shortname))
1338 ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
1339 ctx.ceph[cluster_name].first_mon = first_mon
1340 ctx.ceph[cluster_name].first_mon_role = first_mon_role
1341
1342 others = ctx.cluster.remotes[bootstrap_remote]
1343 mgrs = sorted([r for r in others
1344 if teuthology.is_type('mgr', cluster_name)(r)])
1345 if not mgrs:
1346 raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon)
1347 _, _, first_mgr = teuthology.split_role(mgrs[0])
1348 log.info('First mgr is %s' % (first_mgr))
1349 ctx.ceph[cluster_name].first_mgr = first_mgr
1350 yield
1351
1352
1353 @contextlib.contextmanager
1354 def task(ctx, config):
1355 """
1356 Deploy ceph cluster using cephadm
1357
1358 For example, teuthology.yaml can contain the 'defaults' section:
1359
1360 defaults:
1361 cephadm:
1362 containers:
1363 image: 'quay.io/ceph-ci/ceph'
1364
1365 Using overrides makes it possible to customize it per run.
1366 The equivalent 'overrides' section looks like:
1367
1368 overrides:
1369 cephadm:
1370 containers:
1371 image: 'quay.io/ceph-ci/ceph'
1372 registry-login:
1373 url: registry-url
1374 username: registry-user
1375 password: registry-password
1376
1377 :param ctx: the argparse.Namespace object
1378 :param config: the config dict
1379 """
1380 if config is None:
1381 config = {}
1382
1383 assert isinstance(config, dict), \
1384 "task only supports a dictionary for configuration"
1385
1386 overrides = ctx.config.get('overrides', {})
1387 teuthology.deep_merge(config, overrides.get('ceph', {}))
1388 teuthology.deep_merge(config, overrides.get('cephadm', {}))
1389 log.info('Config: ' + str(config))
1390
1391 # set up cluster context
1392 if not hasattr(ctx, 'ceph'):
1393 ctx.ceph = {}
1394 if 'cluster' not in config:
1395 config['cluster'] = 'ceph'
1396 cluster_name = config['cluster']
1397 if cluster_name not in ctx.ceph:
1398 ctx.ceph[cluster_name] = argparse.Namespace()
1399 ctx.ceph[cluster_name].bootstrapped = False
1400
1401 # image
1402 teuth_defaults = teuth_config.get('defaults', {})
1403 cephadm_defaults = teuth_defaults.get('cephadm', {})
1404 containers_defaults = cephadm_defaults.get('containers', {})
1405 container_image_name = containers_defaults.get('image', None)
1406
1407 containers = config.get('containers', {})
1408 container_image_name = containers.get('image', container_image_name)
1409
1410 if not hasattr(ctx.ceph[cluster_name], 'image'):
1411 ctx.ceph[cluster_name].image = config.get('image')
1412 ref = None
1413 if not ctx.ceph[cluster_name].image:
1414 if not container_image_name:
1415 raise Exception("Configuration error occurred. "
1416 "The 'image' value is undefined for 'cephadm' task. "
1417 "Please provide corresponding options in the task's "
1418 "config, task 'overrides', or teuthology 'defaults' "
1419 "section.")
1420 sha1 = config.get('sha1')
1421 flavor = config.get('flavor', 'default')
1422
1423 if sha1:
1424 if flavor == "crimson":
1425 ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor
1426 else:
1427 ctx.ceph[cluster_name].image = container_image_name + ':' + sha1
1428 ref = sha1
1429 else:
1430 # hmm, fall back to branch?
1431 branch = config.get('branch', 'master')
1432 ref = branch
1433 ctx.ceph[cluster_name].image = container_image_name + ':' + branch
1434 log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)
1435
1436
1437 with contextutil.nested(
1438 # if the cluster is already bootstrapped, bypass the corresponding methods
1439 lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
1440 else initialize_config(ctx=ctx, config=config),
1441 lambda: ceph_initial(),
1442 lambda: normalize_hostnames(ctx=ctx),
1443 lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
1444 else download_cephadm(ctx=ctx, config=config, ref=ref),
1445 lambda: ceph_log(ctx=ctx, config=config),
1446 lambda: ceph_crash(ctx=ctx, config=config),
1447 lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
1448 else ceph_bootstrap(ctx, config),
1449 lambda: crush_setup(ctx=ctx, config=config),
1450 lambda: ceph_mons(ctx=ctx, config=config),
1451 lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
1452 lambda: ceph_mgrs(ctx=ctx, config=config),
1453 lambda: ceph_osds(ctx=ctx, config=config),
1454 lambda: ceph_mdss(ctx=ctx, config=config),
1455 lambda: ceph_rgw(ctx=ctx, config=config),
1456 lambda: ceph_iscsi(ctx=ctx, config=config),
1457 lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
1458 lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
1459 lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
1460 lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
1461 lambda: ceph_clients(ctx=ctx, config=config),
1462 lambda: create_rbd_pool(ctx=ctx, config=config),
1463 ):
1464 if not hasattr(ctx, 'managers'):
1465 ctx.managers = {}
1466 ctx.managers[cluster_name] = CephManager(
1467 ctx.ceph[cluster_name].bootstrap_remote,
1468 ctx=ctx,
1469 logger=log.getChild('ceph_manager.' + cluster_name),
1470 cluster=cluster_name,
1471 cephadm=True,
1472 )
1473
1474 try:
1475 if config.get('wait-for-healthy', True):
1476 healthy(ctx=ctx, config=config)
1477
1478 log.info('Setup complete, yielding')
1479 yield
1480
1481 finally:
1482 log.info('Teardown begin')
1483