"""
Ceph cluster task, deployed via cephadm orchestrator
"""
import argparse
import configobj
import contextlib
import errno
import json
import logging
import os
import re
import uuid

import six
import toml
from io import BytesIO
from six import StringIO
from tarfile import ReadError
from tasks.ceph_manager import CephManager
from teuthology import misc as teuthology
from teuthology import contextutil
from teuthology.orchestra import run
from teuthology.orchestra.daemon import DaemonGroup
from teuthology.config import config as teuth_config

# these items we use from ceph.py should probably eventually move elsewhere
from tasks.ceph import get_mons, healthy

CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus']

log = logging.getLogger(__name__)

def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs):
    testdir = teuthology.get_testdir(ctx)
    return remote.run(
        args=[
            'sudo', ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            'shell',
            '-c', '/etc/ceph/{}.conf'.format(cluster_name),
            '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--fsid', ctx.ceph[cluster_name].fsid,
        ] + extra_cephadm_args + [
            '--',
        ] + args,
        **kwargs)

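# Usage sketch (comment only, not part of the original task): the helpers below
# call _shell() to run a single command inside a `cephadm shell` container on a
# remote, e.g.
#
#   _shell(ctx, cluster_name, remote, ['ceph', 'orch', 'host', 'ls'])
#
# which runs roughly `sudo .../cephadm shell --fsid <fsid> -- ceph orch host ls`
# on that host.
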
def build_initial_config(ctx, config):
    cluster_name = config['cluster']

    path = os.path.join(os.path.dirname(__file__), 'cephadm.conf')
    conf = configobj.ConfigObj(path, file_error=True)

    conf.setdefault('global', {})
    conf['global']['fsid'] = ctx.ceph[cluster_name].fsid

    for section, keys in config.get('conf', {}).items():
        for key, value in keys.items():
            log.info(" override: [%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    return conf

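# Illustrative only (the option name is hypothetical, not from the original
# file): the override loop above consumes a task config shaped like
#
#   cephadm:
#     conf:
#       mon:
#         mon warn on legacy crush tunables: false
#
# i.e. config['conf'] maps conf-file sections to key/value overrides that are
# merged into the seed cephadm.conf.
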
@contextlib.contextmanager
def normalize_hostnames(ctx):
    """
    Ensure we have short hostnames throughout, for consistency between
    remote.shortname and socket.gethostname() in cephadm.
    """
    log.info('Normalizing hostnames...')
    ctx.cluster.run(args=[
        'sudo', 'hostname',
        run.Raw('$(hostname -s)'),
    ])
    yield

@contextlib.contextmanager
def download_cephadm(ctx, config, ref):
    cluster_name = config['cluster']

    if config.get('cephadm_mode') != 'cephadm-package':
        ref = config.get('cephadm_branch', ref)
        git_url = teuth_config.get_ceph_git_url()
        log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref))
        if git_url.startswith('https://github.com/'):
            # git archive doesn't like https:// URLs, which we use with github.
            rest = git_url.split('https://github.com/', 1)[1]
            rest = re.sub(r'\.git/?$', '', rest).strip()  # no .git suffix
            ctx.cluster.run(args=[
                'curl', '--silent',
                'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm',
                run.Raw('>'),
                ctx.cephadm,
            ])
        else:
            ctx.cluster.run(args=[
                'git', 'archive',
                '--remote=' + git_url,
                ref,
                'src/cephadm/cephadm',
                run.Raw('|'),
                'tar', '-xO', 'src/cephadm/cephadm',
                run.Raw('>'),
                ctx.cephadm,
            ])
        # sanity-check the resulting file and set executable bit
        cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm)
        ctx.cluster.run(args=[
            'test', '-s', ctx.cephadm,
            run.Raw('&&'),
            'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'),
            run.Raw('&&'),
            'chmod', '+x', ctx.cephadm,
        ])

    try:
        yield
    finally:
        log.info('Removing cluster...')
        ctx.cluster.run(args=[
            'sudo', ctx.cephadm, 'rm-cluster',
            '--fsid', ctx.ceph[cluster_name].fsid,
            '--force',
        ])

        if config.get('cephadm_mode') == 'root':
            log.info('Removing cephadm ...')
            ctx.cluster.run(args=[
                'rm', '-rf', ctx.cephadm,
            ])

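# Illustrative only (branch value is made up): the download step above honors
# task options along these lines
#
#   cephadm:
#     cephadm_mode: root          # or 'cephadm-package' to use the installed package
#     cephadm_branch: octopus     # ref the standalone cephadm script is fetched from
#
# When cephadm_mode is 'cephadm-package', the standalone-script download above
# is skipped entirely.
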
@contextlib.contextmanager
def ceph_log(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        yield

    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise

    finally:
        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log.
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo', 'egrep', pattern,
                '/var/log/ceph/{fsid}/ceph.log'.format(
                    fsid=fsid),
            ]
            if excludes:
                for exclude in excludes:
                    args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = ctx.ceph[cluster_name].bootstrap_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                             config.get('log-whitelist')) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log-whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'find', '/var/log/ceph',  # all logs, not just for the cluster
                        '-name', '*.log', '-print0',
                        run.Raw('|'), 'sudo', 'xargs', '-0', '--no-run-if-empty',
                        '--', 'gzip', '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            try:
                os.makedirs(path)
            except OSError:
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    pass
                teuthology.pull_directory(remote, '/var/log/ceph',  # everything
                                          os.path.join(sub, 'log'))

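# Illustrative only (the pattern is hypothetical): the badness scan above honors
# a 'log-whitelist' list in the task config, e.g.
#
#   cephadm:
#     log-whitelist:
#       - '\(OSD_SLOW_PING_TIME'
#
# Whitelisted patterns are piped through `egrep -v` before the first remaining
# match is recorded as the failure reason.
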
@contextlib.contextmanager
def ceph_crash(ctx, config):
    """
    Gather crash dumps from /var/lib/ceph/$fsid/crash
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        yield

    finally:
        if ctx.archive is not None:
            log.info('Archiving crash dumps...')
            path = os.path.join(ctx.archive, 'remote')
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    teuthology.pull_directory(remote,
                                              '/var/lib/ceph/%s/crash' % fsid,
                                              os.path.join(sub, 'crash'))
                except ReadError:
                    pass

@contextlib.contextmanager
def ceph_bootstrap(ctx, config, registry):
    """
    Bootstrap a Ceph cluster; set up the container registry mirror before
    the bootstrap if a registry is provided.

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    :param registry: url to containers' mirror registry
    """
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)
    fsid = ctx.ceph[cluster_name].fsid

    bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
    first_mon = ctx.ceph[cluster_name].first_mon
    first_mon_role = ctx.ceph[cluster_name].first_mon_role
    mons = ctx.ceph[cluster_name].mons

    ctx.cluster.run(args=[
        'sudo', 'mkdir', '-p', '/etc/ceph',
    ])
    ctx.cluster.run(args=[
        'sudo', 'chmod', '777', '/etc/ceph',
    ])
    if registry:
        add_mirror_to_cluster(ctx, registry)
    try:
        log.info('Writing seed config...')
        conf_fp = BytesIO()
        seed_config = build_initial_config(ctx, config)
        seed_config.write(conf_fp)
        teuthology.write_file(
            remote=bootstrap_remote,
            path='{}/seed.{}.conf'.format(testdir, cluster_name),
            data=conf_fp.getvalue())
        log.debug('Final config:\n' + conf_fp.getvalue().decode())
        ctx.ceph[cluster_name].conf = seed_config

        # register initial daemons
        ctx.daemons.register_daemon(
            bootstrap_remote, 'mon', first_mon,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild('mon.' + first_mon),
            wait=False,
            started=True,
        )
        if not ctx.ceph[cluster_name].roleless:
            first_mgr = ctx.ceph[cluster_name].first_mgr
            ctx.daemons.register_daemon(
                bootstrap_remote, 'mgr', first_mgr,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild('mgr.' + first_mgr),
                wait=False,
                started=True,
            )

        # bootstrap
        log.info('Bootstrapping...')
        cmd = [
            'sudo',
            ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            '-v',
            'bootstrap',
            '--fsid', fsid,
            '--config', '{}/seed.{}.conf'.format(testdir, cluster_name),
            '--output-config', '/etc/ceph/{}.conf'.format(cluster_name),
            '--output-keyring',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name),
        ]
        if not ctx.ceph[cluster_name].roleless:
            cmd += [
                '--mon-id', first_mon,
                '--mgr-id', first_mgr,
                '--orphan-initial-daemons',   # we will do it explicitly!
                '--skip-monitoring-stack',    # we'll provision these explicitly
            ]
        if mons[first_mon_role].startswith('['):
            cmd += ['--mon-addrv', mons[first_mon_role]]
        else:
            cmd += ['--mon-ip', mons[first_mon_role]]
        if config.get('skip_dashboard'):
            cmd += ['--skip-dashboard']
        # bootstrap makes the keyring root 0600, so +r it for our purposes
        cmd += [
            run.Raw('&&'),
            'sudo', 'chmod', '+r',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ]
        bootstrap_remote.run(args=cmd)
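
        # Illustrative only (host, image, and IP values are made up): for a
        # non-roleless run the assembled command is roughly
        #
        #   sudo .../cephadm --image <image> bootstrap --fsid <fsid> \
        #       --config .../seed.ceph.conf \
        #       --output-config /etc/ceph/ceph.conf \
        #       --output-keyring /etc/ceph/ceph.client.admin.keyring \
        #       --output-pub-ssh-key .../ceph.pub \
        #       --mon-id a --mgr-id x --orphan-initial-daemons \
        #       --skip-monitoring-stack --mon-ip 172.21.15.1 \
        #       && sudo chmod +r /etc/ceph/ceph.client.admin.keyring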

        # fetch keys and configs
        log.info('Fetching config...')
        ctx.ceph[cluster_name].config_file = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.conf'.format(cluster_name))
        log.info('Fetching client.admin keyring...')
        ctx.ceph[cluster_name].admin_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name))
        log.info('Fetching mon keyring...')
        ctx.ceph[cluster_name].mon_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/var/lib/ceph/%s/mon.%s/keyring' % (fsid, first_mon),
            sudo=True)

        # fetch ssh key, distribute to additional nodes
        log.info('Fetching pub ssh key...')
        ssh_pub_key = teuthology.get_file(
            remote=bootstrap_remote,
            path='{}/{}.pub'.format(testdir, cluster_name)
        ).decode('ascii').strip()

        log.info('Installing pub ssh key for root users...')
        ctx.cluster.run(args=[
            'sudo', 'install', '-d', '-m', '0700', '/root/.ssh',
            run.Raw('&&'),
            'echo', ssh_pub_key,
            run.Raw('|'),
            'sudo', 'tee', '-a', '/root/.ssh/authorized_keys',
            run.Raw('&&'),
            'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys',
        ])

        _shell(ctx, cluster_name, bootstrap_remote,
               ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true'])

        for remote in ctx.cluster.remotes.keys():
            if remote == bootstrap_remote:
                continue
            log.info('Writing (initial) conf and keyring to %s' % remote.shortname)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.conf'.format(cluster_name),
                data=ctx.ceph[cluster_name].config_file)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
                data=ctx.ceph[cluster_name].admin_keyring)

            log.info('Adding host %s to orchestrator...' % remote.shortname)
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'host', 'add',
                remote.shortname
            ])
            r = _shell(ctx, cluster_name, remote,
                       ['ceph', 'orch', 'host', 'ls', '--format=json'],
                       stdout=StringIO())
            hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())]
            assert remote.shortname in hosts

        yield

    finally:
        log.info('Cleaning up testdir ceph.* files...')
        ctx.cluster.run(args=[
            'rm', '-f',
            '{}/seed.{}.conf'.format(testdir, cluster_name),
            '{}/{}.pub'.format(testdir, cluster_name),
        ])

        log.info('Stopping all daemons...')

        # this doesn't block until they are all stopped...
        #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])

        # so, stop them individually
        for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES, True):
            cluster, type_, id_ = teuthology.split_role(role)
            try:
                ctx.daemons.get_daemon(type_, id_, cluster).stop()
            except Exception:
                log.exception('Failed to stop "{role}"'.format(role=role))

        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/{}.conf'.format(cluster_name),
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ])

@contextlib.contextmanager
def ceph_mons(ctx, config):
    """
    Deploy any additional mons
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid
    num_mons = 1

    try:
        for remote, roles in ctx.cluster.remotes.items():
            for mon in [r for r in roles
                        if teuthology.is_type('mon', cluster_name)(r)]:
                c_, _, id_ = teuthology.split_role(mon)
                if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
                    continue
                log.info('Adding %s on %s' % (mon, remote.shortname))
                num_mons += 1
                _shell(ctx, cluster_name, remote, [
                    'ceph', 'orch', 'daemon', 'add', 'mon',
                    remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
                ])
                ctx.daemons.register_daemon(
                    remote, 'mon', id_,
                    cluster=cluster_name,
                    fsid=fsid,
                    logger=log.getChild(mon),
                    wait=False,
                    started=True,
                )

        with contextutil.safe_while(sleep=1, tries=180) as proceed:
            while proceed():
                log.info('Waiting for %d mons in monmap...' % (num_mons))
                r = _shell(
                    ctx=ctx,
                    cluster_name=cluster_name,
                    remote=remote,
                    args=['ceph', 'mon', 'dump', '-f', 'json'],
                    stdout=StringIO(),
                )
                j = json.loads(r.stdout.getvalue())
                if len(j['mons']) == num_mons:
                    break

        # refresh our (final) ceph.conf file
        log.info('Generating final ceph.conf file...')
        r = _shell(
            ctx=ctx,
            cluster_name=cluster_name,
            remote=remote,
            args=['ceph', 'config', 'generate-minimal-conf'],
            stdout=StringIO(),
        )
        ctx.ceph[cluster_name].config_file = r.stdout.getvalue()

        yield

    finally:
        pass

@contextlib.contextmanager
def ceph_mgrs(ctx, config):
    """
    Deploy any additional mgrs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        nodes = []
        daemons = {}
        for remote, roles in ctx.cluster.remotes.items():
            for mgr in [r for r in roles
                        if teuthology.is_type('mgr', cluster_name)(r)]:
                c_, _, id_ = teuthology.split_role(mgr)
                if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr:
                    continue
                log.info('Adding %s on %s' % (mgr, remote.shortname))
                nodes.append(remote.shortname + '=' + id_)
                daemons[mgr] = (remote, id_)
        if nodes:
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'apply', 'mgr',
                str(len(nodes) + 1) + ';' + ';'.join(nodes)]
            )
        for mgr, i in daemons.items():
            remote, id_ = i
            ctx.daemons.register_daemon(
                remote, 'mgr', id_,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild(mgr),
                wait=False,
                started=True,
            )

        yield

    finally:
        pass

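# Illustrative only (hostnames are made up): `ceph orch apply mgr` above takes a
# placement spec of the form "<count>;host1=id1;host2=id2", e.g.
#
#   ceph orch apply mgr "3;smithi042=y;smithi043=z"
#
# The +1 accounts for the bootstrap mgr that already runs on the first host.
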
@contextlib.contextmanager
def ceph_osds(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        log.info('Deploying OSDs...')

        # provision OSDs in numeric order
        id_to_remote = {}
        devs_by_remote = {}
        for remote, roles in ctx.cluster.remotes.items():
            devs_by_remote[remote] = teuthology.get_scratch_devices(remote)
            for osd in [r for r in roles
                        if teuthology.is_type('osd', cluster_name)(r)]:
                _, _, id_ = teuthology.split_role(osd)
                id_to_remote[int(id_)] = (osd, remote)

        cur = 0
        for osd_id in sorted(id_to_remote.keys()):
            osd, remote = id_to_remote[osd_id]
            _, _, id_ = teuthology.split_role(osd)
            assert int(id_) == cur
            devs = devs_by_remote[remote]
            assert devs   ## FIXME ##
            dev = devs.pop()
            if all(_ in dev for _ in ('lv', 'vg')):
                short_dev = dev.replace('/dev/', '')
            else:
                short_dev = dev
            log.info('Deploying %s on %s with %s...' % (
                osd, remote.shortname, dev))
            _shell(ctx, cluster_name, remote, [
                'ceph-volume', 'lvm', 'zap', dev])
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'daemon', 'add', 'osd',
                remote.shortname + ':' + short_dev
            ])
            ctx.daemons.register_daemon(
                remote, 'osd', id_,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild(osd),
                wait=False,
                started=True,
            )
            cur += 1

        yield

    finally:
        pass

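# Illustrative only (device and host names are made up): for an LVM scratch
# device such as /dev/vg_nvme/lv_4 the code above strips the /dev/ prefix, so
# the daemon is added with
#
#   ceph orch daemon add osd smithi042:vg_nvme/lv_4
#
# while a plain block device is passed through unchanged.
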
@contextlib.contextmanager
def ceph_mdss(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('mds', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'mds',
            'all',
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'mds', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield

@contextlib.contextmanager
def ceph_monitoring(daemon_type, ctx, config):
    """
    Deploy prometheus, node-exporter, etc.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type(daemon_type, cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', daemon_type,
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, daemon_type, id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield

@contextlib.contextmanager
def ceph_rgw(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = {}
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('rgw', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            realmzone = '.'.join(id_.split('.')[0:2])
            if realmzone not in nodes:
                nodes[realmzone] = []
            nodes[realmzone].append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)

    for realmzone in nodes.keys():
        (realm, zone) = realmzone.split('.', 1)

        # TODO: those should be moved to mgr/cephadm
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'realm', 'create', '--rgw-realm', realm, '--default']
        )
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'zonegroup', 'create', '--rgw-zonegroup=default', '--master', '--default']
        )
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'zone', 'create', '--rgw-zonegroup=default', '--rgw-zone', zone, '--master', '--default']
        )

    for realmzone, nodes in nodes.items():
        (realm, zone) = realmzone.split('.', 1)
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'rgw', realm, zone,
            '--placement',
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'rgw', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield

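# Illustrative only (the role id is made up): rgw role ids in this task are
# expected to look like <realm>.<zone>.<id>, so an id of 'myrealm.myzone.a'
# yields realmzone 'myrealm.myzone', which is split into --rgw-realm 'myrealm'
# and --rgw-zone 'myzone' for the radosgw-admin calls above.
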
@contextlib.contextmanager
def ceph_clients(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name,
                                                                name)
            r = _shell(
                ctx=ctx,
                cluster_name=cluster_name,
                remote=remote,
                args=[
                    'ceph', 'auth',
                    'get-or-create', name,
                    'mon', 'allow *', 'osd', 'allow *',
                    'mds', 'allow *', 'mgr', 'allow *',
                ],
                stdout=StringIO(),
            )
            keyring = r.stdout.getvalue()
            teuthology.sudo_write_file(
                remote=remote,
                path=client_keyring,
                data=keyring,
                perms='0644'
            )

    yield

@contextlib.contextmanager
def ceph_initial():
    try:
        yield
    finally:
        log.info('Teardown complete')

@contextlib.contextmanager
def stop(ctx, config):
    """
    Stop ceph daemons

    For example::
      tasks:
      - ceph.stop: [osd.0, osd.2]

      tasks:
      - ceph.stop:
          daemons: [osd.0, osd.2]

    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    for role in daemons:
        cluster, type_, id_ = teuthology.split_role(role)
        ctx.daemons.get_daemon(type_, id_, cluster).stop()
        clusters.add(cluster)

#    for cluster in clusters:
#        ctx.ceph[cluster].watchdog.stop()
#        ctx.ceph[cluster].watchdog.join()

    yield

def shell(ctx, config):
    """
    Execute (shell) commands
    """
    cluster_name = config.get('cluster', 'ceph')

    env = []
    if 'env' in config:
        for k in config['env']:
            env.extend(['-e', k + '=' + ctx.config.get(k, '')])
        del config['env']

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.items():
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            _shell(ctx, cluster_name, remote,
                   ['bash', '-c', c],
                   extra_cephadm_args=env)

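# Illustrative only (role and pool names are made up): a typical use of this
# task in a job yaml is
#
#   tasks:
#   - cephadm.shell:
#       mon.a:
#         - ceph osd pool create foo
#         - ceph osd pool ls
#
# Each listed command is run on the mapped role's host via `cephadm shell`.
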
@contextlib.contextmanager
def tweaked_option(ctx, config):
    """
    Set an option, and then restore it with its original value.

    Note: due to the way tasks are executed/nested, it is not suggested to
    use this method as a standalone task; otherwise, it is likely that it will
    restore the tweaked option at the /end/ of the 'tasks' block.
    """
    saved_options = {}
    # we can complicate this when necessary
    options = ['mon-health-to-clog']
    type_, id_ = 'mon', '*'
    cluster = config.get('cluster', 'ceph')
    manager = ctx.managers[cluster]
    if id_ == '*':
        get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_))
    else:
        get_from = id_
    for option in options:
        if option not in config:
            continue
        value = 'true' if config[option] else 'false'
        option = option.replace('-', '_')
        old_value = manager.get_config(type_, get_from, option)
        if value != old_value:
            saved_options[option] = old_value
            manager.inject_args(type_, id_, option, value)
    yield
    for option, value in saved_options.items():
        manager.inject_args(type_, id_, option, value)

@contextlib.contextmanager
def restart(ctx, config):
    """
    Restart ceph daemons

    For example::
      tasks:
      - ceph.restart: [all]

      tasks:
      - ceph.restart: [osd.0, mon.1, mds.*]

      tasks:
      - ceph.restart:
          daemons: [osd.0, mon.1]
          wait-for-healthy: false
          wait-for-osds-up: true

    :param ctx: Context
    :param config: Configuration
    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    log.info('daemons %s' % daemons)
    with tweaked_option(ctx, config):
        for role in daemons:
            cluster, type_, id_ = teuthology.split_role(role)
            d = ctx.daemons.get_daemon(type_, id_, cluster)
            assert d, 'daemon %s does not exist' % role
            d.stop()
            if type_ == 'osd':
                ctx.managers[cluster].mark_down_osd(id_)
            d.restart()
            clusters.add(cluster)

    if config.get('wait-for-healthy', True):
        for cluster in clusters:
            healthy(ctx=ctx, config=dict(cluster=cluster))
    if config.get('wait-for-osds-up', False):
        for cluster in clusters:
            ctx.managers[cluster].wait_for_all_osds_up()
    yield

@contextlib.contextmanager
def distribute_config_and_admin_keyring(ctx, config):
    """
    Distribute a sufficient config and keyring for clients
    """
    cluster_name = config['cluster']
    log.info('Distributing (final) config and client.admin keyring...')
    for remote, roles in ctx.cluster.remotes.items():
        teuthology.sudo_write_file(
            remote=remote,
            path='/etc/ceph/{}.conf'.format(cluster_name),
            data=ctx.ceph[cluster_name].config_file)
        teuthology.sudo_write_file(
            remote=remote,
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            data=ctx.ceph[cluster_name].admin_keyring)
    try:
        yield
    finally:
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/{}.conf'.format(cluster_name),
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ])

@contextlib.contextmanager
def crush_setup(ctx, config):
    cluster_name = config['cluster']

    profile = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', profile)
    _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
           args=['ceph', 'osd', 'crush', 'tunables', profile])
    yield

@contextlib.contextmanager
def _bypass():
    yield

@contextlib.contextmanager
def initialize_config(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    ctx.ceph[cluster_name].roleless = False  # see below

    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True

    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(
            use_cephadm=ctx.cephadm)

    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]

    if config.get('roleless', False):
        # mons will be named after hosts
        first_mon = None
        for remote, _ in remotes_and_roles:
            ctx.cluster.remotes[remote].append('mon.' + remote.shortname)
            if not first_mon:
                first_mon = remote.shortname
                bootstrap_remote = remote
        log.info('No mon roles; fabricating mons')

    roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()]

    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    if config.get('roleless', False):
        ctx.ceph[cluster_name].roleless = True
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
    else:
        first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
        _, _, first_mon = teuthology.split_role(first_mon_role)
        (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
        log.info('First mon is mon.%s on %s' % (first_mon,
                                                bootstrap_remote.shortname))
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = first_mon_role

        others = ctx.cluster.remotes[bootstrap_remote]
        mgrs = sorted([r for r in others
                       if teuthology.is_type('mgr', cluster_name)(r)])
        if not mgrs:
            raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon)
        _, _, first_mgr = teuthology.split_role(mgrs[0])
        log.info('First mgr is %s' % (first_mgr))
        ctx.ceph[cluster_name].first_mgr = first_mgr

    yield

@contextlib.contextmanager
def task(ctx, config):
    """
    Deploy ceph cluster using cephadm

    Set up container registry mirrors before the bootstrap, if the
    corresponding config is provided in the teuthology server config yaml file.

    For example, teuthology.yaml can contain the 'defaults' section:

        defaults:
          cephadm:
            containers:
              registry_mirrors:
                docker.io: 'registry.mirror.example.com:5000'
              image: 'quay.io/ceph-ci/ceph'

    Using overrides makes it possible to customize it per run.
    The equivalent 'overrides' section looks like:

        overrides:
          cephadm:
            containers:
              registry_mirrors:
                docker.io: 'registry.mirror.example.com:5000'
              image: 'quay.io/ceph-ci/ceph'

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    teuthology.deep_merge(config, overrides.get('cephadm', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    if cluster_name not in ctx.ceph:
        ctx.ceph[cluster_name] = argparse.Namespace()
        ctx.ceph[cluster_name].bootstrapped = False

    # image
    teuth_defaults = teuth_config.get('defaults', {})
    cephadm_defaults = teuth_defaults.get('cephadm', {})
    containers_defaults = cephadm_defaults.get('containers', {})
    mirrors_defaults = containers_defaults.get('registry_mirrors', {})
    container_registry_mirror = mirrors_defaults.get('docker.io', None)
    container_image_name = containers_defaults.get('image', None)

    containers = config.get('containers', {})
    mirrors = containers.get('registry_mirrors', {})
    container_image_name = containers.get('image', container_image_name)
    container_registry_mirror = mirrors.get('docker.io',
                                            container_registry_mirror)

    if not container_image_name:
        raise Exception("Configuration error occurred. "
                        "The 'image' value is undefined for 'cephadm' task. "
                        "Please provide corresponding options in the task's "
                        "config, task 'overrides', or teuthology 'defaults' "
                        "section.")

    if not hasattr(ctx.ceph[cluster_name], 'image'):
        ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        sha1 = config.get('sha1')
        if sha1:
            ctx.ceph[cluster_name].image = container_image_name + ':' + sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = container_image_name + ':' + branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)

    with contextutil.nested(
            # if the cluster is already bootstrapped bypass corresponding methods
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
                    else initialize_config(ctx=ctx, config=config),
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
                    else download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
                    else ceph_bootstrap(ctx, config,
                                        container_registry_mirror),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_rgw(ctx=ctx, config=config),
            lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
            lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
            lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
            lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
        )

        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)

            log.info('Setup complete, yielding')
            yield

        finally:
            log.info('Teardown begin')

def registries_add_mirror_to_docker_io(conf, mirror):
    config = toml.loads(conf)
    is_v1 = 'registries' in config
    if is_v1:
        search = config.get('registries', {}).get('search', {}).get('registries', [])
        insecure = config.get('registries', {}).get('search', {}).get('insecure', [])
        # v2: MutableMapping[str, Any] = { needs Python 3
        v2 = {
            'unqualified-search-registries': search,
            'registry': [
                {
                    'prefix': reg,
                    'location': reg,
                    'insecure': reg in insecure,
                    'blocked': False,
                } for reg in search
            ]
        }
    else:
        v2 = config  # type: ignore
    dockers = [r for r in v2['registry'] if r['prefix'] == 'docker.io']
    if dockers:
        docker = dockers[0]
        docker['mirror'] = [{
            "location": mirror,
            "insecure": True,
        }]
    return v2

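# Illustrative only (the mirror URL is made up): given a v2
# /etc/containers/registries.conf containing
#
#   unqualified-search-registries = ["docker.io"]
#
#   [[registry]]
#   prefix = "docker.io"
#   location = "docker.io"
#
# the function above appends a mirror entry so image pulls go through the
# local cache:
#
#   [[registry.mirror]]
#   location = "registry.mirror.example.com:5000"
#   insecure = true
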
def add_mirror_to_cluster(ctx, mirror):
    log.info('Adding local image mirror %s' % mirror)

    registries_conf = '/etc/containers/registries.conf'

    for remote in ctx.cluster.remotes.keys():
        try:
            config = teuthology.get_file(
                remote=remote,
                path=registries_conf
            )
            new_config = toml.dumps(registries_add_mirror_to_docker_io(config.decode('utf-8'), mirror))

            teuthology.sudo_write_file(
                remote=remote,
                path=registries_conf,
                data=six.ensure_str(new_config),
            )
        except IOError as e:  # py3: use FileNotFoundError instead.
            if e.errno != errno.ENOENT:
                raise

            # Docker doesn't ship a registries.conf
            log.info('Failed to add mirror: %s' % str(e))