"""
Ceph cluster task, deployed via cephadm orchestrator
"""
import argparse
import configobj
import contextlib
import json
import logging
import os
import re
import uuid

from io import StringIO

from ceph_manager import CephManager
from tarfile import ReadError
from teuthology import misc as teuthology
from teuthology import contextutil
from teuthology.orchestra import run
from teuthology.orchestra.daemon import DaemonGroup
from teuthology.config import config as teuth_config
# these items we use from ceph.py should probably eventually move elsewhere
from tasks.ceph import get_mons, healthy
CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus']

log = logging.getLogger(__name__)
def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs):
    testdir = teuthology.get_testdir(ctx)
    return remote.run(
        args=[
            'sudo',
            ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            'shell',
            '-c', '/etc/ceph/{}.conf'.format(cluster_name),
            '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--fsid', ctx.ceph[cluster_name].fsid,
        ] + extra_cephadm_args + [
            '--',
        ] + args,
        **kwargs
    )
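
# Usage sketch (hypothetical remote/args): _shell(ctx, 'ceph', remote,
# ['ceph', 'orch', 'ps']) runs roughly
#   sudo <cephadm> --image <image> shell -c /etc/ceph/ceph.conf \
#       -k /etc/ceph/ceph.client.admin.keyring --fsid <fsid> -- ceph orch ps
# on that host.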
def build_initial_config(ctx, config):
    cluster_name = config['cluster']

    path = os.path.join(os.path.dirname(__file__), 'cephadm.conf')
    conf = configobj.ConfigObj(path, file_error=True)

    conf.setdefault('global', {})
    conf['global']['fsid'] = ctx.ceph[cluster_name].fsid

    # overrides
    for section, keys in config.get('conf', {}).items():
        for key, value in keys.items():
            log.info(" override: [%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    return conf
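
# For example, a task config fragment like (hypothetical option)
#
#   conf:
#     global:
#       osd heartbeat grace: 60
#
# ends up in the seed config as "[global] osd heartbeat grace = 60".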
@contextlib.contextmanager
def normalize_hostnames(ctx):
    """
    Ensure we have short hostnames throughout, for consistency between
    remote.shortname and socket.gethostname() in cephadm.
    """
    log.info('Normalizing hostnames...')
    ctx.cluster.run(args=[
        'sudo',
        'hostname',
        run.Raw('$(hostname -s)'),
    ])
    yield
@contextlib.contextmanager
def download_cephadm(ctx, config, ref):
    cluster_name = config['cluster']

    if config.get('cephadm_mode') != 'cephadm-package':
        ref = config.get('cephadm_branch', ref)
        git_url = teuth_config.get_ceph_git_url()
        log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref))
        if git_url.startswith('https://github.com/'):
            # git archive doesn't like https:// URLs, which we use with github.
            rest = git_url.split('https://github.com/', 1)[1]
            rest = re.sub(r'\.git/?$', '', rest).strip()  # no .git suffix
            ctx.cluster.run(
                args=[
                    'curl', '--silent',
                    'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm',
                    run.Raw('>'),
                    ctx.cephadm,
                ],
            )
        else:
            ctx.cluster.run(
                args=[
                    'git', 'archive',
                    '--remote=' + git_url,
                    ref,
                    'src/cephadm/cephadm',
                    run.Raw('|'),
                    'tar', '-xO', 'src/cephadm/cephadm',
                    run.Raw('>'),
                    ctx.cephadm,
                ],
            )
        # sanity-check the resulting file and set executable bit
        cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm)
        ctx.cluster.run(
            args=[
                'test', '-s', ctx.cephadm,
                run.Raw('&&'),
                'test', run.Raw(cephadm_file_size), '-gt', run.Raw('1000'),
                run.Raw('&&'),
                'chmod', '+x', ctx.cephadm,
            ],
        )

    try:
        yield
    finally:
        log.info('Removing cluster...')
        ctx.cluster.run(args=[
            'sudo',
            ctx.cephadm,
            'rm-cluster',
            '--fsid', ctx.ceph[cluster_name].fsid,
            '--force',
        ])

        if config.get('cephadm_mode') == 'root':
            log.info('Removing cephadm ...')
            ctx.cluster.run(args=[
                'rm', '-rf', ctx.cephadm,
            ])
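
# For a github-hosted repo, e.g. git_url 'https://github.com/ceph/ceph.git'
# and ref 'master' (hypothetical values), the fetch above reduces to roughly:
#   curl --silent \
#     https://raw.githubusercontent.com/ceph/ceph/master/src/cephadm/cephadm \
#     > <testdir>/cephadm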
@contextlib.contextmanager
def ceph_log(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        yield

    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise

    finally:
        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log.
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/{fsid}/ceph.log'.format(
                    fsid=fsid),
            ]
            if excludes:
                for exclude in excludes:
                    args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = ctx.ceph[cluster_name].bootstrap_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout:
                return stdout
            return None

        if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]',
                             config.get('log-whitelist')) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log-whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',  # all logs, not just for the cluster
                        '-name', '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo', 'xargs', '-0', '--no-run-if-empty', '--',
                        'gzip', '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path, exist_ok=True)
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                os.makedirs(sub, exist_ok=True)
                teuthology.pull_directory(remote, '/var/log/ceph',  # everything
                                          os.path.join(sub, 'log'))
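
# Sketch: first_in_ceph_log(r'\[ERR\]', ['POOL_APP_NOT_ENABLED']) (hypothetical
# whitelist entry) runs approximately
#   sudo egrep '\[ERR\]' /var/log/ceph/<fsid>/ceph.log \
#       | egrep -v POOL_APP_NOT_ENABLED | head -n 1
# and returns the first matching line, or None if nothing matched.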
@contextlib.contextmanager
def ceph_crash(ctx, config):
    """
    Gather crash dumps from /var/lib/ceph/$fsid/crash
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        yield

    finally:
        if ctx.archive is not None:
            log.info('Archiving crash dumps...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path, exist_ok=True)
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                os.makedirs(sub, exist_ok=True)
                try:
                    teuthology.pull_directory(remote,
                                              '/var/lib/ceph/%s/crash' % fsid,
                                              os.path.join(sub, 'crash'))
                except ReadError:
                    # no crash dumps on this node
                    pass
@contextlib.contextmanager
def ceph_bootstrap(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)
    fsid = ctx.ceph[cluster_name].fsid

    bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
    first_mon = ctx.ceph[cluster_name].first_mon
    first_mon_role = ctx.ceph[cluster_name].first_mon_role
    mons = ctx.ceph[cluster_name].mons

    ctx.cluster.run(args=[
        'sudo', 'mkdir', '-p', '/etc/ceph',
    ])
    ctx.cluster.run(args=[
        'sudo', 'chmod', '777', '/etc/ceph',
    ])
    try:
        # write seed config
        log.info('Writing seed config...')
        conf_fp = StringIO()
        seed_config = build_initial_config(ctx, config)
        seed_config.write(conf_fp)
        teuthology.write_file(
            remote=bootstrap_remote,
            path='{}/seed.{}.conf'.format(testdir, cluster_name),
            data=conf_fp.getvalue())
        log.debug('Final config:\n' + conf_fp.getvalue())
        ctx.ceph[cluster_name].conf = seed_config

        # register initial daemons
        ctx.daemons.register_daemon(
            bootstrap_remote, 'mon', first_mon,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild('mon.' + first_mon),
            wait=False,
            started=True,
        )
        if not ctx.ceph[cluster_name].roleless:
            first_mgr = ctx.ceph[cluster_name].first_mgr
            ctx.daemons.register_daemon(
                bootstrap_remote, 'mgr', first_mgr,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild('mgr.' + first_mgr),
                wait=False,
                started=True,
            )

        # bootstrap
        log.info('Bootstrapping...')
        cmd = [
            'sudo',
            ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            '-v',
            'bootstrap',
            '--fsid', fsid,
            '--config', '{}/seed.{}.conf'.format(testdir, cluster_name),
            '--output-config', '/etc/ceph/{}.conf'.format(cluster_name),
            '--output-keyring',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name),
        ]
        if not ctx.ceph[cluster_name].roleless:
            cmd += [
                '--mon-id', first_mon,
                '--mgr-id', first_mgr,
                '--orphan-initial-daemons',   # we will do it explicitly!
                '--skip-monitoring-stack',    # we'll provision these explicitly
            ]
        if mons[first_mon_role].startswith('['):
            cmd += ['--mon-addrv', mons[first_mon_role]]
        else:
            cmd += ['--mon-ip', mons[first_mon_role]]
        if config.get('skip_dashboard'):
            cmd += ['--skip-dashboard']
        # bootstrap makes the keyring root 0600, so +r it for our purposes
        cmd += [
            run.Raw('&&'),
            'sudo', 'chmod', '+r',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ]
        bootstrap_remote.run(args=cmd)

        # fetch keys and configs
        log.info('Fetching config...')
        ctx.ceph[cluster_name].config_file = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.conf'.format(cluster_name))
        log.info('Fetching client.admin keyring...')
        ctx.ceph[cluster_name].admin_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name))
        log.info('Fetching mon keyring...')
        ctx.ceph[cluster_name].mon_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/var/lib/ceph/%s/mon.%s/keyring' % (fsid, first_mon),
            sudo=True)

        # fetch ssh key, distribute to additional nodes
        log.info('Fetching pub ssh key...')
        ssh_pub_key = teuthology.get_file(
            remote=bootstrap_remote,
            path='{}/{}.pub'.format(testdir, cluster_name)
        ).decode('ascii').strip()

        log.info('Installing pub ssh key for root users...')
        ctx.cluster.run(args=[
            'sudo', 'install', '-d', '-m', '0700', '/root/.ssh',
            run.Raw('&&'),
            'echo', ssh_pub_key,
            run.Raw('|'),
            'sudo', 'tee', '-a', '/root/.ssh/authorized_keys',
            run.Raw('&&'),
            'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys',
        ])

        # set options
        _shell(ctx, cluster_name, bootstrap_remote,
               ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true'])

        # add other hosts
        for remote in ctx.cluster.remotes.keys():
            if remote == bootstrap_remote:
                continue
            log.info('Writing (initial) conf and keyring to %s' % remote.shortname)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.conf'.format(cluster_name),
                data=ctx.ceph[cluster_name].config_file)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
                data=ctx.ceph[cluster_name].admin_keyring)

            log.info('Adding host %s to orchestrator...' % remote.shortname)
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'host', 'add',
                remote.shortname,
            ])
            r = _shell(ctx, cluster_name, remote,
                       ['ceph', 'orch', 'host', 'ls', '--format=json'],
                       stdout=StringIO())
            hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())]
            assert remote.shortname in hosts

        yield

    finally:
        log.info('Cleaning up testdir ceph.* files...')
        ctx.cluster.run(args=[
            'rm', '-f',
            '{}/seed.{}.conf'.format(testdir, cluster_name),
            '{}/{}.pub'.format(testdir, cluster_name),
        ])

        log.info('Stopping all daemons...')

        # this doesn't block until they are all stopped...
        #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])

        # so, stop them individually
        for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES):
            cluster, type_, id_ = teuthology.split_role(role)
            ctx.daemons.get_daemon(type_, id_, cluster).stop()

        # clean up /etc/ceph
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/{}.conf'.format(cluster_name),
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ])
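
# For a non-roleless cluster, the assembled bootstrap invocation looks roughly
# like this (hypothetical ids/paths):
#
#   sudo <testdir>/cephadm --image <image> -v bootstrap \
#       --fsid <fsid> \
#       --config <testdir>/seed.ceph.conf \
#       --output-config /etc/ceph/ceph.conf \
#       --output-keyring /etc/ceph/ceph.client.admin.keyring \
#       --output-pub-ssh-key <testdir>/ceph.pub \
#       --mon-id a --mgr-id x \
#       --orphan-initial-daemons --skip-monitoring-stack \
#       --mon-ip <first mon ip>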
@contextlib.contextmanager
def ceph_mons(ctx, config):
    """
    Deploy any additional mons
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid
    num_mons = 1

    for remote, roles in ctx.cluster.remotes.items():
        for mon in [r for r in roles
                    if teuthology.is_type('mon', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(mon)
            if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
                continue
            log.info('Adding %s on %s' % (mon, remote.shortname))
            num_mons += 1
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'daemon', 'add', 'mon',
                remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
            ])
            ctx.daemons.register_daemon(
                remote, 'mon', id_,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild(mon),
                wait=False,
                started=True,
            )

    with contextutil.safe_while(sleep=1, tries=180) as proceed:
        while proceed():
            log.info('Waiting for %d mons in monmap...' % (num_mons))
            r = _shell(
                ctx=ctx,
                cluster_name=cluster_name,
                remote=remote,
                args=[
                    'ceph', 'mon', 'dump', '-f', 'json',
                ],
                stdout=StringIO(),
            )
            j = json.loads(r.stdout.getvalue())
            if len(j['mons']) == num_mons:
                break

    # refresh our (final) ceph.conf file
    log.info('Generating final ceph.conf file...')
    r = _shell(
        ctx=ctx,
        cluster_name=cluster_name,
        remote=remote,
        args=[
            'ceph', 'config', 'generate-minimal-conf',
        ],
        stdout=StringIO(),
    )
    ctx.ceph[cluster_name].config_file = r.stdout.getvalue()

    yield
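
# Each additional mon is pinned to its host and address, e.g. (hypothetical
# host/ip/id): ceph orch daemon add mon smithi042:172.21.15.42=b
# The loop then polls `ceph mon dump -f json` until all mons are in the monmap.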
@contextlib.contextmanager
def ceph_mgrs(ctx, config):
    """
    Deploy any additional mgrs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for mgr in [r for r in roles
                    if teuthology.is_type('mgr', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(mgr)
            if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr:
                continue
            log.info('Adding %s on %s' % (mgr, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[mgr] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'mgr',
            str(len(nodes) + 1) + ';' + ';'.join(nodes)]
        )
    for mgr, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'mgr', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(mgr),
            wait=False,
            started=True,
        )

    yield
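
# The placement spec is 'count;host1=id1;host2=id2...', counting the bootstrap
# mgr plus the extras; e.g. two extra mgrs (hypothetical hosts/ids) yield:
#   ceph orch apply mgr '3;smithi042=y;smithi043=z'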
@contextlib.contextmanager
def ceph_osds(ctx, config):
    """
    Deploy OSDs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    log.info('Deploying OSDs...')

    # provision OSDs in numeric order
    id_to_remote = {}
    devs_by_remote = {}
    for remote, roles in ctx.cluster.remotes.items():
        devs_by_remote[remote] = teuthology.get_scratch_devices(remote)
        for osd in [r for r in roles
                    if teuthology.is_type('osd', cluster_name)(r)]:
            _, _, id_ = teuthology.split_role(osd)
            id_to_remote[int(id_)] = (osd, remote)

    cur = 0
    for osd_id in sorted(id_to_remote.keys()):
        osd, remote = id_to_remote[osd_id]
        _, _, id_ = teuthology.split_role(osd)
        assert int(id_) == cur
        devs = devs_by_remote[remote]
        assert devs   ## FIXME ##
        dev = devs.pop()
        short_dev = dev.replace('/dev/', '')
        log.info('Deploying %s on %s with %s...' % (
            osd, remote.shortname, dev))
        _shell(ctx, cluster_name, remote, [
            'ceph-volume', 'lvm', 'zap', dev])
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'daemon', 'add', 'osd',
            remote.shortname + ':' + short_dev
        ])
        ctx.daemons.register_daemon(
            remote, 'osd', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(osd),
            wait=False,
            started=True,
        )
        cur += 1

    yield
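
# OSDs are zapped and added one device at a time, e.g. (hypothetical
# host/device):
#   ceph-volume lvm zap /dev/vg_nvme/lv_1
#   ceph orch daemon add osd smithi042:vg_nvme/lv_1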
@contextlib.contextmanager
def ceph_mdss(ctx, config):
    """
    Deploy MDSs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('mds', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'mds',
            'all',
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'mds', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
@contextlib.contextmanager
def ceph_monitoring(daemon_type, ctx, config):
    """
    Deploy prometheus, node-exporter, etc.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type(daemon_type, cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', daemon_type,
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, daemon_type, id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
@contextlib.contextmanager
def ceph_rgw(ctx, config):
    """
    Deploy rgw
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = {}
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('rgw', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            realmzone = '.'.join(id_.split('.')[0:2])
            if realmzone not in nodes:
                nodes[realmzone] = []
            nodes[realmzone].append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    for realmzone, hosts in nodes.items():
        (realm, zone) = realmzone.split('.', 1)
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'rgw',
            realm, zone,
            str(len(hosts)) + ';' + ';'.join(hosts)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'rgw', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
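
# rgw role ids are expected to look like '<realm>.<zone>.<id>'; e.g. a role
# rgw.realm0.zone0.a (hypothetical) is grouped under realm 'realm0' and zone
# 'zone0' and deployed via something like:
#   ceph orch apply rgw realm0 zone0 '1;smithi042=realm0.zone0.a'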
@contextlib.contextmanager
def ceph_clients(ctx, config):
    cluster_name = config['cluster']

    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name,
                                                                name)
            r = _shell(
                ctx=ctx,
                cluster_name=cluster_name,
                remote=remote,
                args=[
                    'ceph', 'auth',
                    'get-or-create', name,
                    'mon', 'allow *',
                    'osd', 'allow *',
                    'mds', 'allow *',
                    'mgr', 'allow *',
                ],
                stdout=StringIO(),
            )
            keyring = r.stdout.getvalue()
            teuthology.sudo_write_file(
                remote=remote,
                path=client_keyring,
                data=keyring,
                perms='0644'
            )
    yield
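
# E.g. for role client.0 on cluster 'ceph', this writes the generated keyring
# to /etc/ceph/ceph.client.0.keyring on the client node.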
@contextlib.contextmanager
def ceph_initial():
    try:
        yield
    finally:
        log.info('Teardown complete')
@contextlib.contextmanager
def stop(ctx, config):
    """
    Stop ceph daemons

    For example::

      tasks:
      - ceph.stop: [osd.0, osd.2]

      tasks:
      - ceph.stop:
          daemons: [osd.0, osd.2]
    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    for role in daemons:
        cluster, type_, id_ = teuthology.split_role(role)
        ctx.daemons.get_daemon(type_, id_, cluster).stop()
        clusters.add(cluster)

#    for cluster in clusters:
#        ctx.ceph[cluster].watchdog.stop()
#        ctx.ceph[cluster].watchdog.join()

    yield
def shell(ctx, config):
    """
    Execute (shell) commands
    """
    cluster_name = config.get('cluster', 'ceph')

    env = []
    if 'env' in config:
        for k in config['env']:
            env.extend(['-e', k + '=' + ctx.config.get(k, '')])
        del config['env']

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.items():
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            _shell(ctx, cluster_name, remote,
                   ['bash', '-c', c],
                   extra_cephadm_args=env)
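
# A minimal sketch of this task in a job yaml (hypothetical role/commands):
#
#   tasks:
#   - cephadm.shell:
#       mon.a:
#       - ceph orch status
#       - ceph orch ps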
@contextlib.contextmanager
def tweaked_option(ctx, config):
    """
    Set an option, and then restore it with its original value.

    Note, due to the way in which tasks are executed/nested, it's not
    advisable to use this method as a standalone task. Otherwise, it's likely
    that it will restore the tweaked option at the /end/ of the 'tasks' block.
    """
    saved_options = {}
    # we can complicate this when necessary
    options = ['mon-health-to-clog']
    type_, id_ = 'mon', '*'
    cluster = config.get('cluster', 'ceph')
    manager = ctx.managers[cluster]
    get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_))
    for option in options:
        if option not in config:
            continue
        value = 'true' if config[option] else 'false'
        option = option.replace('-', '_')
        old_value = manager.get_config(type_, get_from, option)
        if value != old_value:
            saved_options[option] = old_value
            manager.inject_args(type_, id_, option, value)
    yield
    for option, value in saved_options.items():
        manager.inject_args(type_, id_, option, value)
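
# Sketch: a config of {'mon-health-to-clog': False} injects
# mon_health_to_clog=false into all mons for the duration of the with-block,
# then restores the saved value on exit.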
@contextlib.contextmanager
def restart(ctx, config):
    """
    Restart ceph daemons

    For example::

      tasks:
      - ceph.restart: [all]

    For example::

      tasks:
      - ceph.restart: [osd.0, mon.1, mds.*]

    or::

      tasks:
      - ceph.restart:
          daemons: [osd.0, mon.1]
          wait-for-healthy: false
          wait-for-osds-up: true

    :param ctx: Context
    :param config: Configuration
    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    log.info('daemons %s' % daemons)
    with tweaked_option(ctx, config):
        for role in daemons:
            cluster, type_, id_ = teuthology.split_role(role)
            d = ctx.daemons.get_daemon(type_, id_, cluster)
            assert d, 'daemon %s does not exist' % role
            d.stop()
            if type_ == 'osd':
                ctx.managers[cluster].mark_down_osd(id_)
            d.restart()
            clusters.add(cluster)

    if config.get('wait-for-healthy', True):
        for cluster in clusters:
            healthy(ctx=ctx, config=dict(cluster=cluster))
    if config.get('wait-for-osds-up', False):
        for cluster in clusters:
            ctx.managers[cluster].wait_for_all_osds_up()
    yield
@contextlib.contextmanager
def distribute_config_and_admin_keyring(ctx, config):
    """
    Distribute a sufficient config and keyring for clients
    """
    cluster_name = config['cluster']
    log.info('Distributing (final) config and client.admin keyring...')
    for remote, roles in ctx.cluster.remotes.items():
        teuthology.sudo_write_file(
            remote=remote,
            path='/etc/ceph/{}.conf'.format(cluster_name),
            data=ctx.ceph[cluster_name].config_file)
        teuthology.sudo_write_file(
            remote=remote,
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            data=ctx.ceph[cluster_name].admin_keyring)
    try:
        yield
    finally:
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/{}.conf'.format(cluster_name),
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ])
@contextlib.contextmanager
def crush_setup(ctx, config):
    cluster_name = config['cluster']

    profile = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', profile)
    _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
           args=['ceph', 'osd', 'crush', 'tunables', profile])
    yield
@contextlib.contextmanager
def task(ctx, config):
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    ctx.ceph[cluster_name] = argparse.Namespace()

    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    ctx.ceph[cluster_name].roleless = False  # see below

    # cephadm mode?
    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(
            use_cephadm=ctx.cephadm)

    # image
    ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        sha1 = config.get('sha1')
        if sha1:
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername()
            for (remote, role_list) in remotes_and_roles)]

    if config.get('roleless', False):
        # mons will be named after hosts
        roles = []
        first_mon = None
        for remote, _ in remotes_and_roles:
            roles.append(['mon.' + remote.shortname])
            if not first_mon:
                first_mon = remote.shortname
                bootstrap_remote = remote
        log.info('No roles; fabricating mons %s' % roles)

    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    if config.get('roleless', False):
        ctx.ceph[cluster_name].roleless = True
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
    else:
        first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
        _, _, first_mon = teuthology.split_role(first_mon_role)
        (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
        log.info('First mon is mon.%s on %s' % (first_mon,
                                                bootstrap_remote.shortname))
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = first_mon_role

        others = ctx.cluster.remotes[bootstrap_remote]
        mgrs = sorted([r for r in others
                       if teuthology.is_type('mgr', cluster_name)(r)])
        if not mgrs:
            raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon)
        _, _, first_mgr = teuthology.split_role(mgrs[0])
        log.info('First mgr is %s' % (first_mgr))
        ctx.ceph[cluster_name].first_mgr = first_mgr

    with contextutil.nested(
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: ceph_bootstrap(ctx=ctx, config=config),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_rgw(ctx=ctx, config=config),
            lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
            lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
            lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
            lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
            cephadm=True,
        )

        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)

            log.info('Setup complete, yielding')
            yield
        finally:
            log.info('Teardown begin')
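
# A minimal sketch of a suite fragment that uses this task (hypothetical
# values; see the option handling in task() above):
#
#   tasks:
#   - cephadm:
#       image: quay.io/ceph-ci/ceph:<sha1-or-branch>
#       cephadm_mode: root
#       conf:
#         osd:
#           debug osd: 20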