# ceph/qa/tasks/cephadm.py
"""
Ceph cluster task, deployed via cephadm orchestrator
"""
import argparse
import configobj
import contextlib
import json
import logging
import os
import re
import uuid

from io import StringIO

from ceph_manager import CephManager
from tarfile import ReadError
from teuthology import misc as teuthology
from teuthology import contextutil
from teuthology.orchestra import run
from teuthology.orchestra.daemon import DaemonGroup
from teuthology.config import config as teuth_config

# these items we use from ceph.py should probably eventually move elsewhere
from tasks.ceph import get_mons, healthy

CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus']

log = logging.getLogger(__name__)
def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs):
    testdir = teuthology.get_testdir(ctx)
    return remote.run(
        args=[
            'sudo',
            ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            'shell',
            '-c', '/etc/ceph/{}.conf'.format(cluster_name),
            '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--fsid', ctx.ceph[cluster_name].fsid,
        ] + extra_cephadm_args + [
            '--',
        ] + args,
        **kwargs
    )
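# For reference, _shell() assembles a command of roughly this shape on the
# remote (illustrative; the image, fsid, and conf paths come from ctx/config):
#   sudo {testdir}/cephadm --image <image> shell \
#       -c /etc/ceph/<cluster>.conf -k /etc/ceph/<cluster>.client.admin.keyring \
#       --fsid <fsid> [extra cephadm args] -- <args...>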
def build_initial_config(ctx, config):
    cluster_name = config['cluster']

    path = os.path.join(os.path.dirname(__file__), 'cephadm.conf')
    conf = configobj.ConfigObj(path, file_error=True)

    conf.setdefault('global', {})
    conf['global']['fsid'] = ctx.ceph[cluster_name].fsid

    # overrides from the job's 'conf' section
    for section, keys in config.get('conf', {}).items():
        for key, value in keys.items():
            log.info(" override: [%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    return conf
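# Overrides under the task's 'conf' key are merged into the seed config, e.g.
# (illustrative job yaml, option name chosen only as an example):
#   cephadm:
#     conf:
#       mon:
#         mon warn on insecure global id reclaim allowed: false
# ends up as "[mon] mon warn on insecure global id reclaim allowed = false"
# in the generated seed.<cluster>.conf.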
@contextlib.contextmanager
def normalize_hostnames(ctx):
    """
    Ensure we have short hostnames throughout, for consistency between
    remote.shortname and socket.gethostname() in cephadm.
    """
    log.info('Normalizing hostnames...')
    ctx.cluster.run(args=[
        'sudo',
        'hostname',
        run.Raw('$(hostname -s)'),
    ])

    try:
        yield
    finally:
        pass
@contextlib.contextmanager
def download_cephadm(ctx, config, ref):
    cluster_name = config['cluster']

    if config.get('cephadm_mode') != 'cephadm-package':
        ref = config.get('cephadm_branch', ref)
        git_url = teuth_config.get_ceph_git_url()
        log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref))
        if git_url.startswith('https://github.com/'):
            # git archive doesn't like https:// URLs, which we use with github.
            rest = git_url.split('https://github.com/', 1)[1]
            rest = re.sub(r'\.git/?$', '', rest).strip()  # no .git suffix
            ctx.cluster.run(args=[
                'curl', '--silent',
                'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm',
                run.Raw('>'),
                ctx.cephadm,
            ])
        else:
            ctx.cluster.run(args=[
                'git', 'archive',
                '--remote=' + git_url,
                ref,
                'src/cephadm/cephadm',
                run.Raw('|'),
                'tar', '-xO', 'src/cephadm/cephadm',
                run.Raw('>'),
                ctx.cephadm,
            ])
        # sanity-check the resulting file and set executable bit
        cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm)
        ctx.cluster.run(args=[
            'test', '-s', ctx.cephadm,
            run.Raw('&&'),
            'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'),
            run.Raw('&&'),
            'chmod', '+x', ctx.cephadm,
        ])

    try:
        yield
    finally:
        log.info('Removing cluster...')
        ctx.cluster.run(args=[
            'sudo', ctx.cephadm, 'rm-cluster',
            '--fsid', ctx.ceph[cluster_name].fsid,
            '--force',
        ])

        if config.get('cephadm_mode') == 'root':
            log.info('Removing cephadm ...')
            ctx.cluster.run(args=[
                'rm', '-rf', ctx.cephadm,
            ])
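# The download step boils down to roughly the following on each remote
# (illustrative sketch of the commands assembled above, root mode):
#   curl --silent https://raw.githubusercontent.com/<org>/<repo>/<ref>/src/cephadm/cephadm > {testdir}/cephadm
#   test -s {testdir}/cephadm && test $(stat -c%s {testdir}/cephadm) -gt 1000 && chmod +x {testdir}/cephadm
# with a 'git archive | tar -xO' fallback for repos not hosted on github.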
@contextlib.contextmanager
def ceph_log(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        yield

    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise

    finally:
        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log.
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo', 'egrep', pattern,
                '/var/log/ceph/{fsid}/ceph.log'.format(
                    fsid=fsid),
            ]
            if excludes:
                for exclude in excludes:
                    args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = ctx.ceph[cluster_name].bootstrap_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            return stdout or None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                             config.get('log-whitelist')) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log-whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
            log.info('Compressing logs...')
            # ... (gzip every *.log under /var/log/ceph on all remotes;
            #      /var/log/ceph holds all logs, not just for the cluster)

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            try:
                os.makedirs(path)
            except OSError:
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    pass
                teuthology.pull_directory(remote, '/var/log/ceph',  # everything
                                          os.path.join(sub, 'log'))
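# first_in_ceph_log() effectively runs a pipeline like
#   sudo egrep '<pattern>' /var/log/ceph/<fsid>/ceph.log | egrep -v '<exclude>' | head -n 1
# on the bootstrap node and reports the first matching line, if any.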
@contextlib.contextmanager
def ceph_crash(ctx, config):
    """
    Gather crash dumps from /var/lib/ceph/$fsid/crash
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        yield

    finally:
        if ctx.archive is not None:
            log.info('Archiving crash dumps...')
            path = os.path.join(ctx.archive, 'remote')
            try:
                os.makedirs(path)
            except OSError:
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    pass
                try:
                    teuthology.pull_directory(remote,
                                              '/var/lib/ceph/%s/crash' % fsid,
                                              os.path.join(sub, 'crash'))
                except ReadError:
                    pass
@contextlib.contextmanager
def ceph_bootstrap(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)
    fsid = ctx.ceph[cluster_name].fsid

    mons = ctx.ceph[cluster_name].mons
    first_mon_role = sorted(mons.keys())[0]
    _, _, first_mon = teuthology.split_role(first_mon_role)
    (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
    log.info('First mon is mon.%s on %s' % (first_mon,
                                            bootstrap_remote.shortname))
    ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
    ctx.ceph[cluster_name].first_mon = first_mon

    others = ctx.cluster.remotes[bootstrap_remote]
    log.info('others %s' % others)
    mgrs = sorted([r for r in others
                   if teuthology.is_type('mgr', cluster_name)(r)])
    if not mgrs:
        raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon)
    _, _, first_mgr = teuthology.split_role(mgrs[0])
    log.info('First mgr is %s' % (first_mgr))
    ctx.ceph[cluster_name].first_mgr = first_mgr

    ctx.cluster.run(args=[
        'sudo', 'mkdir', '-p', '/etc/ceph',
    ])
    ctx.cluster.run(args=[
        'sudo', 'chmod', '777', '/etc/ceph',
    ])

    try:
        # write seed config
        log.info('Writing seed config...')
        conf_fp = StringIO()
        seed_config = build_initial_config(ctx, config)
        seed_config.write(conf_fp)
        teuthology.write_file(
            remote=bootstrap_remote,
            path='{}/seed.{}.conf'.format(testdir, cluster_name),
            data=conf_fp.getvalue())
        log.debug('Final config:\n' + conf_fp.getvalue())
        ctx.ceph[cluster_name].conf = seed_config

        # register initial daemons
        ctx.daemons.register_daemon(
            bootstrap_remote, 'mon', first_mon,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild('mon.' + first_mon),
            wait=False,
            started=True,
        )
        ctx.daemons.register_daemon(
            bootstrap_remote, 'mgr', first_mgr,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild('mgr.' + first_mgr),
            wait=False,
            started=True,
        )

        # bootstrap
        log.info('Bootstrapping...')
        cmd = [
            'sudo',
            ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            'bootstrap',
            '--fsid', fsid,
            '--mon-id', first_mon,
            '--mgr-id', first_mgr,
            '--orphan-initial-daemons',   # we will do it explicitly!
            '--skip-monitoring-stack',    # we'll provision these explicitly
            '--config', '{}/seed.{}.conf'.format(testdir, cluster_name),
            '--output-config', '/etc/ceph/{}.conf'.format(cluster_name),
            '--output-keyring',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name),
        ]
        if mons[first_mon_role].startswith('['):
            cmd += ['--mon-addrv', mons[first_mon_role]]
        else:
            cmd += ['--mon-ip', mons[first_mon_role]]
        if config.get('skip_dashboard'):
            cmd += ['--skip-dashboard']
        # bootstrap makes the keyring root 0600, so +r it for our purposes
        cmd += [
            run.Raw('&&'),
            'sudo', 'chmod', '+r',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ]
        bootstrap_remote.run(args=cmd)
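        # The bootstrap invocation assembled above amounts to roughly
        # (illustrative):
        #   sudo {testdir}/cephadm --image <image> bootstrap \
        #       --fsid <fsid> --mon-id <first_mon> --mgr-id <first_mgr> \
        #       --orphan-initial-daemons --skip-monitoring-stack \
        #       --config {testdir}/seed.<cluster>.conf \
        #       --output-config /etc/ceph/<cluster>.conf \
        #       --output-keyring /etc/ceph/<cluster>.client.admin.keyring \
        #       --output-pub-ssh-key {testdir}/<cluster>.pub \
        #       --mon-ip <addr>   (or --mon-addrv for an addrvec)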
        # fetch keys and configs
        log.info('Fetching config...')
        ctx.ceph[cluster_name].config_file = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.conf'.format(cluster_name))
        log.info('Fetching client.admin keyring...')
        ctx.ceph[cluster_name].admin_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name))
        log.info('Fetching mon keyring...')
        ctx.ceph[cluster_name].mon_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/var/lib/ceph/%s/mon.%s/keyring' % (fsid, first_mon),
            sudo=True)

        # fetch ssh key, distribute to additional nodes
        log.info('Fetching pub ssh key...')
        ssh_pub_key = teuthology.get_file(
            remote=bootstrap_remote,
            path='{}/{}.pub'.format(testdir, cluster_name)
        ).strip()

        log.info('Installing pub ssh key for root users...')
        ctx.cluster.run(args=[
            'sudo', 'install', '-d', '-m', '0700', '/root/.ssh',
            run.Raw('&&'),
            'echo', ssh_pub_key,
            run.Raw('|'),
            'sudo', 'tee', '-a', '/root/.ssh/authorized_keys',
            run.Raw('&&'),
            'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys',
        ])

        _shell(ctx, cluster_name, bootstrap_remote,
               ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true'])

        # add other hosts to the orchestrator
        for remote in ctx.cluster.remotes.keys():
            if remote == bootstrap_remote:
                continue
            log.info('Writing conf and keyring to %s' % remote.shortname)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.conf'.format(cluster_name),
                data=ctx.ceph[cluster_name].config_file)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
                data=ctx.ceph[cluster_name].admin_keyring)

            log.info('Adding host %s to orchestrator...' % remote.shortname)
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'host', 'add',
                remote.shortname,
            ])
            r = _shell(ctx, cluster_name, remote,
                       ['ceph', 'orch', 'host', 'ls', '--format=json'],
                       stdout=StringIO())
            hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())]
            assert remote.shortname in hosts

        yield

    finally:
        log.info('Cleaning up testdir ceph.* files...')
        ctx.cluster.run(args=[
            'rm', '-f',
            '{}/seed.{}.conf'.format(testdir, cluster_name),
            '{}/{}.pub'.format(testdir, cluster_name),
        ])

        log.info('Stopping all daemons...')

        # this doesn't block until they are all stopped...
        #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])

        # so, stop them individually
        for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES):
            cluster, type_, id_ = teuthology.split_role(role)
            ctx.daemons.get_daemon(type_, id_, cluster).stop()

        # clean up /etc/ceph
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/{}.conf'.format(cluster_name),
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ])
@contextlib.contextmanager
def ceph_mons(ctx, config):
    """
    Deploy any additional mons
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid
    num_mons = 1

    for remote, roles in ctx.cluster.remotes.items():
        for mon in [r for r in roles
                    if teuthology.is_type('mon', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(mon)
            if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
                continue
            log.info('Adding %s on %s' % (mon, remote.shortname))
            num_mons += 1
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'daemon', 'add', 'mon',
                remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
            ])
            ctx.daemons.register_daemon(
                remote, 'mon', id_,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild(mon),
                wait=False,
                started=True,
            )

            with contextutil.safe_while(sleep=1, tries=180) as proceed:
                while proceed():
                    log.info('Waiting for %d mons in monmap...' % (num_mons))
                    r = _shell(
                        ctx=ctx,
                        cluster_name=cluster_name,
                        remote=remote,
                        args=[
                            'ceph', 'mon', 'dump', '-f', 'json',
                        ],
                        stdout=StringIO(),
                    )
                    j = json.loads(r.stdout.getvalue())
                    if len(j['mons']) == num_mons:
                        break
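            # safe_while(sleep=1, tries=180) retries the 'ceph mon dump' check
            # about once a second, for up to ~3 minutes, before erroring out.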
    # refresh ceph.conf files for all mons + first mgr
    for remote, roles in ctx.cluster.remotes.items():
        for mon in [r for r in roles
                    if teuthology.is_type('mon', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(mon)
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'daemon', 'reconfig',
                'mon.' + id_,
            ])
    _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, [
        'ceph', 'orch', 'daemon', 'reconfig',
        'mgr.' + ctx.ceph[cluster_name].first_mgr,
    ])

    yield
@contextlib.contextmanager
def ceph_mgrs(ctx, config):
    """
    Deploy any additional mgrs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for mgr in [r for r in roles
                    if teuthology.is_type('mgr', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(mgr)
            if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr:
                continue
            log.info('Adding %s on %s' % (mgr, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[mgr] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'mgr',
            str(len(nodes) + 1) + ';' + ';'.join(nodes)]
        )
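    # The placement argument is of the form '<count>;host1=id1;host2=id2;...';
    # len(nodes) + 1 leaves room for the first mgr that bootstrap already
    # deployed on the bootstrap host.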
    for mgr, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'mgr', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(mgr),
            wait=False,
            started=True,
        )

    yield
@contextlib.contextmanager
def ceph_osds(ctx, config):
    """
    Deploy OSDs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    log.info('Deploying OSDs...')

    # provision OSDs in numeric order
    id_to_remote = {}
    devs_by_remote = {}
    for remote, roles in ctx.cluster.remotes.items():
        devs_by_remote[remote] = teuthology.get_scratch_devices(remote)
        for osd in [r for r in roles
                    if teuthology.is_type('osd', cluster_name)(r)]:
            _, _, id_ = teuthology.split_role(osd)
            id_to_remote[int(id_)] = (osd, remote)

    cur = 0
    for osd_id in sorted(id_to_remote.keys()):
        osd, remote = id_to_remote[osd_id]
        _, _, id_ = teuthology.split_role(osd)
        assert int(id_) == cur
        devs = devs_by_remote[remote]
        assert devs   ## FIXME ##
        dev = devs.pop()
        short_dev = dev.replace('/dev/', '')
        log.info('Deploying %s on %s with %s...' % (
            osd, remote.shortname, dev))
        _shell(ctx, cluster_name, remote, [
            'ceph-volume', 'lvm', 'zap', dev])
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'daemon', 'add', 'osd',
            remote.shortname + ':' + short_dev
        ])
        ctx.daemons.register_daemon(
            remote, 'osd', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(osd),
            wait=False,
            started=True,
        )
        cur += 1

    yield
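# OSDs are provisioned strictly in ascending role-id order, and the assert on
# 'cur' checks that the teuthology role id lines up with the next id the
# cluster will allocate, so osd.N in the job yaml ends up as osd.N in the
# cluster.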
@contextlib.contextmanager
def ceph_mdss(ctx, config):
    """
    Deploy MDSs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('mds', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'mds',
            'all',
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'mds', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
@contextlib.contextmanager
def ceph_monitoring(daemon_type, ctx, config):
    """
    Deploy prometheus, node-exporter, etc.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type(daemon_type, cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', daemon_type,
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, daemon_type, id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
@contextlib.contextmanager
def ceph_rgw(ctx, config):
    """
    Deploy rgw
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = {}
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('rgw', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            realmzone = '.'.join(id_.split('.')[0:2])
            if realmzone not in nodes:
                nodes[realmzone] = []
            nodes[realmzone].append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)

    for realmzone, nodes in nodes.items():
        (realm, zone) = realmzone.split('.', 1)
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'rgw',
            realm, zone,
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )

    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'rgw', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
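# rgw role ids are expected to look like '<realm>.<zone>.<suffix>' (e.g.
# rgw.realm1.zone1.a); daemons are grouped by the first two dotted components
# and one 'ceph orch apply rgw' call is issued per realm/zone with all of its
# hosts in the placement.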
@contextlib.contextmanager
def ceph_clients(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name,
                                                                name)
            r = _shell(
                ctx=ctx,
                cluster_name=cluster_name,
                remote=remote,
                args=[
                    'ceph', 'auth',
                    'get-or-create', name,
                    # ... (client caps elided)
                ],
                stdout=StringIO(),
            )
            keyring = r.stdout.getvalue()
            teuthology.sudo_write_file(
                remote=remote,
                path=client_keyring,
                data=keyring,
                perms='0644'
            )
    yield
@contextlib.contextmanager
def ceph_initial():
    try:
        yield
    finally:
        log.info('Teardown complete')
@contextlib.contextmanager
def stop(ctx, config):
    """
    Stop ceph daemons

    For example::

      tasks:
      - ceph.stop: [osd.0, osd.2]

    or::

      tasks:
      - ceph.stop:
          daemons: [osd.0, osd.2]
    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    for role in daemons:
        cluster, type_, id_ = teuthology.split_role(role)
        ctx.daemons.get_daemon(type_, id_, cluster).stop()
        clusters.add(cluster)

#    for cluster in clusters:
#        ctx.ceph[cluster].watchdog.stop()
#        ctx.ceph[cluster].watchdog.join()

    yield
def shell(ctx, config):
    """
    Execute (shell) commands
    """
    cluster_name = config.get('cluster', 'ceph')

    env = []
    if 'env' in config:
        for k in config['env']:
            env.extend(['-e', k + '=' + ctx.config.get(k, '')])
        del config['env']

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.items():
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            _shell(ctx, cluster_name, remote,
                   ['bash', '-c', c],
                   extra_cephadm_args=env)
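# Typical usage from a job yaml (illustrative):
#   tasks:
#   - cephadm.shell:
#       mon.a:
#         - ceph orch status
#         - ceph orch ps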
@contextlib.contextmanager
def tweaked_option(ctx, config):
    """
    Set an option, and then restore it with its original value.

    Note, due to the way how tasks are executed/nested, it's not suggested to
    use this method as a standalone task. Otherwise, it's likely that it will
    restore the tweaked option at the /end/ of the 'tasks' block.
    """
    saved_options = {}
    # we can complicate this when necessary
    options = ['mon-health-to-clog']
    type_, id_ = 'mon', '*'
    cluster = config.get('cluster', 'ceph')
    manager = ctx.managers[cluster]
    if id_ == '*':
        get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_))
    else:
        get_from = id_
    for option in options:
        if option not in config:
            continue
        value = 'true' if config[option] else 'false'
        option = option.replace('-', '_')
        old_value = manager.get_config(type_, get_from, option)
        if value != old_value:
            saved_options[option] = old_value
            manager.inject_args(type_, id_, option, value)
    yield
    for option, value in saved_options.items():
        manager.inject_args(type_, id_, option, value)
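# restart() below wraps the daemon restarts in 'with tweaked_option(...)' so
# that mon-health-to-clog can be overridden for the duration of the restarts
# and restored afterwards.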
@contextlib.contextmanager
def restart(ctx, config):
    """
    Restart ceph daemons

    For example::

      tasks:
      - ceph.restart: [all]

    For example::

      tasks:
      - ceph.restart: [osd.0, mon.1, mds.*]

    or::

      tasks:
      - ceph.restart:
          daemons: [osd.0, mon.1]
          wait-for-healthy: false
          wait-for-osds-up: true

    :param ctx: Context
    :param config: Configuration
    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    log.info('daemons %s' % daemons)
    with tweaked_option(ctx, config):
        for role in daemons:
            cluster, type_, id_ = teuthology.split_role(role)
            d = ctx.daemons.get_daemon(type_, id_, cluster)
            assert d, 'daemon %s does not exist' % role
            d.stop()
            if type_ == 'osd':
                ctx.managers[cluster].mark_down_osd(id_)
            d.restart()
            clusters.add(cluster)

    if config.get('wait-for-healthy', True):
        for cluster in clusters:
            healthy(ctx=ctx, config=dict(cluster=cluster))
    if config.get('wait-for-osds-up', False):
        for cluster in clusters:
            ctx.managers[cluster].wait_for_all_osds_up()
    yield
@contextlib.contextmanager
def crush_setup(ctx, config):
    cluster_name = config['cluster']
    first_mon = teuthology.get_first_mon(ctx, config, cluster_name)
    (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys()

    profile = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', profile)
    _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
           args=['ceph', 'osd', 'crush', 'tunables', profile])
    yield
@contextlib.contextmanager
def task(ctx, config):
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    ctx.ceph[cluster_name] = argparse.Namespace()

    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    # cephadm mode?
    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(
            use_cephadm=ctx.cephadm)

    # image
    ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        sha1 = config.get('sha1')
        if sha1:
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)
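    # In short: an explicit 'image' wins; otherwise the image tag is derived
    # from 'sha1', else from 'branch' (default 'master'), and the same
    # sha1/branch is reused as the git ref when downloading the cephadm script.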
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)
    with contextutil.nested(
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: ceph_bootstrap(ctx=ctx, config=config),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_rgw(ctx=ctx, config=config),
            lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
            lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
            lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
            lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
            cephadm=True,
        )

        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)

            log.info('Setup complete, yielding')
            yield

        finally:
            log.info('Teardown begin')
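# A minimal job snippet exercising this task might look like (illustrative):
#
#   tasks:
#   - cephadm:
#       conf:
#         osd:
#           debug osd: 20
#
# Roles (mon.a, mgr.x, osd.0, client.0, ...) come from the usual teuthology
# 'roles' lists; the host of the first mon becomes the bootstrap node.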