"""
Ceph cluster task, deployed via cephadm orchestrator
"""
# stdlib / third-party imports used below (the original file header is elided)
import argparse, configobj, contextlib, errno, json, logging, os, re, uuid
import six, toml, yaml

from io import BytesIO
from six import StringIO
from tarfile import ReadError
from tasks.ceph_manager import CephManager
from teuthology import misc as teuthology
from teuthology import contextutil
from teuthology.orchestra import run
from teuthology.orchestra.daemon import DaemonGroup
from teuthology.config import config as teuth_config
# these items we use from ceph.py should probably eventually move elsewhere
from tasks.ceph import get_mons, healthy

CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus']

log = logging.getLogger(__name__)
def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs):
    testdir = teuthology.get_testdir(ctx)
    return remote.run(
        args=[
            'sudo', ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            'shell',
            '-c', '/etc/ceph/{}.conf'.format(cluster_name),
            '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--fsid', ctx.ceph[cluster_name].fsid,
        ] + extra_cephadm_args + [
            '--',
        ] + args,
        **kwargs
    )
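# Rough usage sketch (assumes ctx.ceph['ceph'] is already populated and 'remote' is a
# teuthology remote); the helper above ends up invoking something like:
#   sudo <ctx.cephadm> --image <image> shell -c /etc/ceph/ceph.conf \
#       -k /etc/ceph/ceph.client.admin.keyring --fsid <fsid> -- ceph status
# via a call such as:
#   _shell(ctx, 'ceph', remote, ['ceph', 'status'])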
def build_initial_config(ctx, config):
    cluster_name = config['cluster']

    path = os.path.join(os.path.dirname(__file__), 'cephadm.conf')
    conf = configobj.ConfigObj(path, file_error=True)

    conf.setdefault('global', {})
    conf['global']['fsid'] = ctx.ceph[cluster_name].fsid

    for section, keys in config.get('conf', {}).items():
        for key, value in keys.items():
            log.info(" override: [%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    return conf
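# The 'conf' overrides consumed by build_initial_config() follow the usual ceph task
# layout; an illustrative (not prescriptive) task fragment:
#
#   tasks:
#   - cephadm:
#       conf:
#         osd:
#           debug osd: 20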
@contextlib.contextmanager
def normalize_hostnames(ctx):
    """
    Ensure we have short hostnames throughout, for consistency between
    remote.shortname and socket.gethostname() in cephadm.
    """
    log.info('Normalizing hostnames...')
    ctx.cluster.run(args=[
        'sudo', 'hostname',
        run.Raw('$(hostname -s)'),
    ])

    yield
@contextlib.contextmanager
def download_cephadm(ctx, config, ref):
    cluster_name = config['cluster']

    if config.get('cephadm_mode') != 'cephadm-package':
        ref = config.get('cephadm_branch', ref)
        git_url = teuth_config.get_ceph_git_url()
        log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref))
        if git_url.startswith('https://github.com/'):
            # git archive doesn't like https:// URLs, which we use with github.
            rest = git_url.split('https://github.com/', 1)[1]
            rest = re.sub(r'\.git/?$', '', rest).strip()  # no .git suffix
            ctx.cluster.run(args=[
                'curl', '--silent',
                'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm',
                run.Raw('>'),
                ctx.cephadm,
            ])
        else:
            ctx.cluster.run(args=[
                'git', 'archive',
                '--remote=' + git_url,
                ref,
                'src/cephadm/cephadm',
                run.Raw('|'),
                'tar', '-xO', 'src/cephadm/cephadm',
                run.Raw('>'),
                ctx.cephadm,
            ])
        # sanity-check the resulting file and set executable bit
        cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm)
        ctx.cluster.run(args=[
            'test', '-s', ctx.cephadm,
            run.Raw('&&'),
            'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'),
            run.Raw('&&'),
            'chmod', '+x', ctx.cephadm,
        ])

    try:
        yield
    finally:
        log.info('Removing cluster...')
        ctx.cluster.run(args=[
            'sudo', ctx.cephadm, 'rm-cluster',
            '--fsid', ctx.ceph[cluster_name].fsid,
            '--force',
        ])

        if config.get('cephadm_mode') == 'root':
            log.info('Removing cephadm ...')
            ctx.cluster.run(args=[
                'rm', '-rf', ctx.cephadm,
            ])
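# For a github-hosted ceph repo the download above boils down to fetching a single raw
# file, e.g. (illustrative repo/ref):
#   https://raw.githubusercontent.com/ceph/ceph/octopus/src/cephadm/cephadm
# For any other git server it falls back to 'git archive ... src/cephadm/cephadm'.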
@contextlib.contextmanager
def ceph_log(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    # Add logs directory to job's info log file
    with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
        info_yaml = yaml.safe_load(info_file)
        info_file.seek(0)
        if 'archive' not in info_yaml:
            info_yaml['archive'] = {'log': '/var/log/ceph'}
        else:
            info_yaml['archive']['log'] = '/var/log/ceph'
        yaml.safe_dump(info_yaml, info_file, default_flow_style=False)

    try:
        yield

    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise

    finally:
        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo', 'egrep', pattern,
                '/var/log/ceph/{fsid}/ceph.log'.format(
                    fsid=fsid),
            ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = ctx.ceph[cluster_name].bootstrap_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]',
                             config.get('log-whitelist')) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log-whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'find',
                        '/var/log/ceph',            # all logs, not just for the cluster
                        '/var/log/rbd-target-api',  # ceph-iscsi
                        # ... ('-name *.log | xargs gzip' plumbing elided)
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            try:
                os.makedirs(path)
            except OSError:
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    pass
                teuthology.pull_directory(remote, '/var/log/ceph',  # everything
                                          os.path.join(sub, 'log'))
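# first_in_ceph_log() drives the pass/fail decision above; entries in the task's
# 'log-whitelist' are egrep -v patterns, e.g. (illustrative):
#
#   tasks:
#   - cephadm:
#       log-whitelist:
#         - \(OSD_DOWN\)
#         - \(PG_AVAILABILITY\)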
@contextlib.contextmanager
def ceph_crash(ctx, config):
    """
    Gather crash dumps from /var/lib/ceph/$fsid/crash
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    # Add logs directory to job's info log file
    with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
        info_yaml = yaml.safe_load(info_file)
        info_file.seek(0)
        if 'archive' not in info_yaml:
            info_yaml['archive'] = {'crash': '/var/lib/ceph/%s/crash' % fsid}
        else:
            info_yaml['archive']['crash'] = '/var/lib/ceph/%s/crash' % fsid
        yaml.safe_dump(info_yaml, info_file, default_flow_style=False)

    try:
        yield

    finally:
        if ctx.archive is not None:
            log.info('Archiving crash dumps...')
            path = os.path.join(ctx.archive, 'remote')
            try:
                os.makedirs(path)
            except OSError:
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    pass
                try:
                    teuthology.pull_directory(remote,
                                              '/var/lib/ceph/%s/crash' % fsid,
                                              os.path.join(sub, 'crash'))
                except ReadError:
                    pass
@contextlib.contextmanager
def ceph_bootstrap(ctx, config, registry):
    """
    Bootstrap ceph cluster, setup containers' registry mirror before
    the bootstrap if the registry is provided.

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    :param registry: url to containers' mirror registry
    """
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)
    fsid = ctx.ceph[cluster_name].fsid

    bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
    first_mon = ctx.ceph[cluster_name].first_mon
    first_mon_role = ctx.ceph[cluster_name].first_mon_role
    mons = ctx.ceph[cluster_name].mons

    ctx.cluster.run(args=[
        'sudo', 'mkdir', '-p', '/etc/ceph',
    ])
    ctx.cluster.run(args=[
        'sudo', 'chmod', '777', '/etc/ceph',
    ])
    if registry:
        add_mirror_to_cluster(ctx, registry)
    try:
        # write seed config
        log.info('Writing seed config...')
        conf_fp = BytesIO()
        seed_config = build_initial_config(ctx, config)
        seed_config.write(conf_fp)
        teuthology.write_file(
            remote=bootstrap_remote,
            path='{}/seed.{}.conf'.format(testdir, cluster_name),
            data=conf_fp.getvalue())
        log.debug('Final config:\n' + conf_fp.getvalue().decode())
        ctx.ceph[cluster_name].conf = seed_config

        # register initial daemons
        ctx.daemons.register_daemon(
            bootstrap_remote, 'mon', first_mon,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild('mon.' + first_mon),
            wait=False,
            started=True,
        )
        if not ctx.ceph[cluster_name].roleless:
            first_mgr = ctx.ceph[cluster_name].first_mgr
            ctx.daemons.register_daemon(
                bootstrap_remote, 'mgr', first_mgr,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild('mgr.' + first_mgr),
                wait=False,
                started=True,
            )
        # bootstrap
        log.info('Bootstrapping...')
        cmd = [
            'sudo', ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            'bootstrap',
            '--fsid', fsid,
            '--config', '{}/seed.{}.conf'.format(testdir, cluster_name),
            '--output-config', '/etc/ceph/{}.conf'.format(cluster_name),
            '--output-keyring',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name),
        ]
        if not ctx.ceph[cluster_name].roleless:
            cmd += [
                '--mon-id', first_mon,
                '--mgr-id', first_mgr,
                '--orphan-initial-daemons',   # we will do it explicitly!
                '--skip-monitoring-stack',    # we'll provision these explicitly
            ]
        if mons[first_mon_role].startswith('['):
            cmd += ['--mon-addrv', mons[first_mon_role]]
        else:
            cmd += ['--mon-ip', mons[first_mon_role]]
        if config.get('skip_dashboard'):
            cmd += ['--skip-dashboard']
        # bootstrap makes the keyring root 0600, so +r it for our purposes
        cmd += [
            run.Raw('&&'),
            'sudo', 'chmod', '+r',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ]
        bootstrap_remote.run(args=cmd)

        # fetch keys and configs
        log.info('Fetching config...')
        ctx.ceph[cluster_name].config_file = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.conf'.format(cluster_name))
        log.info('Fetching client.admin keyring...')
        ctx.ceph[cluster_name].admin_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name))
        log.info('Fetching mon keyring...')
        ctx.ceph[cluster_name].mon_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/var/lib/ceph/%s/mon.%s/keyring' % (fsid, first_mon),
            sudo=True)

        # fetch ssh key, distribute to additional nodes
        log.info('Fetching pub ssh key...')
        ssh_pub_key = teuthology.get_file(
            remote=bootstrap_remote,
            path='{}/{}.pub'.format(testdir, cluster_name)
        ).decode('ascii').strip()

        log.info('Installing pub ssh key for root users...')
        ctx.cluster.run(args=[
            'sudo', 'install', '-d', '-m', '0700', '/root/.ssh',
            run.Raw('&&'),
            'echo', ssh_pub_key,
            run.Raw('|'),
            'sudo', 'tee', '-a', '/root/.ssh/authorized_keys',
            run.Raw('&&'),
            'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys',
        ])
        # set options
        _shell(ctx, cluster_name, bootstrap_remote,
               ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true'])

        # add other hosts
        for remote in ctx.cluster.remotes.keys():
            if remote == bootstrap_remote:
                continue
            log.info('Writing (initial) conf and keyring to %s' % remote.shortname)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.conf'.format(cluster_name),
                data=ctx.ceph[cluster_name].config_file)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
                data=ctx.ceph[cluster_name].admin_keyring)

            log.info('Adding host %s to orchestrator...' % remote.shortname)
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'host', 'add',
                remote.shortname,
            ])
            r = _shell(ctx, cluster_name, remote,
                       ['ceph', 'orch', 'host', 'ls', '--format=json'],
                       stdout=StringIO())
            hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())]
            assert remote.shortname in hosts

        yield

    finally:
        log.info('Cleaning up testdir ceph.* files...')
        ctx.cluster.run(args=[
            'rm', '-f',
            '{}/seed.{}.conf'.format(testdir, cluster_name),
            '{}/{}.pub'.format(testdir, cluster_name),
        ])

        log.info('Stopping all daemons...')

        # this doesn't block until they are all stopped...
        #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])

        # so, stop them individually
        for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES, True):
            cluster, type_, id_ = teuthology.split_role(role)
            try:
                ctx.daemons.get_daemon(type_, id_, cluster).stop()
            except Exception:
                log.exception('Failed to stop "{role}"'.format(role=role))

        # clean up /etc/ceph
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/{}.conf'.format(cluster_name),
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ])
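# The startswith('[') check in the bootstrap command builder distinguishes the two
# forms kept in ctx.ceph[<cluster>].mons (values are illustrative):
#   --mon-ip form:    '10.0.0.1'
#   --mon-addrv form: '[v2:10.0.0.1:3300,v1:10.0.0.1:6789]'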
@contextlib.contextmanager
def ceph_mons(ctx, config):
    """
    Deploy any additional mons
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid
    num_mons = 1

    for remote, roles in ctx.cluster.remotes.items():
        for mon in [r for r in roles
                    if teuthology.is_type('mon', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(mon)
            if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
                continue
            log.info('Adding %s on %s' % (mon, remote.shortname))
            num_mons += 1
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'daemon', 'add', 'mon',
                remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
            ])
            ctx.daemons.register_daemon(
                remote, 'mon', id_,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild(mon),
                wait=False,
                started=True,
            )

            with contextutil.safe_while(sleep=1, tries=180) as proceed:
                while proceed():
                    log.info('Waiting for %d mons in monmap...' % (num_mons))
                    r = _shell(
                        ctx=ctx,
                        cluster_name=cluster_name,
                        remote=remote,
                        args=[
                            'ceph', 'mon', 'dump', '-f', 'json',
                        ],
                        stdout=StringIO(),
                    )
                    j = json.loads(r.stdout.getvalue())
                    if len(j['mons']) == num_mons:
                        break

    # refresh our (final) ceph.conf file
    bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
    log.info('Generating final ceph.conf file...')
    r = _shell(
        ctx=ctx,
        cluster_name=cluster_name,
        remote=bootstrap_remote,
        args=[
            'ceph', 'config', 'generate-minimal-conf',
        ],
        stdout=StringIO(),
    )
    ctx.ceph[cluster_name].config_file = r.stdout.getvalue()

    yield
@contextlib.contextmanager
def ceph_mgrs(ctx, config):
    """
    Deploy any additional mgrs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for mgr in [r for r in roles
                    if teuthology.is_type('mgr', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(mgr)
            if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr:
                continue
            log.info('Adding %s on %s' % (mgr, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[mgr] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'mgr',
            str(len(nodes) + 1) + ';' + ';'.join(nodes)]
        )
    for mgr, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'mgr', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(mgr),
            wait=False,
            started=True,
        )

    yield
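# The placement argument built above is a single string of the form
# '<count>;<host1>=<id1>;<host2>=<id2>', e.g. (illustrative) '3;smithi001=y;smithi002=z';
# the count includes the bootstrap mgr, hence len(nodes) + 1.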
@contextlib.contextmanager
def ceph_osds(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    log.info('Deploying OSDs...')

    # provision OSDs in numeric order
    id_to_remote = {}
    devs_by_remote = {}
    for remote, roles in ctx.cluster.remotes.items():
        devs_by_remote[remote] = teuthology.get_scratch_devices(remote)
        for osd in [r for r in roles
                    if teuthology.is_type('osd', cluster_name)(r)]:
            _, _, id_ = teuthology.split_role(osd)
            id_to_remote[int(id_)] = (osd, remote)

    cur = 0
    for osd_id in sorted(id_to_remote.keys()):
        osd, remote = id_to_remote[osd_id]
        _, _, id_ = teuthology.split_role(osd)
        assert int(id_) == cur
        devs = devs_by_remote[remote]
        assert devs   ## FIXME ##
        dev = devs.pop()
        if all(_ in dev for _ in ('lv', 'vg')):
            short_dev = dev.replace('/dev/', '')
        else:
            short_dev = dev
        log.info('Deploying %s on %s with %s...' % (
            osd, remote.shortname, dev))
        _shell(ctx, cluster_name, remote, [
            'ceph-volume', 'lvm', 'zap', dev])
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'daemon', 'add', 'osd',
            remote.shortname + ':' + short_dev
        ])
        ctx.daemons.register_daemon(
            remote, 'osd', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(osd),
            wait=False,
            started=True,
        )
        cur += 1

    yield
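# Note on the loop above: each scratch device is zapped with 'ceph-volume lvm zap' and
# then handed to the orchestrator as '<host>:<dev>'; devices that look like pre-made
# LVs (path contains both 'vg' and 'lv') are passed without the '/dev/' prefix.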
@contextlib.contextmanager
def ceph_mdss(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('mds', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'mds',
            # ...
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'mds', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
@contextlib.contextmanager
def ceph_monitoring(daemon_type, ctx, config):
    """
    Deploy prometheus, node-exporter, etc.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type(daemon_type, cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', daemon_type,
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, daemon_type, id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
@contextlib.contextmanager
def ceph_rgw(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = {}
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('rgw', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            realmzone = '.'.join(id_.split('.')[0:2])
            if realmzone not in nodes:
                nodes[realmzone] = []
            nodes[realmzone].append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)

    for realmzone in nodes.keys():
        (realm, zone) = realmzone.split('.', 1)

        # TODO: those should be moved to mgr/cephadm
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'realm', 'create', '--rgw-realm', realm, '--default']
        )
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'zonegroup', 'create', '--rgw-zonegroup=default', '--master', '--default']
        )
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'zone', 'create', '--rgw-zonegroup=default', '--rgw-zone', zone, '--master', '--default']
        )

    for realmzone, nodes in nodes.items():
        (realm, zone) = realmzone.split('.', 1)
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'rgw', realm, zone,
            # ...
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'rgw', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
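# The rgw role ids are expected to encode realm and zone as their first two
# dot-separated fields, e.g. (illustrative) role 'rgw.myrealm.myzone.a' yields
# realm 'myrealm' and zone 'myzone' for the radosgw-admin calls above.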
@contextlib.contextmanager
def ceph_iscsi(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('iscsi', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)

    poolname = 'iscsi'
    # ceph osd pool create iscsi 3 3 replicated
    _shell(ctx, cluster_name, remote, [
        'ceph', 'osd', 'pool', 'create',
        poolname, '3', '3', 'replicated']
    )
    _shell(ctx, cluster_name, remote, [
        'ceph', 'osd', 'pool', 'application', 'enable',
        poolname, 'rbd']
    )
    # ceph orch apply iscsi iscsi user password
    _shell(ctx, cluster_name, remote, [
        'ceph', 'orch', 'apply', 'iscsi',
        poolname, 'user', 'password',
        '--placement', str(len(nodes)) + ';' + ';'.join(nodes)]
    )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'iscsi', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
@contextlib.contextmanager
def ceph_clients(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name,
                                                                name)
            r = _shell(
                ctx=ctx,
                cluster_name=cluster_name,
                remote=remote,
                args=[
                    'ceph', 'auth',
                    'get-or-create', name,
                    'mon', 'allow *',
                    'osd', 'allow *',
                    'mds', 'allow *',
                    'mgr', 'allow *',
                ],
                stdout=StringIO(),
            )
            keyring = r.stdout.getvalue()
            teuthology.sudo_write_file(
                remote=remote,
                path=client_keyring,
                data=keyring,
            )

    yield
@contextlib.contextmanager
def ceph_initial():
    try:
        yield
    finally:
        log.info('Teardown complete')
@contextlib.contextmanager
def stop(ctx, config):
    """
    Stop ceph daemons

    For example::

      tasks:
      - ceph.stop: [osd.0, osd.2]

      tasks:
      - ceph.stop:
          daemons: [osd.0, osd.2]
    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    for role in daemons:
        cluster, type_, id_ = teuthology.split_role(role)
        ctx.daemons.get_daemon(type_, id_, cluster).stop()
        clusters.add(cluster)

    # for cluster in clusters:
    #    ctx.ceph[cluster].watchdog.stop()
    #    ctx.ceph[cluster].watchdog.join()

    yield
def shell(ctx, config):
    """
    Execute (shell) commands
    """
    cluster_name = config.get('cluster', 'ceph')

    env = []
    if 'env' in config:
        for k in config['env']:
            env.extend(['-e', k + '=' + ctx.config.get(k, '')])
        del config['env']

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.items():
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            _shell(ctx, cluster_name, remote,
                   ['bash', '-c', c],
                   extra_cephadm_args=env)
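# Illustrative usage sketch for this task (role name and commands are examples only):
#
#   tasks:
#   - cephadm.shell:
#       mon.a:
#         - ceph orch status
#         - ceph orch ps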
@contextlib.contextmanager
def tweaked_option(ctx, config):
    """
    Set an option, and then restore it with its original value.

    Note, due to the way how tasks are executed/nested, it's not suggested to
    use this method as a standalone task. Otherwise, it's likely that it will
    restore the tweaked option at the /end/ of the 'tasks' block.
    """
    saved_options = {}
    # we can complicate this when necessary
    options = ['mon-health-to-clog']
    type_, id_ = 'mon', '*'
    cluster = config.get('cluster', 'ceph')
    manager = ctx.managers[cluster]
    get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_))
    for option in options:
        if option not in config:
            continue
        value = 'true' if config[option] else 'false'
        option = option.replace('-', '_')
        old_value = manager.get_config(type_, get_from, option)
        if value != old_value:
            saved_options[option] = old_value
            manager.inject_args(type_, id_, option, value)
    yield
    for option, value in saved_options.items():
        manager.inject_args(type_, id_, option, value)
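# Illustrative restart config that exercises the helper above (the only option it
# currently knows about is 'mon-health-to-clog'):
#
#   tasks:
#   - cephadm.restart:
#       daemons: [mon.a]
#       mon-health-to-clog: false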
@contextlib.contextmanager
def restart(ctx, config):
    """
    Restart ceph daemons

    For example::

      tasks:
      - ceph.restart: [all]

      tasks:
      - ceph.restart: [osd.0, mon.1, mds.*]

      tasks:
      - ceph.restart:
          daemons: [osd.0, mon.1]
          wait-for-healthy: false
          wait-for-osds-up: true

    :param ctx: Context
    :param config: Configuration
    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    log.info('daemons %s' % daemons)
    with tweaked_option(ctx, config):
        for role in daemons:
            cluster, type_, id_ = teuthology.split_role(role)
            d = ctx.daemons.get_daemon(type_, id_, cluster)
            assert d, 'daemon %s does not exist' % role
            d.stop()
            if type_ == 'osd':
                ctx.managers[cluster].mark_down_osd(id_)
            d.restart()
            clusters.add(cluster)

    if config.get('wait-for-healthy', True):
        for cluster in clusters:
            healthy(ctx=ctx, config=dict(cluster=cluster))
    if config.get('wait-for-osds-up', False):
        for cluster in clusters:
            ctx.managers[cluster].wait_for_all_osds_up()
    yield
@contextlib.contextmanager
def distribute_config_and_admin_keyring(ctx, config):
    """
    Distribute a sufficient config and keyring for clients
    """
    cluster_name = config['cluster']
    log.info('Distributing (final) config and client.admin keyring...')
    for remote, roles in ctx.cluster.remotes.items():
        teuthology.sudo_write_file(
            remote=remote,
            path='/etc/ceph/{}.conf'.format(cluster_name),
            data=ctx.ceph[cluster_name].config_file)
        teuthology.sudo_write_file(
            remote=remote,
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            data=ctx.ceph[cluster_name].admin_keyring)
    try:
        yield
    finally:
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/{}.conf'.format(cluster_name),
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ])
@contextlib.contextmanager
def crush_setup(ctx, config):
    cluster_name = config['cluster']

    profile = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', profile)
    _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
           args=['ceph', 'osd', 'crush', 'tunables', profile])
    yield
@contextlib.contextmanager
def _bypass():
    yield
@contextlib.contextmanager
def initialize_config(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    ctx.ceph[cluster_name].roleless = False  # see below

    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True

    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(
            use_cephadm=ctx.cephadm)

    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
    if config.get('roleless', False):
        # mons will be named after hosts
        for remote, _ in remotes_and_roles:
            ctx.cluster.remotes[remote].append('mon.' + remote.shortname)
            # ...
            first_mon = remote.shortname
            bootstrap_remote = remote
        log.info('No mon roles; fabricating mons')

    roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()]

    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    if config.get('roleless', False):
        ctx.ceph[cluster_name].roleless = True
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
    else:
        first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
        _, _, first_mon = teuthology.split_role(first_mon_role)
        (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
        log.info('First mon is mon.%s on %s' % (first_mon,
                                                bootstrap_remote.shortname))
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = first_mon_role

        others = ctx.cluster.remotes[bootstrap_remote]
        mgrs = sorted([r for r in others
                       if teuthology.is_type('mgr', cluster_name)(r)])
        if not mgrs:
            raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon)
        _, _, first_mgr = teuthology.split_role(mgrs[0])
        log.info('First mgr is %s' % (first_mgr))
        ctx.ceph[cluster_name].first_mgr = first_mgr

    yield
@contextlib.contextmanager
def task(ctx, config):
    """
    Deploy ceph cluster using cephadm

    Setup containers' mirrors before the bootstrap, if corresponding
    config is provided in the teuthology server config yaml file.

    For example, teuthology.yaml can contain the 'defaults' section:

        defaults:
          cephadm:
            containers:
              registry_mirrors:
                docker.io: 'registry.mirror.example.com:5000'
              image: 'quay.io/ceph-ci/ceph'

    Using overrides makes it possible to customize it per run.
    The equivalent 'overrides' section looks like:

        overrides:
          cephadm:
            containers:
              registry_mirrors:
                docker.io: 'registry.mirror.example.com:5000'
              image: 'quay.io/ceph-ci/ceph'

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    """
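    # Illustrative task-level config (keys mirror the config.get() calls below; values
    # are examples only):
    #
    #   tasks:
    #   - cephadm:
    #       image: 'quay.io/ceph-ci/ceph:octopus'
    #       cephadm_mode: root
    #       conf:
    #         mgr:
    #           debug mgr: 20
    #       log-whitelist:
    #         - \(MON_DOWN\)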
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    teuthology.deep_merge(config, overrides.get('cephadm', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    if cluster_name not in ctx.ceph:
        ctx.ceph[cluster_name] = argparse.Namespace()
        ctx.ceph[cluster_name].bootstrapped = False

    # image
    teuth_defaults = teuth_config.get('defaults', {})
    cephadm_defaults = teuth_defaults.get('cephadm', {})
    containers_defaults = cephadm_defaults.get('containers', {})
    mirrors_defaults = containers_defaults.get('registry_mirrors', {})
    container_registry_mirror = mirrors_defaults.get('docker.io', None)
    container_image_name = containers_defaults.get('image', None)

    containers = config.get('containers', {})
    mirrors = containers.get('registry_mirrors', {})
    container_image_name = containers.get('image', container_image_name)
    container_registry_mirror = mirrors.get('docker.io',
                                            container_registry_mirror)

    if not hasattr(ctx.ceph[cluster_name], 'image'):
        ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        if not container_image_name:
            raise Exception("Configuration error occurred. "
                            "The 'image' value is undefined for 'cephadm' task. "
                            "Please provide corresponding options in the task's "
                            "config, task 'overrides', or teuthology 'defaults' "
                            "section.")
        sha1 = config.get('sha1')
        flavor = config.get('flavor', 'default')
        if sha1:
            if flavor == "crimson":
                ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor
            else:
                ctx.ceph[cluster_name].image = container_image_name + ':' + sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = container_image_name + ':' + branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)
    with contextutil.nested(
            # if the cluster is already bootstrapped bypass corresponding methods
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
                else initialize_config(ctx=ctx, config=config),
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
                else download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
                else ceph_bootstrap(ctx, config,
                                    container_registry_mirror),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_rgw(ctx=ctx, config=config),
            lambda: ceph_iscsi(ctx=ctx, config=config),
            lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
            lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
            lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
            lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
        )

        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)

            log.info('Setup complete, yielding')
            yield
        finally:
            log.info('Teardown begin')
def registries_add_mirror_to_docker_io(conf, mirror):
    config = toml.loads(conf)
    is_v1 = 'registries' in config
    if is_v1:
        search = config.get('registries', {}).get('search', {}).get('registries', [])
        insecure = config.get('registries', {}).get('search', {}).get('insecure', [])
        # v2: MutableMapping[str, Any] = { needs Python 3
        v2 = {
            'unqualified-search-registries': search,
            'registry': [
                {
                    # ...
                    'insecure': reg in insecure,
                    # ...
                } for reg in search
            ],
        }
    else:
        v2 = config  # type: ignore

    dockers = [
        r for r in v2['registry'] if
        r.get('prefix') == 'docker.io' or r.get('location') == 'docker.io'
    ]
    if dockers:
        docker = dockers[0]
        if 'mirror' not in docker:
            docker['mirror'] = [{
                'location': mirror,
                # ...
            }]
    return v2
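# The v2 registries.conf this function produces serializes (via toml.dumps) roughly to
# the following shape (values illustrative):
#
#   unqualified-search-registries = ["docker.io"]
#   [[registry]]
#   location = "docker.io"
#   [[registry.mirror]]
#   location = "registry.mirror.example.com:5000"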
def add_mirror_to_cluster(ctx, mirror):
    log.info('Adding local image mirror %s' % mirror)

    registries_conf = '/etc/containers/registries.conf'

    for remote in ctx.cluster.remotes.keys():
        try:
            config = teuthology.get_file(
                remote=remote,
                path=registries_conf)
            new_config = toml.dumps(registries_add_mirror_to_docker_io(config.decode('utf-8'), mirror))

            teuthology.sudo_write_file(
                remote=remote,
                path=registries_conf,
                data=six.ensure_str(new_config),
            )
        except IOError as e:  # py3: use FileNotFoundError instead.
            if e.errno != errno.ENOENT:
                raise

            # Docker doesn't ship a registries.conf
            log.info('Failed to add mirror: %s' % str(e))