"""
Execute ceph-deploy as a task
"""

import contextlib
import os
import time
import logging
import traceback

from teuthology import misc as teuthology
from teuthology import contextutil
from teuthology.config import config as teuth_config
from teuthology.task import install as install_fn
from teuthology.orchestra import run
from tasks.cephfs.filesystem import Filesystem
from teuthology.misc import wait_until_healthy

log = logging.getLogger(__name__)


@contextlib.contextmanager
def download_ceph_deploy(ctx, config):
    """
    Downloads ceph-deploy from the ceph.com git mirror and (by default)
    switches to the master branch. If `ceph-deploy-branch` is specified, it
    will use that instead. The `bootstrap` script is run with the argument
    obtained from `python_version`, if specified.
    """
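    # Illustrative config for this helper (example values only), as passed
    # through from the ceph-deploy task or its overrides:
    #   {'python_version': '3', 'ceph-deploy-branch': 'master'}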
    # use mon.a for ceph_admin
    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys()

    try:
        py_ver = str(config['python_version'])
    except KeyError:
        pass
    else:
        supported_versions = ['2', '3']
        if py_ver not in supported_versions:
            raise ValueError("python_version must be: {}, not {}".format(
                ' or '.join(supported_versions), py_ver
            ))

        log.info("Installing Python")
        system_type = teuthology.get_system_type(ceph_admin)

        if system_type == 'rpm':
            package = 'python36' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'yum', '-y', 'install',
                package, 'python-virtualenv'
            ])
        else:
            package = 'python3' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'apt-get', '-y', '--force-yes', 'install',
                package, 'python-virtualenv'
            ])

    log.info('Downloading ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')

    ceph_admin.run(
        args=[
            'git', 'clone', '-b', ceph_deploy_branch,
            teuth_config.ceph_git_base_url + 'ceph-deploy.git',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
        ],
    )
    args = [
        'cd',
        '{tdir}/ceph-deploy'.format(tdir=testdir),
        run.Raw('&&'),
        './bootstrap',
    ]
    try:
        args.append(str(config['python_version']))
    except KeyError:
        pass
    ceph_admin.run(args=args)

    try:
        yield
    finally:
        log.info('Removing ceph-deploy ...')
        ceph_admin.run(
            args=[
                'rm',
                '-rf',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
            ],
        )


def is_healthy(ctx, config):
    """Wait until a Ceph cluster is healthy."""
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
    tries = 0
    while True:
        tries += 1
        if tries >= max_tries:
            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
            remote.run(
                args=[
                    'cd',
                    '{tdir}'.format(tdir=testdir),
                    run.Raw('&&'),
                    'sudo', 'ceph',
                    'report',
                ],
            )
            raise RuntimeError(msg)

        out = remote.sh(
            [
                'cd',
                '{tdir}'.format(tdir=testdir),
                run.Raw('&&'),
                'sudo', 'ceph',
                'health',
            ],
            logger=log.getChild('health'),
        )
        log.info('Ceph health: %s', out.rstrip('\n'))
        if out.split(None, 1)[0] == 'HEALTH_OK':
            break
        time.sleep(10)


def get_nodes_using_role(ctx, target_role):
    """
    Extract the names of nodes that match a given role from a cluster, and modify the
    cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy
    uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23".
    """
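    # For example (hypothetical hostnames): with target_role='mon', roles
    # ['mon.a', 'osd.0'] on remote 'ubuntu@foo23.example.com' become
    # ['mon.foo23', 'osd.0'], and ctx.cluster.mapped_role records
    # {'mon.a': 'mon.foo23'}; the returned list holds the FQDN for mons and
    # the short hostname for other roles.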

    # Nodes containing a service of the specified role
    nodes_of_interest = []

    # Prepare a modified version of cluster.remotes with ceph-deploy-ized names
    modified_remotes = {}
    ceph_deploy_mapped = dict()
    for _remote, roles_for_host in ctx.cluster.remotes.items():
        modified_remotes[_remote] = []
        for svc_id in roles_for_host:
            if svc_id.startswith("{0}.".format(target_role)):
                fqdn = str(_remote).split('@')[-1]
                nodename = str(str(_remote).split('.')[0]).split('@')[1]
                if target_role == 'mon':
                    nodes_of_interest.append(fqdn)
                else:
                    nodes_of_interest.append(nodename)
                mapped_role = "{0}.{1}".format(target_role, nodename)
                modified_remotes[_remote].append(mapped_role)
                # keep a dict of the mapped roles for later use by tasks,
                # eg. mon.a => mon.node1
                ceph_deploy_mapped[svc_id] = mapped_role
            else:
                modified_remotes[_remote].append(svc_id)

    ctx.cluster.remotes = modified_remotes
    # since the function is called multiple times for different target roles,
    # append the newly mapped roles
    if not hasattr(ctx.cluster, 'mapped_role'):
        ctx.cluster.mapped_role = ceph_deploy_mapped
    else:
        ctx.cluster.mapped_role.update(ceph_deploy_mapped)
    log.info("New mapped_role={mr}".format(mr=ctx.cluster.mapped_role))
    return nodes_of_interest


def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
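    # Returns one tuple per OSD: (short hostname, data device) or, when a
    # separate journal disk is requested, (short hostname, data device,
    # journal device); device names are the basename only, e.g. 'sdb'.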
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.items():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(
            teuthology.roles_of_type(
                roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        if config.get('separate_journal_disk') is not None:
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(
                devs), 'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs


def get_all_nodes(ctx, config):
    """Return a string of node names separated by blanks"""
    nodelist = []
    for t, k in ctx.config['targets'].items():
        host = t.split('@')[-1]
        simple_host = host.split('.')[0]
        nodelist.append(simple_host)
    nodelist = " ".join(nodelist)
    return nodelist


@contextlib.contextmanager
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it. Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.

    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

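    # Old-style OSD creation via ceph-disk: zap each scratch device, then run
    # './ceph-deploy osd create' with the colon-joined node:data[:journal]
    # syntax understood by the 1.5.x ceph-deploy releases.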
    def ceph_disk_osd_create(ctx, config):
        node_dev_list = get_dev_for_osd(ctx, config)
        no_of_osds = 0
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ' ' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            elif config.get('bluestore') is not None:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    def ceph_volume_osd_create(ctx, config):
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        no_of_osds = 0
        for remote in osds.remotes.keys():
            # all devs should be lvm
            osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
            # default is bluestore so we just need config item for filestore
            roles = ctx.cluster.remotes[remote]
            dev_needed = len([role for role in roles
                              if role.startswith('osd')])
            all_devs = teuthology.get_scratch_devices(remote)
            log.info("node={n}, need_devs={d}, available={a}".format(
                n=remote.shortname,
                d=dev_needed,
                a=all_devs,
            ))
            devs = all_devs[0:dev_needed]
            # rest of the devices can be used for journal if required
            jdevs = dev_needed
            for device in devs:
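                # scratch devices here are expected to be LVM logical volumes
                # (/dev/<vg>/<lv>), so reduce the path to the '<vg>/<lv>' form
                # passed to --data/--journal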
                device_split = device.split('/')
                lv_device = device_split[-2] + '/' + device_split[-1]
                if config.get('filestore') is not None:
                    osd_create_cmd += '--filestore --data ' + lv_device + ' '
                    # filestore with ceph-volume also needs a journal disk
                    try:
                        jdevice = all_devs.pop(jdevs)
                    except IndexError:
                        raise RuntimeError(
                            "No device available for journal configuration")
                    jdevice_split = jdevice.split('/')
                    j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                    osd_create_cmd += '--journal ' + j_lv
                else:
                    osd_create_cmd += ' --data ' + lv_device
                estatus_osd = execute_ceph_deploy(osd_create_cmd)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.items():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        # skip mgr based on config item
        # this is needed when a test uses the latest code to install old ceph
        # versions
        skip_mgr = config.get('skip-mgr', False)
        if not skip_mgr:
            mgr_nodes = get_nodes_using_role(ctx, 'mgr')
            mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        if not skip_mgr:
            mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

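        # Write any extra settings from the job's 'conf' section into the
        # ceph.conf that ceph-deploy distributes, so the daemons deployed
        # below start with those options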
        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.items():
                lines = '[{section}]\n'.format(section=section)
                ceph_admin.sudo_write_file(conf_path, lines, append=True)
                for key, value in keys.items():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    ceph_admin.sudo_write_file(conf_path, lines, append=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install the ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
            " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK; it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so let's
        # try the next block, which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        if estatus_gather != 0:
            raise RuntimeError("ceph-deploy: Failed during gather keys")

        # install admin key on mons (ceph-create-keys doesn't do this any more)
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.keys():
            execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname)

        # create OSDs
        if config.get('use-ceph-volume', False):
            no_of_osds = ceph_volume_osd_create(ctx, config)
        else:
            # this method will only work with ceph-deploy v1.5.39 or older
            no_of_osds = ceph_disk_osd_create(ctx, config)

        if not skip_mgr:
            execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

        log.info('Setting up client nodes...')
        conf_path = '/etc/ceph/ceph.conf'
        admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = mon0_remote.read_file(conf_path, sudo=True)
        admin_keyring = mon0_remote.read_file(admin_keyring_path, sudo=True)

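        # For each client role, create a client.{id} key on the mon with broad
        # caps (mds allow, mon/osd allow *), then copy that keyring plus
        # ceph.conf and the admin keyring to the client node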
        clients = ctx.cluster.only(teuthology.is_type('client'))
        for remote, roles_for_host in clients.remotes.items():
            for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                client_keyring = \
                    '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                mon0_remote.run(
                    args=[
                        'cd',
                        '{tdir}'.format(tdir=testdir),
                        run.Raw('&&'),
                        'sudo', 'bash', '-c',
                        run.Raw('"'), 'ceph',
                        'auth',
                        'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds', 'allow',
                        'mon', 'allow *',
                        'osd', 'allow *',
                        run.Raw('>'),
                        client_keyring,
                        run.Raw('"'),
                    ],
                )
                key_data = mon0_remote.read_file(
                    path=client_keyring,
                    sudo=True,
                )
                remote.sudo_write_file(
                    path=client_keyring,
                    data=key_data,
                    mode='0644'
                )
                remote.sudo_write_file(
                    path=admin_keyring_path,
                    data=admin_keyring,
                    mode='0644'
                )
                remote.sudo_write_file(
                    path=conf_path,
                    data=conf_data,
                    mode='0644'
                )

        if mds_nodes:
            log.info('Configuring CephFS...')
            Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        # create rbd pool
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'create', 'rbd', '128', '128'],
            check_status=False)
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'application', 'enable',
                'rbd', 'rbd', '--yes-i-really-mean-it'
            ],
            check_status=False)
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)
        ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.items():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)


@contextlib.contextmanager
def cli_test(ctx, config):
    """
    Exercise the most commonly used ceph-deploy CLI commands, ensure they
    all work, and start the cluster up via the init system.
    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"
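    # every ceph-deploy invocation below runs from this scratch directory, so
    # the generated ceph.conf and keyrings land under {testdir}/cdtest and can
    # be removed wholesale during cleanup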

    def execute_cdeploy(admin, cmd, path):
        """Execute a ceph-deploy command, either from the git checkout under
        `path` or from an installed ceph-deploy package."""
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))

    if config.get('rhbuild'):
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on the branch from the config, e.g. wip-*, master or next;
        # packages for all distros should exist for wip* branches
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    for node, role in mons.remotes.items():
        admin = node
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
        system_type = teuthology.get_system_type(admin)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)
    osds = ctx.cluster.only(teuthology.is_type('osd'))

    for remote, roles in osds.remotes.items():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if (len(devs) < 3):
            log.error(
                'Test needs minimum of 3 devices, only found %s',
                str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.items():
            lines = '[{section}]\n'.format(section=section)
            admin.sudo_write_file(conf_path, lines, append=True)
            for key, value in keys.items():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                admin.sudo_write_file(conf_path, lines, append=True)
    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_mgr_install = 'install {branch} --mgr '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    mgr_create = 'mgr create ' + nodename
    # either use create-keys or the push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_mgr_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, mgr_create, path)
    execute_cdeploy(admin, push_keys, path)

    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purposes, to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    out = remote.sh('sudo ceph health')
    log.info('Ceph health: %s', out.rstrip('\n'))
    log.info("Waiting for cluster to become healthy")
    with contextutil.safe_while(sleep=10, tries=6,
                                action='check health') as proceed:
        while proceed():
            out = remote.sh('sudo ceph health')
            if (out.split(None, 1)[0] == 'HEALTH_OK'):
                break
    rgw_install = 'install {branch} --rgw {node}'.format(
        branch=test_branch,
        node=nodename,
    )
    rgw_create = 'rgw create ' + nodename
    execute_cdeploy(admin, rgw_install, path)
    execute_cdeploy(admin, rgw_create, path)
    log.info('All ceph-deploy cli tests passed')
    try:
        yield
    finally:
        log.info("cleaning up")
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)
        for i in range(3):
            umount_dev = "{d}1".format(d=devs[i])
            remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
        cmd = 'purge ' + nodename
        execute_cdeploy(admin, cmd, path)
        cmd = 'purgedata ' + nodename
        execute_cdeploy(admin, cmd, path)
        log.info("Removing temporary dir")
        admin.run(
            args=[
                'rm',
                run.Raw('-rf'),
                run.Raw(conf_dir)],
            check_status=False)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])


@contextlib.contextmanager
def single_node_test(ctx, config):
    """
    - ceph-deploy.single_node_test: null

    # rhbuild testing
    - ceph-deploy.single_node_test:
        rhbuild: 1.2.3

    """
    log.info("Testing ceph-deploy on single node")
    if config is None:
        config = {}
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('rhbuild'):
        log.info("RH Build, Skip Download")
        with contextutil.nested(
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield
    else:
        with contextutil.nested(
                lambda: install_fn.ship_utilities(ctx=ctx, config=None),
                lambda: download_ceph_deploy(ctx=ctx, config=config),
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield


@contextlib.contextmanager
def upgrade(ctx, config):
    """
    Upgrade using ceph-deploy
    eg:
      ceph-deploy.upgrade:
        # to upgrade to a specific branch, use
        branch:
          stable: jewel
        # to set up the mgr node, use
        setup-mgr-node: True
        # to wait for the cluster to be healthy after the whole upgrade, use
        wait-for-healthy: True
        role: (upgrades the below roles serially)
          mon.a
          mon.b
          osd.0
    """
    roles = config.get('roles')
    # get the roles that are mapped as per ceph-deploy
    # roles are mapped for mon/mds eg: mon.a => mon.host_short_name
    mapped_role = ctx.cluster.mapped_role
    log.info("roles={r}, mapped_roles={mr}".format(r=roles, mr=mapped_role))
    if config.get('branch'):
        branch = config.get('branch')
        (var, val) = list(branch.items())[0]
        ceph_branch = '--{var}={val}'.format(var=var, val=val)
    else:
        # default to the wip branch under test
        dev_branch = ctx.config['branch']
        ceph_branch = '--dev={branch}'.format(branch=dev_branch)
    # get the node used for the initial deployment, which is mon.a
    mon_a = mapped_role.get('mon.a')
    (ceph_admin,) = ctx.cluster.only(mon_a).remotes.keys()
    testdir = teuthology.get_testdir(ctx)
    cmd = './ceph-deploy install ' + ceph_branch
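    # node names are appended to this single install command as the roles are
    # walked below, so nodes are upgraded one at a time and each invocation
    # also names the hosts upgraded before it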
    for role in roles:
        # check if this role is mapped (mon or mds)
        if mapped_role.get(role):
            role = mapped_role.get(role)
        remotes_and_roles = ctx.cluster.only(role).remotes
        for remote, roles in remotes_and_roles.items():
            nodename = remote.shortname
            cmd = cmd + ' ' + nodename
            log.info("Upgrading ceph on %s", nodename)
            ceph_admin.run(
                args=[
                    'cd',
                    '{tdir}/ceph-deploy'.format(tdir=testdir),
                    run.Raw('&&'),
                    run.Raw(cmd),
                ],
            )
            # restart all ceph services; ideally the upgrade would do this
            # itself, but it does not
            remote.run(
                args=[
                    'sudo', 'systemctl', 'restart', 'ceph.target'
                ]
            )
            ceph_admin.run(args=['sudo', 'ceph', '-s'])

    # workaround for http://tracker.ceph.com/issues/20950
    # write the correct mgr key to disk
    if config.get('setup-mgr-node', None):
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote, roles in mons.remotes.items():
            remote.run(
                args=[
                    run.Raw('sudo ceph auth get client.bootstrap-mgr'),
                    run.Raw('|'),
                    run.Raw('sudo tee'),
                    run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
                ]
            )

    if config.get('setup-mgr-node', None):
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        # install mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_install),
            ],
        )
        # create mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_create),
            ],
        )
        ceph_admin.run(args=['sudo', 'ceph', '-s'])
    if config.get('wait-for-healthy', None):
        wait_until_healthy(ctx, ceph_admin, use_sudo=True)
    yield


@contextlib.contextmanager
def task(ctx, config):
    """
    Set up and tear down a Ceph cluster.

    For example::

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
              stable: bobtail
            mon_initial_members: 1
            ceph-deploy-branch: my-ceph-deploy-branch
            only_mon: true
            keep_running: true
            # either choose bluestore or filestore, default is bluestore
            bluestore: True
            # or
            filestore: True
            # skip install of mgr for old releases using the below flag
            skip-mgr: True  (default is False)
            # to use ceph-volume instead of ceph-disk
            # ceph-disk can only be used with an old ceph-deploy release from pypi
            use-ceph-volume: true

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
              dev: master
            conf:
              mon:
                debug mon = 20

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
              testing:
            dmcrypt: yes
            separate_journal_disk: yes

    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task ceph-deploy only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('branch') is not None:
        assert isinstance(
            config['branch'], dict), 'branch must be a dictionary'

    log.info('task ceph-deploy with config ' + str(config))

    # we need to use 1.5.39-stable for testing the jewel or master branch with
    # ceph-disk
    if config.get('use-ceph-volume', False) is False:
        # check that we are not testing a specific ceph-deploy branch
        if config.get('ceph-deploy-branch', False) is False:
            config['ceph-deploy-branch'] = '1.5.39-stable'

    with contextutil.nested(
            lambda: install_fn.ship_utilities(ctx=ctx, config=None),
            lambda: download_ceph_deploy(ctx=ctx, config=config),
            lambda: build_ceph_cluster(ctx=ctx, config=config),
    ):
        yield