1"""
2Execute ceph-deploy as a task
3"""
4from cStringIO import StringIO
5
6import contextlib
7import os
8import time
9import logging
10import traceback
11
12from teuthology import misc as teuthology
13from teuthology import contextutil
14from teuthology.config import config as teuth_config
15from teuthology.task import install as install_fn
16from teuthology.orchestra import run
17from tasks.cephfs.filesystem import Filesystem
3efd9988 18from teuthology.misc import wait_until_healthy
7c673cae
FG
19
log = logging.getLogger(__name__)


@contextlib.contextmanager
def download_ceph_deploy(ctx, config):
    """
    Downloads ceph-deploy from the ceph.com git mirror and (by default)
    switches to the master branch. If `ceph-deploy-branch` is specified, it
    will use that instead. The `bootstrap` script is run with the argument
    obtained from `python_version`, if specified.
    """
    # use mon.a for ceph_admin
    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()

    try:
        py_ver = str(config['python_version'])
    except KeyError:
        pass
    else:
        supported_versions = ['2', '3']
        if py_ver not in supported_versions:
            raise ValueError("python_version must be: {}, not {}".format(
                ' or '.join(supported_versions), py_ver
            ))

        log.info("Installing Python")
        system_type = teuthology.get_system_type(ceph_admin)

        if system_type == 'rpm':
            package = 'python36' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'yum', '-y', 'install',
                package, 'python-virtualenv'
            ])
        else:
            package = 'python3' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'apt-get', '-y', '--force-yes', 'install',
                package, 'python-virtualenv'
            ])

    log.info('Downloading ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')

    ceph_admin.run(
        args=[
            'git', 'clone', '-b', ceph_deploy_branch,
            teuth_config.ceph_git_base_url + 'ceph-deploy.git',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
        ],
    )
    args = [
        'cd',
        '{tdir}/ceph-deploy'.format(tdir=testdir),
        run.Raw('&&'),
        './bootstrap',
    ]
    try:
        args.append(str(config['python_version']))
    except KeyError:
        pass
    ceph_admin.run(args=args)

    try:
        yield
    finally:
        log.info('Removing ceph-deploy ...')
        ceph_admin.run(
            args=[
                'rm',
                '-rf',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
            ],
        )

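
# Illustrative task config consumed by download_ceph_deploy() (values are
# hypothetical):
#
#   tasks:
#   - ceph-deploy:
#       python_version: '3'
#       ceph-deploy-branch: master
#
# 'python_version' selects the interpreter package to install and the argument
# passed to ./bootstrap; 'ceph-deploy-branch' selects the branch cloned from
# teuth_config.ceph_git_base_url + 'ceph-deploy.git' (default 'master').
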
def is_healthy(ctx, config):
    """Wait until a Ceph cluster is healthy."""
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
    tries = 0
    while True:
        tries += 1
        if tries >= max_tries:
            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
            remote.run(
                args=[
                    'cd',
                    '{tdir}'.format(tdir=testdir),
                    run.Raw('&&'),
                    'sudo', 'ceph',
                    'report',
                ],
            )
            raise RuntimeError(msg)

        r = remote.run(
            args=[
                'cd',
                '{tdir}'.format(tdir=testdir),
                run.Raw('&&'),
                'sudo', 'ceph',
                'health',
            ],
            stdout=StringIO(),
            logger=log.getChild('health'),
        )
        out = r.stdout.getvalue()
        log.info('Ceph health: %s', out.rstrip('\n'))
        if out.split(None, 1)[0] == 'HEALTH_OK':
            break
        time.sleep(10)

def get_nodes_using_role(ctx, target_role):
    """
    Extract the names of nodes that match a given role from a cluster, and modify the
    cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy
    uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23".
    """

    # Nodes containing a service of the specified role
    nodes_of_interest = []

    # Prepare a modified version of cluster.remotes with ceph-deploy-ized names
    modified_remotes = {}
    ceph_deploy_mapped = dict()

    for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
        modified_remotes[_remote] = []
        for svc_id in roles_for_host:
            if svc_id.startswith("{0}.".format(target_role)):
                fqdn = str(_remote).split('@')[-1]
                nodename = str(str(_remote).split('.')[0]).split('@')[1]
                if target_role == 'mon':
                    nodes_of_interest.append(fqdn)
                else:
                    nodes_of_interest.append(nodename)

                mapped_role = "{0}.{1}".format(target_role, nodename)
                modified_remotes[_remote].append(mapped_role)
                # keep dict of mapped role for later use by tasks
                # eg. mon.a => mon.node1
                ceph_deploy_mapped[svc_id] = mapped_role
            else:
                modified_remotes[_remote].append(svc_id)

    ctx.cluster.remotes = modified_remotes
    # since the function is called multiple times for target roles
    # append new mapped roles
    if not hasattr(ctx.cluster, 'mapped_role'):
        ctx.cluster.mapped_role = ceph_deploy_mapped
    else:
        ctx.cluster.mapped_role.update(ceph_deploy_mapped)
    log.info("New mapped_role={mr}".format(mr=ctx.cluster.mapped_role))
    return nodes_of_interest

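
# Illustrative example of the renaming above (hypothetical host): if
# 'ubuntu@host1.example.com' carries ['mon.a', 'osd.0'], then
# get_nodes_using_role(ctx, 'mon') returns ['host1.example.com'], rewrites
# that host's role list to ['mon.host1', 'osd.0'], and records
# ctx.cluster.mapped_role['mon.a'] = 'mon.host1'.
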

def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(
            teuthology.roles_of_type(
                roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        if config.get('separate_journal_disk') is not None:
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(
                devs), 'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs

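
# Illustrative return value of get_dev_for_osd() (hypothetical host/devices):
# with separate_journal_disk set, entries look like ('node1', 'sdb', 'sdc')
# (host, data device, journal device); otherwise ('node1', 'sdb'), one entry
# per osd role on the host.
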

def get_all_nodes(ctx, config):
    """Return a string of node names separated by blanks"""
    nodelist = []
    for t, k in ctx.config['targets'].iteritems():
        host = t.split('@')[-1]
        simple_host = host.split('.')[0]
        nodelist.append(simple_host)
    nodelist = " ".join(nodelist)
    return nodelist

@contextlib.contextmanager
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it. Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.

    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    def ceph_disk_osd_create(ctx, config):
        node_dev_list = get_dev_for_osd(ctx, config)
        no_of_osds = 0
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            elif config.get('bluestore') is not None:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

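    # A sketch of the ceph-disk style commands composed above for one tuple
    # d = ('node1', 'sdb', 'sdc') from get_dev_for_osd() (hypothetical
    # host/devices), assuming filestore and dmcrypt are both set:
    #   ./ceph-deploy disk zap node1:sdb
    #   ./ceph-deploy disk zap node1:sdc
    #   ./ceph-deploy osd create --filestore --dmcrypt node1:sdb:sdc
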
    def ceph_volume_osd_create(ctx, config):
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        no_of_osds = 0
        for remote in osds.remotes.iterkeys():
            # all devs should be lvm
            osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
            # default is bluestore so we just need config item for filestore
            roles = ctx.cluster.remotes[remote]
            dev_needed = len([role for role in roles
                              if role.startswith('osd')])
            all_devs = teuthology.get_scratch_devices(remote)
            log.info("node={n}, need_devs={d}, available={a}".format(
                n=remote.shortname,
                d=dev_needed,
                a=all_devs,
            ))
            devs = all_devs[0:dev_needed]
            # rest of the devices can be used for journal if required
            jdevs = dev_needed
            for device in devs:
                device_split = device.split('/')
                lv_device = device_split[-2] + '/' + device_split[-1]
                if config.get('filestore') is not None:
                    osd_create_cmd += '--filestore --data ' + lv_device + ' '
                    # filestore with ceph-volume also needs journal disk
                    try:
                        jdevice = all_devs.pop(jdevs)
                    except IndexError:
                        raise RuntimeError(
                            "No device available for journal configuration")
                    jdevice_split = jdevice.split('/')
                    j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                    osd_create_cmd += '--journal ' + j_lv
                else:
                    osd_create_cmd += ' --data ' + lv_device
                estatus_osd = execute_ceph_deploy(osd_create_cmd)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

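    # A sketch of a command composed by ceph_volume_osd_create() for a scratch
    # device '/dev/vg_nvme/lv_1' on a hypothetical host 'node1' with the
    # default bluestore backend:
    #   ./ceph-deploy osd create --debug node1 --data vg_nvme/lv_1
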
    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        # skip mgr based on config item
        # this is needed when test uses latest code to install old ceph
        # versions
        skip_mgr = config.get('skip-mgr', False)
        if not skip_mgr:
            mgr_nodes = get_nodes_using_role(ctx, 'mgr')
            mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        if not skip_mgr:
            mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

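        # Illustrative expansion of the loop above (values hypothetical):
        # a task config of
        #   conf:
        #     mon:
        #       debug mon: 20
        # appends "[mon]" and "debug mon = 20" to the generated ceph.conf
        # under {tdir}/ceph-deploy/ before ceph is installed.
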
        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        if estatus_gather != 0:
            raise RuntimeError("ceph-deploy: Failed during gather keys")

        # install admin key on mons (ceph-create-keys doesn't do this any more)
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.iterkeys():
            execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname)

        # create osds
        if config.get('use-ceph-volume', False):
            no_of_osds = ceph_volume_osd_create(ctx, config)
        else:
            # this method will only work with ceph-deploy v1.5.39 or older
            no_of_osds = ceph_disk_osd_create(ctx, config)

        if not skip_mgr:
            execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

        log.info('Setting up client nodes...')
        conf_path = '/etc/ceph/ceph.conf'
        admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = teuthology.get_file(
            remote=mon0_remote,
            path=conf_path,
            sudo=True,
        )
        admin_keyring = teuthology.get_file(
            remote=mon0_remote,
            path=admin_keyring_path,
            sudo=True,
        )

        clients = ctx.cluster.only(teuthology.is_type('client'))
        for remot, roles_for_host in clients.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                client_keyring = \
                    '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                mon0_remote.run(
                    args=[
                        'cd',
                        '{tdir}'.format(tdir=testdir),
                        run.Raw('&&'),
                        'sudo', 'bash', '-c',
                        run.Raw('"'), 'ceph',
                        'auth',
                        'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds', 'allow',
                        'mon', 'allow *',
                        'osd', 'allow *',
                        run.Raw('>'),
                        client_keyring,
                        run.Raw('"'),
                    ],
                )
                key_data = teuthology.get_file(
                    remote=mon0_remote,
                    path=client_keyring,
                    sudo=True,
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=client_keyring,
                    data=key_data,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=admin_keyring_path,
                    data=admin_keyring,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=conf_path,
                    data=conf_data,
                    perms='0644'
                )

        if mds_nodes:
            log.info('Configuring CephFS...')
            Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        # create rbd pool
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'create', 'rbd', '128', '128'],
            check_status=False)
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'application', 'enable',
                'rbd', 'rbd', '--yes-i-really-mean-it'
            ],
            check_status=False)
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)
        ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)

@contextlib.contextmanager
def cli_test(ctx, config):
    """
    Exercise the most commonly used ceph-deploy CLI commands, ensure they
    all work, and start the cluster up via the init system.
    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"

    def execute_cdeploy(admin, cmd, path):
        """Execute a ceph-deploy command, using either the git checkout
        under `path` or the installed package when no path is given."""
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))

    if config.get('rhbuild'):
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on branch from config eg: wip-* , master or next etc
        # packages for all distro's should exist for wip*
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    for node, role in mons.remotes.iteritems():
        admin = node
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
    system_type = teuthology.get_system_type(admin)
    if config.get('rhbuild'):
        admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)
    osds = ctx.cluster.only(teuthology.is_type('osd'))

    for remote, roles in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if (len(devs) < 3):
            log.error(
                'Test needs minimum of 3 devices, only found %s',
                str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.iteritems():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(admin, conf_path, lines,
                                            sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(admin, conf_path, lines,
                                                sudo=True)
    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_mgr_install = 'install {branch} --mgr '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    mgr_create = 'mgr create ' + nodename
    # either use create-keys or push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_mgr_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, mgr_create, path)
    execute_cdeploy(admin, push_keys, path)

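    # For reference, the commands composed and executed above expand to
    # ceph-deploy invocations along the lines of (branch name hypothetical):
    #   ceph-deploy install --dev=wip-foo --mon <nodename>
    #   ceph-deploy mon create-initial
    #   ceph-deploy mgr create <nodename>
    #   ceph-deploy admin <nodename>
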
    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purpose to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
    out = r.stdout.getvalue()
    log.info('Ceph health: %s', out.rstrip('\n'))
684 log.info("Waiting for cluster to become healthy")
685 with contextutil.safe_while(sleep=10, tries=6,
686 action='check health') as proceed:
3efd9988
FG
687 while proceed():
688 r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
689 out = r.stdout.getvalue()
690 if (out.split(None, 1)[0] == 'HEALTH_OK'):
691 break
7c673cae
FG
692 rgw_install = 'install {branch} --rgw {node}'.format(
693 branch=test_branch,
694 node=nodename,
695 )
696 rgw_create = 'rgw create ' + nodename
697 execute_cdeploy(admin, rgw_install, path)
698 execute_cdeploy(admin, rgw_create, path)
699 log.info('All ceph-deploy cli tests passed')
    try:
        yield
    finally:
        log.info("cleaning up")
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)
        for i in range(3):
            umount_dev = "{d}1".format(d=devs[i])
            r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
        cmd = 'purge ' + nodename
        execute_cdeploy(admin, cmd, path)
        cmd = 'purgedata ' + nodename
        execute_cdeploy(admin, cmd, path)
        log.info("Removing temporary dir")
        admin.run(
            args=[
                'rm',
                run.Raw('-rf'),
                run.Raw(conf_dir)],
            check_status=False)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])

@contextlib.contextmanager
def single_node_test(ctx, config):
    """
    - ceph-deploy.single_node_test: null

    #rhbuild testing
    - ceph-deploy.single_node_test:
        rhbuild: 1.2.3

    """
    log.info("Testing ceph-deploy on single node")
    if config is None:
        config = {}
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('rhbuild'):
        log.info("RH Build, Skip Download")
        with contextutil.nested(
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield
    else:
        with contextutil.nested(
                lambda: install_fn.ship_utilities(ctx=ctx, config=None),
                lambda: download_ceph_deploy(ctx=ctx, config=config),
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield

@contextlib.contextmanager
def upgrade(ctx, config):
    """
    Upgrade using ceph-deploy
    eg:
      ceph-deploy.upgrade:
        # to upgrade to a specific branch, use
        branch:
          stable: jewel
        # to set up the mgr node, use
        setup-mgr-node: True
        # to wait for the cluster to be healthy after all upgrades, use
        wait-for-healthy: True
        roles: (upgrades the below roles serially)
          mon.a
          mon.b
          osd.0
    """
    roles = config.get('roles')
    # get the roles that are mapped as per ceph-deploy
    # roles are mapped for mon/mds eg: mon.a => mon.host_short_name
    mapped_role = ctx.cluster.mapped_role
    log.info("roles={r}, mapped_roles={mr}".format(r=roles, mr=mapped_role))
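    # Illustrative flow for the loop below (names hypothetical): with
    # roles ['mon.a', 'osd.0'] and ctx.cluster.mapped_role containing
    # {'mon.a': 'mon.node1'}, 'mon.a' is resolved to 'mon.node1', its remote
    # is looked up, and that host's shortname is appended to the
    # './ceph-deploy install <branch>' command before it is run.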
    if config.get('branch'):
        branch = config.get('branch')
        (var, val) = branch.items()[0]
        ceph_branch = '--{var}={val}'.format(var=var, val=val)
    else:
        # default to wip-branch under test
        dev_branch = ctx.config['branch']
        ceph_branch = '--dev={branch}'.format(branch=dev_branch)
    # get the node used for initial deployment which is mon.a
    mon_a = mapped_role.get('mon.a')
    (ceph_admin,) = ctx.cluster.only(mon_a).remotes.iterkeys()
    testdir = teuthology.get_testdir(ctx)
    cmd = './ceph-deploy install ' + ceph_branch
    for role in roles:
        # check if this role is mapped (mon or mds)
        if mapped_role.get(role):
            role = mapped_role.get(role)
        remotes_and_roles = ctx.cluster.only(role).remotes
        for remote, roles in remotes_and_roles.iteritems():
            nodename = remote.shortname
            cmd = cmd + ' ' + nodename
            log.info("Upgrading ceph on %s", nodename)
            ceph_admin.run(
                args=[
                    'cd',
                    '{tdir}/ceph-deploy'.format(tdir=testdir),
                    run.Raw('&&'),
                    run.Raw(cmd),
                ],
            )
            # restart all ceph services; ideally the upgrade itself would do
            # this, but it does not
            remote.run(
                args=[
                    'sudo', 'systemctl', 'restart', 'ceph.target'
                ]
            )
            ceph_admin.run(args=['sudo', 'ceph', '-s'])

    # workaround for http://tracker.ceph.com/issues/20950
    # write the correct mgr key to disk
    if config.get('setup-mgr-node', None):
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote, roles in mons.remotes.iteritems():
            remote.run(
                args=[
                    run.Raw('sudo ceph auth get client.bootstrap-mgr'),
                    run.Raw('|'),
                    run.Raw('sudo tee'),
                    run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
                ]
            )

    if config.get('setup-mgr-node', None):
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        # install mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_install),
            ],
        )
        # create mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_create),
            ],
        )
        ceph_admin.run(args=['sudo', 'ceph', '-s'])
    if config.get('wait-for-healthy', None):
        wait_until_healthy(ctx, ceph_admin, use_sudo=True)
    yield

@contextlib.contextmanager
def task(ctx, config):
    """
    Set up and tear down a Ceph cluster.

    For example::

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
                stable: bobtail
            mon_initial_members: 1
            ceph-deploy-branch: my-ceph-deploy-branch
            only_mon: true
            keep_running: true
            # either choose bluestore or filestore, default is bluestore
            bluestore: True
            # or
            filestore: True
            # skip install of mgr for old release using below flag
            skip-mgr: True ( default is False )
            # to use ceph-volume instead of ceph-disk
            # ceph-disk can only be used with old ceph-deploy release from pypi
            use-ceph-volume: true

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
                dev: master
            conf:
                mon:
                    debug mon = 20

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
                testing:
            dmcrypt: yes
            separate_journal_disk: yes

    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task ceph-deploy only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('branch') is not None:
        assert isinstance(
            config['branch'], dict), 'branch must be a dictionary'

    log.info('task ceph-deploy with config ' + str(config))

    # we need to use 1.5.39-stable for testing jewel or master branch with
    # ceph-disk
    if config.get('use-ceph-volume', False) is False:
        # check we are not testing specific branch
        if config.get('ceph-deploy-branch', False) is False:
            config['ceph-deploy-branch'] = '1.5.39-stable'

    with contextutil.nested(
            lambda: install_fn.ship_utilities(ctx=ctx, config=None),
            lambda: download_ceph_deploy(ctx=ctx, config=config),
            lambda: build_ceph_cluster(ctx=ctx, config=config),
    ):
        yield