1"""
2Execute ceph-deploy as a task
3"""
4from cStringIO import StringIO
5
6import contextlib
7import os
8import time
9import logging
10import traceback
11
12from teuthology import misc as teuthology
13from teuthology import contextutil
14from teuthology.config import config as teuth_config
15from teuthology.task import install as install_fn
16from teuthology.orchestra import run
17from tasks.cephfs.filesystem import Filesystem
3efd9988 18from teuthology.misc import wait_until_healthy
7c673cae
FG
19
20log = logging.getLogger(__name__)
21
22
@contextlib.contextmanager
def download_ceph_deploy(ctx, config):
    """
    Downloads ceph-deploy from the ceph.com git mirror and (by default)
    switches to the master branch. If `ceph-deploy-branch` is specified, it
    will use that instead. The `bootstrap` script is run with the argument
    obtained from `python_version`, if specified.
    """
    # use mon.a for ceph_admin
    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()

    try:
        py_ver = str(config['python_version'])
    except KeyError:
        pass
    else:
        supported_versions = ['2', '3']
        if py_ver not in supported_versions:
            raise ValueError("python_version must be: {}, not {}".format(
                ' or '.join(supported_versions), py_ver
            ))

        log.info("Installing Python")
        system_type = teuthology.get_system_type(ceph_admin)

        if system_type == 'rpm':
            package = 'python34' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'yum', '-y', 'install',
                package, 'python-virtualenv'
            ])
        else:
            package = 'python3' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'apt-get', '-y', '--force-yes', 'install',
                package, 'python-virtualenv'
            ])

    log.info('Downloading ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')

    ceph_admin.run(
        args=[
            'git', 'clone', '-b', ceph_deploy_branch,
            teuth_config.ceph_git_base_url + 'ceph-deploy.git',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
        ],
    )
    args = [
        'cd',
        '{tdir}/ceph-deploy'.format(tdir=testdir),
        run.Raw('&&'),
        './bootstrap',
    ]
    try:
        args.append(str(config['python_version']))
    except KeyError:
        pass
    ceph_admin.run(args=args)

    try:
        yield
    finally:
        log.info('Removing ceph-deploy ...')
        ceph_admin.run(
            args=[
                'rm',
                '-rf',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
            ],
        )

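# Illustrative only (not part of the upstream task): download_ceph_deploy()
# above consumes the `ceph-deploy-branch` and `python_version` config keys,
# so a job fragment exercising both might look like the following sketch
# (values are examples; the key placement assumes task() forwards its config
# unchanged, as it does at the bottom of this file):
#
#   - ceph-deploy:
#       ceph-deploy-branch: 1.5.39-stable
#       python_version: '2'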

def is_healthy(ctx, config):
    """Wait until a Ceph cluster is healthy."""
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
    tries = 0
    while True:
        tries += 1
        if tries >= max_tries:
            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
            remote.run(
                args=[
                    'cd',
                    '{tdir}'.format(tdir=testdir),
                    run.Raw('&&'),
                    'sudo', 'ceph',
                    'report',
                ],
            )
            raise RuntimeError(msg)

        r = remote.run(
            args=[
                'cd',
                '{tdir}'.format(tdir=testdir),
                run.Raw('&&'),
                'sudo', 'ceph',
                'health',
            ],
            stdout=StringIO(),
            logger=log.getChild('health'),
        )
        out = r.stdout.getvalue()
        log.info('Ceph health: %s', out.rstrip('\n'))
        if out.split(None, 1)[0] == 'HEALTH_OK':
            break
        time.sleep(10)

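# For reference (illustrative, not exhaustive): `ceph health` prints a status
# word first, e.g. "HEALTH_OK" or "HEALTH_WARN 1 osds down". The loop in
# is_healthy() only accepts the former; anything else keeps it polling every
# 10 seconds until the 90-try budget is exhausted, at which point it dumps a
# `ceph report` and raises.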

def get_nodes_using_role(ctx, target_role):
    """
    Extract the names of nodes that match a given role from a cluster. Also
    modify the cluster's service IDs to match the node-based naming scheme
    that ceph-deploy uses, so that if "mon.a" is on host "foo23" it is
    renamed to "mon.foo23".
    """

    # Nodes containing a service of the specified role
    nodes_of_interest = []

    # Prepare a modified version of cluster.remotes with ceph-deploy-ized names
    modified_remotes = {}
    ceph_deploy_mapped = dict()
    for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
        modified_remotes[_remote] = []
        for svc_id in roles_for_host:
            if svc_id.startswith("{0}.".format(target_role)):
                fqdn = str(_remote).split('@')[-1]
                nodename = str(str(_remote).split('.')[0]).split('@')[1]
                if target_role == 'mon':
                    nodes_of_interest.append(fqdn)
                else:
                    nodes_of_interest.append(nodename)
                mapped_role = "{0}.{1}".format(target_role, nodename)
                modified_remotes[_remote].append(mapped_role)
                # keep a dict of mapped roles for later use by tasks,
                # e.g. mon.a => mon.node1
                ceph_deploy_mapped[svc_id] = mapped_role
            else:
                modified_remotes[_remote].append(svc_id)

    ctx.cluster.remotes = modified_remotes
    ctx.cluster.mapped_role = ceph_deploy_mapped

    return nodes_of_interest

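# Worked example (hypothetical hostnames): if the job places "mon.a" on
# "node1.example.com" and "osd.0" on "node2.example.com", then
# get_nodes_using_role(ctx, 'mon') returns ['node1.example.com'] (mons keep
# the FQDN), rewrites the role to 'mon.node1', and records
# ctx.cluster.mapped_role == {'mon.a': 'mon.node1'}; roles that do not start
# with the target role, such as 'osd.0', pass through unchanged.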
def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(
            teuthology.roles_of_type(
                roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        if config.get('separate_journal_disk') is not None:
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(
                devs), 'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs

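# Worked example (hypothetical devices): with scratch devices
# ['/dev/vdb', '/dev/vdc'] and one osd role on host "node1", this returns
# [('node1', 'vdb')]; with `separate_journal_disk` set it pairs devices and
# returns [('node1', 'vdb', 'vdc')], i.e. (host, data, journal).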
def get_all_nodes(ctx, config):
    """Return a string of node names separated by blanks"""
    nodelist = []
    for t, k in ctx.config['targets'].iteritems():
        host = t.split('@')[-1]
        simple_host = host.split('.')[0]
        nodelist.append(simple_host)
    nodelist = " ".join(nodelist)
    return nodelist

@contextlib.contextmanager
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it. Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.

    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    def ceph_disk_osd_create(ctx, config):
        node_dev_list = get_dev_for_osd(ctx, config)
        no_of_osds = 0
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            elif config.get('bluestore') is not None:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    def ceph_volume_osd_create(ctx, config):
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        no_of_osds = 0
        for remote in osds.remotes.iterkeys():
            # all devs should be lvm
            osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
            # default is bluestore so we just need config item for filestore
            roles = ctx.cluster.remotes[remote]
            dev_needed = len([role for role in roles
                              if role.startswith('osd')])
            all_devs = teuthology.get_scratch_devices(remote)
            log.info("node={n}, need_devs={d}, available={a}".format(
                n=remote.shortname,
                d=dev_needed,
                a=all_devs,
            ))
            devs = all_devs[0:dev_needed]
            # rest of the devices can be used for journal if required
            jdevs = dev_needed
            for device in devs:
                device_split = device.split('/')
                lv_device = device_split[-2] + '/' + device_split[-1]
                if config.get('filestore') is not None:
                    osd_create_cmd += '--filestore --data ' + lv_device + ' '
                    # filestore with ceph-volume also needs a journal disk
                    try:
                        jdevice = all_devs.pop(jdevs)
                    except IndexError:
                        raise RuntimeError(
                            "No device available for journal configuration")
                    jdevice_split = jdevice.split('/')
                    j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                    osd_create_cmd += '--journal ' + j_lv
                else:
                    osd_create_cmd += ' --data ' + lv_device
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

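    # For orientation (illustrative command lines with hypothetical node and
    # device names): with `bluestore: True` in the config, ceph_disk_osd_create
    # assembles something like
    #   ./ceph-deploy osd create --bluestore node1:vdb
    # while ceph_volume_osd_create assembles something like
    #   ./ceph-deploy osd create --debug node1 --data vg/lv1
    # The ceph-volume path is selected by `use-ceph-volume` in the task config.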
    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        # skip mgr based on config item; this is needed when the test uses
        # the latest code to install old ceph versions
        skip_mgr = config.get('skip-mgr', False)
        if not skip_mgr:
            mgr_nodes = get_nodes_using_role(ctx, 'mgr')
            mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        if not skip_mgr:
            mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install the ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK; it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so let's
        # try the next block, which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        # create-keys is explicit now
        # http://tracker.ceph.com/issues/16036
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.iterkeys():
            remote.run(args=['sudo', 'ceph-create-keys', '--cluster', 'ceph',
                             '--id', remote.shortname])

        estatus_gather = execute_ceph_deploy(gather_keys)
        if estatus_gather != 0:
            raise RuntimeError("ceph-deploy: Failed during gather keys")
        # create OSDs
        if config.get('use-ceph-volume', False):
            no_of_osds = ceph_volume_osd_create(ctx, config)
        else:
            # this method will only work with ceph-deploy v1.5.39 or older
            no_of_osds = ceph_disk_osd_create(ctx, config)

        if not skip_mgr:
            execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

        log.info('Setting up client nodes...')
        conf_path = '/etc/ceph/ceph.conf'
        admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = teuthology.get_file(
            remote=mon0_remote,
            path=conf_path,
            sudo=True,
        )
        admin_keyring = teuthology.get_file(
            remote=mon0_remote,
            path=admin_keyring_path,
            sudo=True,
        )

        clients = ctx.cluster.only(teuthology.is_type('client'))
        for remot, roles_for_host in clients.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                client_keyring = \
                    '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                mon0_remote.run(
                    args=[
                        'cd',
                        '{tdir}'.format(tdir=testdir),
                        run.Raw('&&'),
                        'sudo', 'bash', '-c',
                        run.Raw('"'), 'ceph',
                        'auth',
                        'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds', 'allow',
                        'mon', 'allow *',
                        'osd', 'allow *',
                        run.Raw('>'),
                        client_keyring,
                        run.Raw('"'),
                    ],
                )
                key_data = teuthology.get_file(
                    remote=mon0_remote,
                    path=client_keyring,
                    sudo=True,
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=client_keyring,
                    data=key_data,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=admin_keyring_path,
                    data=admin_keyring,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=conf_path,
                    data=conf_data,
                    perms='0644'
                )

        if mds_nodes:
            log.info('Configuring CephFS...')
            Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
                              'sudo', 'systemctl', 'stop', 'ceph.target'])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=[
                'sudo', 'status', 'ceph-all', run.Raw('||'),
                'sudo', 'service', 'ceph', 'status', run.Raw('||'),
                'sudo', 'systemctl', 'status', 'ceph.target'],
            check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)

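# Quick reference (derived from the config lookups above; values illustrative):
# build_ceph_cluster() and its helpers consult `branch`, `conf`, `skip-mgr`,
# `use-ceph-volume`, `filestore`/`bluestore`, `dmcrypt`,
# `separate_journal_disk`, `test_mon_destroy`, `wait-for-healthy`, `only_mon`
# and `keep_running`, e.g. a minimal fragment might be:
#
#   - ceph-deploy:
#       use-ceph-volume: true
#       wait-for-healthy: true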
@contextlib.contextmanager
def cli_test(ctx, config):
    """
    Exercise the most commonly used ceph-deploy CLI commands, ensure they
    all work, and start up the init system.
    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"

    def execute_cdeploy(admin, cmd, path):
        """Execute ceph-deploy commands, using either the git path or the repo path."""
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))

    if config.get('rhbuild'):
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on the branch from config, e.g. wip-*, master or next;
        # packages for all distros should exist for wip* branches
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    for node, role in mons.remotes.iteritems():
        admin = node
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
    system_type = teuthology.get_system_type(admin)
    if config.get('rhbuild'):
        admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)
    osds = ctx.cluster.only(teuthology.is_type('osd'))

    for remote, roles in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if (len(devs) < 3):
            log.error(
                'Test needs minimum of 3 devices, only found %s',
                str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.iteritems():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(admin, conf_path, lines,
                                            sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(admin, conf_path, lines,
                                                sudo=True)
    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_mgr_install = 'install {branch} --mgr '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    # either use create-keys or push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_mgr_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, push_keys, path)

    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purpose to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
    out = r.stdout.getvalue()
    log.info('Ceph health: %s', out.rstrip('\n'))
    log.info("Waiting for cluster to become healthy")
    with contextutil.safe_while(sleep=10, tries=6,
                                action='check health') as proceed:
        while proceed():
            r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
            out = r.stdout.getvalue()
            if (out.split(None, 1)[0] == 'HEALTH_OK'):
                break
    rgw_install = 'install {branch} --rgw {node}'.format(
        branch=test_branch,
        node=nodename,
    )
    rgw_create = 'rgw create ' + nodename
    execute_cdeploy(admin, rgw_install, path)
    execute_cdeploy(admin, rgw_create, path)
    log.info('All ceph-deploy cli tests passed')
    try:
        yield
    finally:
        log.info("cleaning up")
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
                              'sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)
        for i in range(3):
            umount_dev = "{d}1".format(d=devs[i])
            r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
        cmd = 'purge ' + nodename
        execute_cdeploy(admin, cmd, path)
        cmd = 'purgedata ' + nodename
        execute_cdeploy(admin, cmd, path)
        log.info("Removing temporary dir")
        admin.run(
            args=[
                'rm',
                run.Raw('-rf'),
                run.Raw(conf_dir)],
            check_status=False)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])

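# Summary (for readability; it mirrors the calls above): cli_test() drives
# ceph-deploy through roughly this sequence on a single node:
#   new <node>; install --mon/--mgr/--osd/--cli; mon create-initial;
#   admin <node>; disk zap + osd prepare on three scratch devices;
#   rgw install + rgw create <node>; and purge/purgedata during cleanup.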
@contextlib.contextmanager
def single_node_test(ctx, config):
    """
    - ceph-deploy.single_node_test: null

    #rhbuild testing
    - ceph-deploy.single_node_test:
        rhbuild: 1.2.3

    """
    log.info("Testing ceph-deploy on single node")
    if config is None:
        config = {}
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('rhbuild'):
        log.info("RH Build, Skip Download")
        with contextutil.nested(
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield
    else:
        with contextutil.nested(
                lambda: install_fn.ship_utilities(ctx=ctx, config=None),
                lambda: download_ceph_deploy(ctx=ctx, config=config),
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield


@contextlib.contextmanager
def upgrade(ctx, config):
    """
    Upgrade using ceph-deploy
    eg:
      ceph-deploy.upgrade:
        # to upgrade to a specific branch, use
        branch:
          stable: jewel
        # to set up the mgr node, use
        setup-mgr-node: True
        # to wait for the cluster to be healthy after the upgrade, use
        wait-for-healthy: True
        roles: (upgrades the below roles serially)
          mon.a
          mon.b
          osd.0
    """
    roles = config.get('roles')
    # get the roles that are mapped as per ceph-deploy;
    # roles are mapped for mon/mds, e.g. mon.a => mon.host_short_name
    mapped_role = ctx.cluster.mapped_role
    if config.get('branch'):
        branch = config.get('branch')
        (var, val) = branch.items()[0]
        ceph_branch = '--{var}={val}'.format(var=var, val=val)
    else:
        # default to the wip-branch under test
        dev_branch = ctx.config['branch']
        ceph_branch = '--dev={branch}'.format(branch=dev_branch)
    # get the node used for the initial deployment, which is mon.a
    mon_a = mapped_role.get('mon.a')
    (ceph_admin,) = ctx.cluster.only(mon_a).remotes.iterkeys()
    testdir = teuthology.get_testdir(ctx)
    cmd = './ceph-deploy install ' + ceph_branch
    for role in roles:
        # check if this role is mapped (mon or mds)
        if mapped_role.get(role):
            role = mapped_role.get(role)
        remotes_and_roles = ctx.cluster.only(role).remotes
        for remote, roles in remotes_and_roles.iteritems():
            nodename = remote.shortname
            cmd = cmd + ' ' + nodename
            log.info("Upgrading ceph on %s", nodename)
            ceph_admin.run(
                args=[
                    'cd',
                    '{tdir}/ceph-deploy'.format(tdir=testdir),
                    run.Raw('&&'),
                    run.Raw(cmd),
                ],
            )
            # restart all ceph services; ideally the upgrade itself would do
            # this, but it does not
            remote.run(
                args=[
                    'sudo', 'systemctl', 'restart', 'ceph.target'
                ]
            )
            ceph_admin.run(args=['sudo', 'ceph', '-s'])

    # workaround for http://tracker.ceph.com/issues/20950
    # write the correct mgr key to disk
    if config.get('setup-mgr-node', None):
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote, roles in mons.remotes.iteritems():
            remote.run(
                args=[
                    run.Raw('sudo ceph auth get client.bootstrap-mgr'),
                    run.Raw('|'),
                    run.Raw('sudo tee'),
                    run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
                ]
            )

    if config.get('setup-mgr-node', None):
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        # install mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_install),
            ],
        )
        # create mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_create),
            ],
        )
        ceph_admin.run(args=['sudo', 'ceph', '-s'])
    if config.get('wait-for-healthy', None):
        wait_until_healthy(ctx, ceph_admin, use_sudo=True)
    yield

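# Illustrative job fragment for the upgrade task (the shape is assumed from
# the config keys read above; branch and role names are examples only):
#
#   - ceph-deploy.upgrade:
#       branch:
#         stable: jewel
#       setup-mgr-node: True
#       wait-for-healthy: True
#       roles:
#         - mon.a
#         - osd.0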

@contextlib.contextmanager
def task(ctx, config):
    """
    Set up and tear down a Ceph cluster.

    For example::

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
             branch:
                stable: bobtail
             mon_initial_members: 1
             ceph-deploy-branch: my-ceph-deploy-branch
             only_mon: true
             keep_running: true
             # either choose bluestore or filestore, default is bluestore
             bluestore: True
             # or
             filestore: True
             # skip install of mgr for old releases using below flag
             # (default is False)
             skip-mgr: True
             # to use ceph-volume instead of ceph-disk
             # ceph-disk can only be used with old ceph-deploy release from pypi
             use-ceph-volume: true

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
             branch:
                dev: master
             conf:
                mon:
                   debug mon = 20

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
             branch:
                testing:
             dmcrypt: yes
             separate_journal_disk: yes

    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task ceph-deploy only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('branch') is not None:
        assert isinstance(
            config['branch'], dict), 'branch must be a dictionary'

    log.info('task ceph-deploy with config ' + str(config))

    # we need to use 1.5.39-stable for testing the jewel or master branch with
    # ceph-disk
    if config.get('use-ceph-volume', False) is False:
        # check we are not testing a specific branch
        if config.get('ceph-deploy-branch', False) is False:
            config['ceph-deploy-branch'] = '1.5.39-stable'

    with contextutil.nested(
            lambda: install_fn.ship_utilities(ctx=ctx, config=None),
            lambda: download_ceph_deploy(ctx=ctx, config=config),
            lambda: build_ceph_cluster(ctx=ctx, config=config),
    ):
        yield