"""
Execute ceph-deploy as a task
"""

import contextlib
import os
import time
import logging
import traceback

from teuthology import misc as teuthology
from teuthology import contextutil
from teuthology.config import config as teuth_config
from teuthology.task import install as install_fn
from teuthology.orchestra import run
from tasks.cephfs.filesystem import Filesystem
from teuthology.misc import wait_until_healthy

log = logging.getLogger(__name__)


@contextlib.contextmanager
def download_ceph_deploy(ctx, config):
    """
    Downloads ceph-deploy from the ceph.com git mirror and (by default)
    switches to the master branch. If `ceph-deploy-branch` is specified, it
    will use that instead. The `bootstrap` script is run with the argument
    obtained from `python_version`, if specified.
    """
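    # Illustrative config for this helper (example values only), as passed
    # through from the ceph-deploy task or its overrides:
    #   {'python_version': '3', 'ceph-deploy-branch': 'master'}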
    # use mon.a for ceph_admin
    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys()

    try:
        py_ver = str(config['python_version'])
    except KeyError:
        pass
    else:
        supported_versions = ['2', '3']
        if py_ver not in supported_versions:
            raise ValueError("python_version must be: {}, not {}".format(
                ' or '.join(supported_versions), py_ver
            ))

        log.info("Installing Python")
        system_type = teuthology.get_system_type(ceph_admin)

        if system_type == 'rpm':
            package = 'python36' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'yum', '-y', 'install',
                package, 'python-virtualenv'
            ])
        else:
            package = 'python3' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'apt-get', '-y', '--force-yes', 'install',
                package, 'python-virtualenv'
            ])

    log.info('Downloading ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')

    ceph_admin.run(
        args=[
            'git', 'clone', '-b', ceph_deploy_branch,
            teuth_config.ceph_git_base_url + 'ceph-deploy.git',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
        ],
    )
    args = [
        'cd',
        '{tdir}/ceph-deploy'.format(tdir=testdir),
        run.Raw('&&'),
        './bootstrap',
    ]
    try:
        args.append(str(config['python_version']))
    except KeyError:
        pass
    ceph_admin.run(args=args)

    try:
        yield
    finally:
        log.info('Removing ceph-deploy ...')
        ceph_admin.run(
            args=[
                'rm',
                '-rf',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
            ],
        )


def is_healthy(ctx, config):
    """Wait until a Ceph cluster is healthy."""
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
    tries = 0
    while True:
        tries += 1
        if tries >= max_tries:
            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
            remote.run(
                args=[
                    'cd',
                    '{tdir}'.format(tdir=testdir),
                    run.Raw('&&'),
                    'sudo', 'ceph',
                    'report',
                ],
            )
            raise RuntimeError(msg)

        out = remote.sh(
            [
                'cd',
                '{tdir}'.format(tdir=testdir),
                run.Raw('&&'),
                'sudo', 'ceph',
                'health',
            ],
            logger=log.getChild('health'),
        )
        log.info('Ceph health: %s', out.rstrip('\n'))
        if out.split(None, 1)[0] == 'HEALTH_OK':
            break
        time.sleep(10)


def get_nodes_using_role(ctx, target_role):
    """
    Extract the names of nodes that match a given role from a cluster, and modify the
    cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy
    uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23".
    """
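    # For example (hypothetical hostnames): with target_role='mon', roles
    # ['mon.a', 'osd.0'] on remote 'ubuntu@foo23.example.com' become
    # ['mon.foo23', 'osd.0'], and ctx.cluster.mapped_role records
    # {'mon.a': 'mon.foo23'}; the returned list holds the FQDN for mons and
    # the short hostname for other roles.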

    # Nodes containing a service of the specified role
    nodes_of_interest = []

    # Prepare a modified version of cluster.remotes with ceph-deploy-ized names
    modified_remotes = {}
    ceph_deploy_mapped = dict()
    for _remote, roles_for_host in ctx.cluster.remotes.items():
        modified_remotes[_remote] = []
        for svc_id in roles_for_host:
            if svc_id.startswith("{0}.".format(target_role)):
                fqdn = str(_remote).split('@')[-1]
                nodename = str(str(_remote).split('.')[0]).split('@')[1]
                if target_role == 'mon':
                    nodes_of_interest.append(fqdn)
                else:
                    nodes_of_interest.append(nodename)
                mapped_role = "{0}.{1}".format(target_role, nodename)
                modified_remotes[_remote].append(mapped_role)
                # keep a dict of the mapped roles for later use by tasks,
                # eg. mon.a => mon.node1
                ceph_deploy_mapped[svc_id] = mapped_role
            else:
                modified_remotes[_remote].append(svc_id)

    ctx.cluster.remotes = modified_remotes
    # since the function is called multiple times for different target roles,
    # append the newly mapped roles
    if not hasattr(ctx.cluster, 'mapped_role'):
        ctx.cluster.mapped_role = ceph_deploy_mapped
    else:
        ctx.cluster.mapped_role.update(ceph_deploy_mapped)
    log.info("New mapped_role={mr}".format(mr=ctx.cluster.mapped_role))
    return nodes_of_interest


def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
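    # Returns one tuple per OSD: (short hostname, data device) or, when a
    # separate journal disk is requested, (short hostname, data device,
    # journal device); device names are the basename only, e.g. 'sdb'.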
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.items():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(
            teuthology.roles_of_type(
                roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        if config.get('separate_journal_disk') is not None:
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(
                devs), 'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs


def get_all_nodes(ctx, config):
    """Return a string of node names separated by blanks"""
    nodelist = []
    for t, k in ctx.config['targets'].items():
        host = t.split('@')[-1]
        simple_host = host.split('.')[0]
        nodelist.append(simple_host)
    nodelist = " ".join(nodelist)
    return nodelist


@contextlib.contextmanager
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it. Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.

    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

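    # Old-style OSD creation via ceph-disk: zap each scratch device, then run
    # './ceph-deploy osd create' with the colon-joined node:data[:journal]
    # syntax understood by the 1.5.x ceph-deploy releases.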
    def ceph_disk_osd_create(ctx, config):
        node_dev_list = get_dev_for_osd(ctx, config)
        no_of_osds = 0
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ' ' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            elif config.get('bluestore') is not None:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    def ceph_volume_osd_create(ctx, config):
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        no_of_osds = 0
        for remote in osds.remotes.keys():
            # all devs should be lvm
            osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
            # default is bluestore so we just need config item for filestore
            roles = ctx.cluster.remotes[remote]
            dev_needed = len([role for role in roles
                              if role.startswith('osd')])
            all_devs = teuthology.get_scratch_devices(remote)
            log.info("node={n}, need_devs={d}, available={a}".format(
                n=remote.shortname,
                d=dev_needed,
                a=all_devs,
            ))
            devs = all_devs[0:dev_needed]
            # rest of the devices can be used for journal if required
            jdevs = dev_needed
            for device in devs:
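                # scratch devices here are expected to be LVM logical volumes
                # (/dev/<vg>/<lv>), so reduce the path to the '<vg>/<lv>' form
                # passed to --data/--journal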
                device_split = device.split('/')
                lv_device = device_split[-2] + '/' + device_split[-1]
                if config.get('filestore') is not None:
                    osd_create_cmd += '--filestore --data ' + lv_device + ' '
                    # filestore with ceph-volume also needs a journal disk
                    try:
                        jdevice = all_devs.pop(jdevs)
                    except IndexError:
                        raise RuntimeError(
                            "No device available for journal configuration")
                    jdevice_split = jdevice.split('/')
                    j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                    osd_create_cmd += '--journal ' + j_lv
                else:
                    osd_create_cmd += ' --data ' + lv_device
                estatus_osd = execute_ceph_deploy(osd_create_cmd)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.items():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        # skip mgr based on config item
        # this is needed when a test uses the latest code to install old ceph
        # versions
        skip_mgr = config.get('skip-mgr', False)
        if not skip_mgr:
            mgr_nodes = get_nodes_using_role(ctx, 'mgr')
            mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        if not skip_mgr:
            mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

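        # Write any extra settings from the job's 'conf' section into the
        # ceph.conf that ceph-deploy distributes, so the daemons deployed
        # below start with those options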
        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.items():
                lines = '[{section}]\n'.format(section=section)
                ceph_admin.sudo_write_file(conf_path, lines, append=True)
                for key, value in keys.items():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    ceph_admin.sudo_write_file(conf_path, lines, append=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install the ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
            " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK; it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so let's
        # try the next block, which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        if estatus_gather != 0:
            raise RuntimeError("ceph-deploy: Failed during gather keys")

        # install admin key on mons (ceph-create-keys doesn't do this any more)
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.keys():
            execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname)

        # create OSDs
        if config.get('use-ceph-volume', False):
            no_of_osds = ceph_volume_osd_create(ctx, config)
        else:
            # this method will only work with ceph-deploy v1.5.39 or older
            no_of_osds = ceph_disk_osd_create(ctx, config)

        if not skip_mgr:
            execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

        log.info('Setting up client nodes...')
        conf_path = '/etc/ceph/ceph.conf'
        admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = mon0_remote.read_file(conf_path, sudo=True)
        admin_keyring = mon0_remote.read_file(admin_keyring_path, sudo=True)

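        # For each client role, create a client.{id} key on the mon with broad
        # caps (mds allow, mon/osd allow *), then copy that keyring plus
        # ceph.conf and the admin keyring to the client node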
        clients = ctx.cluster.only(teuthology.is_type('client'))
        for remote, roles_for_host in clients.remotes.items():
            for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                client_keyring = \
                    '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                mon0_remote.run(
                    args=[
                        'cd',
                        '{tdir}'.format(tdir=testdir),
                        run.Raw('&&'),
                        'sudo', 'bash', '-c',
                        run.Raw('"'), 'ceph',
                        'auth',
                        'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds', 'allow',
                        'mon', 'allow *',
                        'osd', 'allow *',
                        run.Raw('>'),
                        client_keyring,
                        run.Raw('"'),
                    ],
                )
                key_data = mon0_remote.read_file(
                    path=client_keyring,
                    sudo=True,
                )
                remote.sudo_write_file(
                    path=client_keyring,
                    data=key_data,
                    mode='0644'
                )
                remote.sudo_write_file(
                    path=admin_keyring_path,
                    data=admin_keyring,
                    mode='0644'
                )
                remote.sudo_write_file(
                    path=conf_path,
                    data=conf_data,
                    mode='0644'
                )

        if mds_nodes:
            log.info('Configuring CephFS...')
            Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        # create rbd pool
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'create', 'rbd', '128', '128'],
            check_status=False)
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'application', 'enable',
                'rbd', 'rbd', '--yes-i-really-mean-it'
            ],
            check_status=False)
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)
        ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.items():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)


@contextlib.contextmanager
def cli_test(ctx, config):
    """
    Exercise the most commonly used ceph-deploy CLI commands, ensure they
    all work, and start the cluster up via the init system.
    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"
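    # every ceph-deploy invocation below runs from this scratch directory, so
    # the generated ceph.conf and keyrings land under {testdir}/cdtest and can
    # be removed wholesale during cleanup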

    def execute_cdeploy(admin, cmd, path):
        """Execute a ceph-deploy command, either from the git checkout under
        `path` or from an installed ceph-deploy package."""
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))

    if config.get('rhbuild'):
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on the branch from the config, e.g. wip-*, master or next;
        # packages for all distros should exist for wip* branches
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    for node, role in mons.remotes.items():
        admin = node
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
        system_type = teuthology.get_system_type(admin)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)
    osds = ctx.cluster.only(teuthology.is_type('osd'))

    for remote, roles in osds.remotes.items():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if (len(devs) < 3):
            log.error(
                'Test needs minimum of 3 devices, only found %s',
                str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.items():
            lines = '[{section}]\n'.format(section=section)
            admin.sudo_write_file(conf_path, lines, append=True)
            for key, value in keys.items():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                admin.sudo_write_file(conf_path, lines, append=True)
    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_mgr_install = 'install {branch} --mgr '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    mgr_create = 'mgr create ' + nodename
    # either use create-keys or the push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_mgr_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, mgr_create, path)
    execute_cdeploy(admin, push_keys, path)

    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purposes, to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    out = remote.sh('sudo ceph health')
    log.info('Ceph health: %s', out.rstrip('\n'))
    log.info("Waiting for cluster to become healthy")
    with contextutil.safe_while(sleep=10, tries=6,
                                action='check health') as proceed:
        while proceed():
            out = remote.sh('sudo ceph health')
            if (out.split(None, 1)[0] == 'HEALTH_OK'):
                break
    rgw_install = 'install {branch} --rgw {node}'.format(
        branch=test_branch,
        node=nodename,
    )
    rgw_create = 'rgw create ' + nodename
    execute_cdeploy(admin, rgw_install, path)
    execute_cdeploy(admin, rgw_create, path)
    log.info('All ceph-deploy cli tests passed')
    try:
        yield
    finally:
        log.info("cleaning up")
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)
        for i in range(3):
            umount_dev = "{d}1".format(d=devs[i])
            remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
        cmd = 'purge ' + nodename
        execute_cdeploy(admin, cmd, path)
        cmd = 'purgedata ' + nodename
        execute_cdeploy(admin, cmd, path)
        log.info("Removing temporary dir")
        admin.run(
            args=[
                'rm',
                run.Raw('-rf'),
                run.Raw(conf_dir)],
            check_status=False)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])


@contextlib.contextmanager
def single_node_test(ctx, config):
    """
    - ceph-deploy.single_node_test: null

    # rhbuild testing
    - ceph-deploy.single_node_test:
        rhbuild: 1.2.3

    """
    log.info("Testing ceph-deploy on single node")
    if config is None:
        config = {}
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('rhbuild'):
        log.info("RH Build, Skip Download")
        with contextutil.nested(
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield
    else:
        with contextutil.nested(
                lambda: install_fn.ship_utilities(ctx=ctx, config=None),
                lambda: download_ceph_deploy(ctx=ctx, config=config),
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield


@contextlib.contextmanager
def upgrade(ctx, config):
    """
    Upgrade using ceph-deploy
    eg:
      ceph-deploy.upgrade:
        # to upgrade to a specific branch, use
        branch:
          stable: jewel
        # to set up the mgr node, use
        setup-mgr-node: True
        # to wait for the cluster to be healthy after the whole upgrade, use
        wait-for-healthy: True
        role: (upgrades the below roles serially)
          mon.a
          mon.b
          osd.0
    """
    roles = config.get('roles')
    # get the roles that are mapped as per ceph-deploy
    # roles are mapped for mon/mds eg: mon.a => mon.host_short_name
    mapped_role = ctx.cluster.mapped_role
    log.info("roles={r}, mapped_roles={mr}".format(r=roles, mr=mapped_role))
    if config.get('branch'):
        branch = config.get('branch')
        (var, val) = list(branch.items())[0]
        ceph_branch = '--{var}={val}'.format(var=var, val=val)
    else:
        # default to the wip branch under test
        dev_branch = ctx.config['branch']
        ceph_branch = '--dev={branch}'.format(branch=dev_branch)
    # get the node used for the initial deployment, which is mon.a
    mon_a = mapped_role.get('mon.a')
    (ceph_admin,) = ctx.cluster.only(mon_a).remotes.keys()
    testdir = teuthology.get_testdir(ctx)
    cmd = './ceph-deploy install ' + ceph_branch
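    # node names are appended to this single install command as the roles are
    # walked below, so nodes are upgraded one at a time and each invocation
    # also names the hosts upgraded before it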
    for role in roles:
        # check if this role is mapped (mon or mds)
        if mapped_role.get(role):
            role = mapped_role.get(role)
        remotes_and_roles = ctx.cluster.only(role).remotes
        for remote, roles in remotes_and_roles.items():
            nodename = remote.shortname
            cmd = cmd + ' ' + nodename
            log.info("Upgrading ceph on %s", nodename)
            ceph_admin.run(
                args=[
                    'cd',
                    '{tdir}/ceph-deploy'.format(tdir=testdir),
                    run.Raw('&&'),
                    run.Raw(cmd),
                ],
            )
            # restart all ceph services; ideally the upgrade would do this
            # itself, but it does not
            remote.run(
                args=[
                    'sudo', 'systemctl', 'restart', 'ceph.target'
                ]
            )
            ceph_admin.run(args=['sudo', 'ceph', '-s'])

    # workaround for http://tracker.ceph.com/issues/20950
    # write the correct mgr key to disk
    if config.get('setup-mgr-node', None):
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote, roles in mons.remotes.items():
            remote.run(
                args=[
                    run.Raw('sudo ceph auth get client.bootstrap-mgr'),
                    run.Raw('|'),
                    run.Raw('sudo tee'),
                    run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
                ]
            )

    if config.get('setup-mgr-node', None):
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        # install mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_install),
            ],
        )
        # create mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_create),
            ],
        )
        ceph_admin.run(args=['sudo', 'ceph', '-s'])
    if config.get('wait-for-healthy', None):
        wait_until_healthy(ctx, ceph_admin, use_sudo=True)
    yield


@contextlib.contextmanager
def task(ctx, config):
    """
    Set up and tear down a Ceph cluster.

    For example::

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
              stable: bobtail
            mon_initial_members: 1
            ceph-deploy-branch: my-ceph-deploy-branch
            only_mon: true
            keep_running: true
            # either choose bluestore or filestore, default is bluestore
            bluestore: True
            # or
            filestore: True
            # skip install of mgr for old releases using the below flag
            skip-mgr: True  (default is False)
            # to use ceph-volume instead of ceph-disk
            # ceph-disk can only be used with an old ceph-deploy release from pypi
            use-ceph-volume: true

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
              dev: master
            conf:
              mon:
                debug mon = 20

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
              testing:
            dmcrypt: yes
            separate_journal_disk: yes

    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task ceph-deploy only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('branch') is not None:
        assert isinstance(
            config['branch'], dict), 'branch must be a dictionary'

    log.info('task ceph-deploy with config ' + str(config))

    # we need to use 1.5.39-stable for testing the jewel or master branch with
    # ceph-disk
    if config.get('use-ceph-volume', False) is False:
        # check that we are not testing a specific ceph-deploy branch
        if config.get('ceph-deploy-branch', False) is False:
            config['ceph-deploy-branch'] = '1.5.39-stable'

    with contextutil.nested(
            lambda: install_fn.ship_utilities(ctx=ctx, config=None),
            lambda: download_ceph_deploy(ctx=ctx, config=config),
            lambda: build_ceph_cluster(ctx=ctx, config=config),
    ):
        yield