ceph/qa/tasks/ceph_deploy.py

   1 """
   2 Execute ceph-deploy as a task
   3 """
   4 from cStringIO import StringIO
   5
   6 import contextlib
   7 import os
   8 import time
   9 import logging
  10 import traceback
  11
  12 from teuthology import misc as teuthology
  13 from teuthology import contextutil
  14 from teuthology.config import config as teuth_config
  15 from teuthology.task import install as install_fn
  16 from teuthology.orchestra import run
  17 from tasks.cephfs.filesystem import Filesystem
  18 from teuthology.misc import wait_until_healthy
  19
  20 log = logging.getLogger(__name__)
  21
  22
  23 @contextlib.contextmanager
  24 def download_ceph_deploy(ctx, config):
  25     """
  26     Downloads ceph-deploy from the ceph.com git mirror and (by default)
  27     switches to the master branch. If the `ceph-deploy-branch` is specified, it
  28     will use that instead. The `bootstrap` script is ran, with the argument
  29     obtained from `python_version`, if specified.
  30     """
  31     # use mon.a for ceph_admin
  32     (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()
  33
  34     try:
  35         py_ver = str(config['python_version'])
  36     except KeyError:
  37         pass
  38     else:
  39         supported_versions = ['2', '3']
  40         if py_ver not in supported_versions:
  41             raise ValueError("python_version must be: {}, not {}".format(
  42                 ' or '.join(supported_versions), py_ver
  43             ))
  44
  45         log.info("Installing Python")
  46         system_type = teuthology.get_system_type(ceph_admin)
  47
  48         if system_type == 'rpm':
  49             package = 'python34' if py_ver == '3' else 'python'
  50             ctx.cluster.run(args=[
  51                 'sudo', 'yum', '-y', 'install',
  52                 package, 'python-virtualenv'
  53             ])
  54         else:
  55             package = 'python3' if py_ver == '3' else 'python'
  56             ctx.cluster.run(args=[
  57                 'sudo', 'apt-get', '-y', '--force-yes', 'install',
  58                 package, 'python-virtualenv'
  59             ])
  60
  61     log.info('Downloading ceph-deploy...')
  62     testdir = teuthology.get_testdir(ctx)
  63     ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')
  64
  65     ceph_admin.run(
  66         args=[
  67             'git', 'clone', '-b', ceph_deploy_branch,
  68             teuth_config.ceph_git_base_url + 'ceph-deploy.git',
  69             '{tdir}/ceph-deploy'.format(tdir=testdir),
  70         ],
  71     )
  72     args = [
  73         'cd',
  74         '{tdir}/ceph-deploy'.format(tdir=testdir),
  75         run.Raw('&&'),
  76         './bootstrap',
  77     ]
  78     try:
  79         args.append(str(config['python_version']))
  80     except KeyError:
  81         pass
  82     ceph_admin.run(args=args)
  83
  84     try:
  85         yield
  86     finally:
  87         log.info('Removing ceph-deploy ...')
  88         ceph_admin.run(
  89             args=[
  90                 'rm',
  91                 '-rf',
  92                 '{tdir}/ceph-deploy'.format(tdir=testdir),
  93             ],
  94         )
  95
  96
  97 def is_healthy(ctx, config):
  98     """Wait until a Ceph cluster is healthy."""
  99     testdir = teuthology.get_testdir(ctx)
 100     ceph_admin = teuthology.get_first_mon(ctx, config)
 101     (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
 102     max_tries = 90  # 90 tries * 10 secs --> 15 minutes
 103     tries = 0
 104     while True:
 105         tries += 1
 106         if tries >= max_tries:
 107             msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
 108             remote.run(
 109                 args=[
 110                     'cd',
 111                     '{tdir}'.format(tdir=testdir),
 112                     run.Raw('&&'),
 113                     'sudo', 'ceph',
 114                     'report',
 115                 ],
 116             )
 117             raise RuntimeError(msg)
 118
 119         r = remote.run(
 120             args=[
 121                 'cd',
 122                 '{tdir}'.format(tdir=testdir),
 123                 run.Raw('&&'),
 124                 'sudo', 'ceph',
 125                 'health',
 126             ],
 127             stdout=StringIO(),
 128             logger=log.getChild('health'),
 129         )
 130         out = r.stdout.getvalue()
 131         log.info('Ceph health: %s', out.rstrip('\n'))
 132         if out.split(None, 1)[0] == 'HEALTH_OK':
 133             break
 134         time.sleep(10)
 135
 136
 137 def get_nodes_using_role(ctx, target_role):
 138     """
 139     Extract the names of nodes that match a given role from a cluster, and modify the
 140     cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy
 141     uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23".
 142     """
 143
 144     # Nodes containing a service of the specified role
 145     nodes_of_interest = []
 146
 147     # Prepare a modified version of cluster.remotes with ceph-deploy-ized names
 148     modified_remotes = {}
 149     ceph_deploy_mapped = dict()
 150     for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
 151         modified_remotes[_remote] = []
 152         for svc_id in roles_for_host:
 153             if svc_id.startswith("{0}.".format(target_role)):
 154                 fqdn = str(_remote).split('@')[-1]
 155                 nodename = str(str(_remote).split('.')[0]).split('@')[1]
 156                 if target_role == 'mon':
 157                     nodes_of_interest.append(fqdn)
 158                 else:
 159                     nodes_of_interest.append(nodename)
 160                 mapped_role = "{0}.{1}".format(target_role, nodename)
 161                 modified_remotes[_remote].append(mapped_role)
 162                 # keep dict of mapped role for later use by tasks
 163                 # eg. mon.a => mon.node1
 164                 ceph_deploy_mapped[svc_id] = mapped_role
 165             else:
 166                 modified_remotes[_remote].append(svc_id)
 167
 168     ctx.cluster.remotes = modified_remotes
 169     ctx.cluster.mapped_role = ceph_deploy_mapped
 170
 171     return nodes_of_interest
 172
 173
 174 def get_dev_for_osd(ctx, config):
 175     """Get a list of all osd device names."""
 176     osd_devs = []
 177     for remote, roles_for_host in ctx.cluster.remotes.iteritems():
 178         host = remote.name.split('@')[-1]
 179         shortname = host.split('.')[0]
 180         devs = teuthology.get_scratch_devices(remote)
 181         num_osd_per_host = list(
 182             teuthology.roles_of_type(
 183                 roles_for_host, 'osd'))
 184         num_osds = len(num_osd_per_host)
 185         if config.get('separate_journal_disk') is not None:
 186             num_devs_reqd = 2 * num_osds
 187             assert num_devs_reqd <= len(
 188                 devs), 'fewer data and journal disks than required ' + shortname
 189             for dindex in range(0, num_devs_reqd, 2):
 190                 jd_index = dindex + 1
 191                 dev_short = devs[dindex].split('/')[-1]
 192                 jdev_short = devs[jd_index].split('/')[-1]
 193                 osd_devs.append((shortname, dev_short, jdev_short))
 194         else:
 195             assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
 196             for dev in devs[:num_osds]:
 197                 dev_short = dev.split('/')[-1]
 198                 osd_devs.append((shortname, dev_short))
 199     return osd_devs
 200
 201
 202 def get_all_nodes(ctx, config):
 203     """Return a string of node names separated by blanks"""
 204     nodelist = []
 205     for t, k in ctx.config['targets'].iteritems():
 206         host = t.split('@')[-1]
 207         simple_host = host.split('.')[0]
 208         nodelist.append(simple_host)
 209     nodelist = " ".join(nodelist)
 210     return nodelist
 211
 212 @contextlib.contextmanager
 213 def build_ceph_cluster(ctx, config):
 214     """Build a ceph cluster"""
 215
 216     # Expect to find ceph_admin on the first mon by ID, same place that the download task
 217     # puts it.  Remember this here, because subsequently IDs will change from those in
 218     # the test config to those that ceph-deploy invents.
 219
 220     (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()
 221
 222     def execute_ceph_deploy(cmd):
 223         """Remotely execute a ceph_deploy command"""
 224         return ceph_admin.run(
 225             args=[
 226                 'cd',
 227                 '{tdir}/ceph-deploy'.format(tdir=testdir),
 228                 run.Raw('&&'),
 229                 run.Raw(cmd),
 230             ],
 231             check_status=False,
 232         ).exitstatus
 233
 234     def ceph_disk_osd_create(ctx, config):
 235         node_dev_list = get_dev_for_osd(ctx, config)
 236         no_of_osds = 0
 237         for d in node_dev_list:
 238             node = d[0]
 239             for disk in d[1:]:
 240                 zap = './ceph-deploy disk zap ' + node + ':' + disk
 241                 estatus = execute_ceph_deploy(zap)
 242                 if estatus != 0:
 243                     raise RuntimeError("ceph-deploy: Failed to zap osds")
 244             osd_create_cmd = './ceph-deploy osd create '
 245             # first check for filestore, default is bluestore with ceph-deploy
 246             if config.get('filestore') is not None:
 247                 osd_create_cmd += '--filestore '
 248             elif config.get('bluestore') is not None:
 249                 osd_create_cmd += '--bluestore '
 250             if config.get('dmcrypt') is not None:
 251                 osd_create_cmd += '--dmcrypt '
 252             osd_create_cmd += ":".join(d)
 253             estatus_osd = execute_ceph_deploy(osd_create_cmd)
 254             if estatus_osd == 0:
 255                 log.info('successfully created osd')
 256                 no_of_osds += 1
 257             else:
 258                 raise RuntimeError("ceph-deploy: Failed to create osds")
 259         return no_of_osds
 260
 261     def ceph_volume_osd_create(ctx, config):
 262         osds = ctx.cluster.only(teuthology.is_type('osd'))
 263         no_of_osds = 0
 264         for remote in osds.remotes.iterkeys():
 265             # all devs should be lvm
 266             osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
 267             # default is bluestore so we just need config item for filestore
 268             roles = ctx.cluster.remotes[remote]
 269             dev_needed = len([role for role in roles
 270                               if role.startswith('osd')])
 271             all_devs = teuthology.get_scratch_devices(remote)
 272             log.info("node={n}, need_devs={d}, available={a}".format(
 273                         n=remote.shortname,
 274                         d=dev_needed,
 275                         a=all_devs,
 276                         ))
 277             devs = all_devs[0:dev_needed]
 278             # rest of the devices can be used for journal if required
 279             jdevs = dev_needed
 280             for device in devs:
 281                 device_split = device.split('/')
 282                 lv_device = device_split[-2] + '/' + device_split[-1]
 283                 if config.get('filestore') is not None:
 284                     osd_create_cmd += '--filestore --data ' + lv_device + ' '
 285                     # filestore with ceph-volume also needs journal disk
 286                     try:
 287                         jdevice = all_devs.pop(jdevs)
 288                     except IndexError:
 289                         raise RuntimeError("No device available for \
 290                                             journal configuration")
 291                     jdevice_split = jdevice.split('/')
 292                     j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
 293                     osd_create_cmd += '--journal ' + j_lv
 294                 else:
 295                     osd_create_cmd += ' --data ' + lv_device
 296                 estatus_osd = execute_ceph_deploy(osd_create_cmd)
 297                 if estatus_osd == 0:
 298                     log.info('successfully created osd')
 299                     no_of_osds += 1
 300                 else:
 301                     raise RuntimeError("ceph-deploy: Failed to create osds")
 302         return no_of_osds
 303
 304     try:
 305         log.info('Building ceph cluster using ceph-deploy...')
 306         testdir = teuthology.get_testdir(ctx)
 307         ceph_branch = None
 308         if config.get('branch') is not None:
 309             cbranch = config.get('branch')
 310             for var, val in cbranch.iteritems():
 311                 ceph_branch = '--{var}={val}'.format(var=var, val=val)
 312         all_nodes = get_all_nodes(ctx, config)
 313         mds_nodes = get_nodes_using_role(ctx, 'mds')
 314         mds_nodes = " ".join(mds_nodes)
 315         mon_node = get_nodes_using_role(ctx, 'mon')
 316         mon_nodes = " ".join(mon_node)
 317         # skip mgr based on config item
 318         # this is needed when test uses latest code to install old ceph
 319         # versions
 320         skip_mgr = config.get('skip-mgr', False)
 321         if not skip_mgr:
 322             mgr_nodes = get_nodes_using_role(ctx, 'mgr')
 323             mgr_nodes = " ".join(mgr_nodes)
 324         new_mon = './ceph-deploy new' + " " + mon_nodes
 325         if not skip_mgr:
 326             mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
 327         mon_hostname = mon_nodes.split(' ')[0]
 328         mon_hostname = str(mon_hostname)
 329         gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
 330         deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
 331
 332         if mon_nodes is None:
 333             raise RuntimeError("no monitor nodes in the config file")
 334
 335         estatus_new = execute_ceph_deploy(new_mon)
 336         if estatus_new != 0:
 337             raise RuntimeError("ceph-deploy: new command failed")
 338
 339         log.info('adding config inputs...')
 340         testdir = teuthology.get_testdir(ctx)
 341         conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
 342
 343         if config.get('conf') is not None:
 344             confp = config.get('conf')
 345             for section, keys in confp.iteritems():
 346                 lines = '[{section}]\n'.format(section=section)
 347                 teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
 348                                                 sudo=True)
 349                 for key, value in keys.iteritems():
 350                     log.info("[%s] %s = %s" % (section, key, value))
 351                     lines = '{key} = {value}\n'.format(key=key, value=value)
 352                     teuthology.append_lines_to_file(
 353                         ceph_admin, conf_path, lines, sudo=True)
 354
 355         # install ceph
 356         dev_branch = ctx.config['branch']
 357         branch = '--dev={branch}'.format(branch=dev_branch)
 358         if ceph_branch:
 359             option = ceph_branch
 360         else:
 361             option = branch
 362         install_nodes = './ceph-deploy install ' + option + " " + all_nodes
 363         estatus_install = execute_ceph_deploy(install_nodes)
 364         if estatus_install != 0:
 365             raise RuntimeError("ceph-deploy: Failed to install ceph")
 366         # install ceph-test package too
 367         install_nodes2 = './ceph-deploy install --tests ' + option + \
 368                          " " + all_nodes
 369         estatus_install = execute_ceph_deploy(install_nodes2)
 370         if estatus_install != 0:
 371             raise RuntimeError("ceph-deploy: Failed to install ceph-test")
 372
 373         mon_create_nodes = './ceph-deploy mon create-initial'
 374         # If the following fails, it is OK, it might just be that the monitors
 375         # are taking way more than a minute/monitor to form quorum, so lets
 376         # try the next block which will wait up to 15 minutes to gatherkeys.
 377         execute_ceph_deploy(mon_create_nodes)
 378
 379         # create-keys is explicit now
 380         # http://tracker.ceph.com/issues/16036
 381         mons = ctx.cluster.only(teuthology.is_type('mon'))
 382         for remote in mons.remotes.iterkeys():
 383             remote.run(args=['sudo', 'ceph-create-keys', '--cluster', 'ceph',
 384                              '--id', remote.shortname])
 385
 386         estatus_gather = execute_ceph_deploy(gather_keys)
 387         if estatus_gather != 0:
 388             raise RuntimeError("ceph-deploy: Failed during gather keys")
 389         # create osd's
 390         if config.get('use-ceph-volume', False):
 391             no_of_osds = ceph_volume_osd_create(ctx, config)
 392         else:
 393             # this method will only work with ceph-deploy v1.5.39 or older
 394             no_of_osds = ceph_disk_osd_create(ctx, config)
 395
 396         if not skip_mgr:
 397             execute_ceph_deploy(mgr_create)
 398
 399         if mds_nodes:
 400             estatus_mds = execute_ceph_deploy(deploy_mds)
 401             if estatus_mds != 0:
 402                 raise RuntimeError("ceph-deploy: Failed to deploy mds")
 403
 404         if config.get('test_mon_destroy') is not None:
 405             for d in range(1, len(mon_node)):
 406                 mon_destroy_nodes = './ceph-deploy mon destroy' + \
 407                     " " + mon_node[d]
 408                 estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
 409                 if estatus_mon_d != 0:
 410                     raise RuntimeError("ceph-deploy: Failed to delete monitor")
 411
 412
 413
 414         if config.get('wait-for-healthy', True) and no_of_osds >= 2:
 415             is_healthy(ctx=ctx, config=None)
 416
 417             log.info('Setting up client nodes...')
 418             conf_path = '/etc/ceph/ceph.conf'
 419             admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
 420             first_mon = teuthology.get_first_mon(ctx, config)
 421             (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
 422             conf_data = teuthology.get_file(
 423                 remote=mon0_remote,
 424                 path=conf_path,
 425                 sudo=True,
 426             )
 427             admin_keyring = teuthology.get_file(
 428                 remote=mon0_remote,
 429                 path=admin_keyring_path,
 430                 sudo=True,
 431             )
 432
 433             clients = ctx.cluster.only(teuthology.is_type('client'))
 434             for remot, roles_for_host in clients.remotes.iteritems():
 435                 for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
 436                     client_keyring = \
 437                         '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
 438                     mon0_remote.run(
 439                         args=[
 440                             'cd',
 441                             '{tdir}'.format(tdir=testdir),
 442                             run.Raw('&&'),
 443                             'sudo', 'bash', '-c',
 444                             run.Raw('"'), 'ceph',
 445                             'auth',
 446                             'get-or-create',
 447                             'client.{id}'.format(id=id_),
 448                             'mds', 'allow',
 449                             'mon', 'allow *',
 450                             'osd', 'allow *',
 451                             run.Raw('>'),
 452                             client_keyring,
 453                             run.Raw('"'),
 454                         ],
 455                     )
 456                     key_data = teuthology.get_file(
 457                         remote=mon0_remote,
 458                         path=client_keyring,
 459                         sudo=True,
 460                     )
 461                     teuthology.sudo_write_file(
 462                         remote=remot,
 463                         path=client_keyring,
 464                         data=key_data,
 465                         perms='0644'
 466                     )
 467                     teuthology.sudo_write_file(
 468                         remote=remot,
 469                         path=admin_keyring_path,
 470                         data=admin_keyring,
 471                         perms='0644'
 472                     )
 473                     teuthology.sudo_write_file(
 474                         remote=remot,
 475                         path=conf_path,
 476                         data=conf_data,
 477                         perms='0644'
 478                     )
 479
 480             if mds_nodes:
 481                 log.info('Configuring CephFS...')
 482                 Filesystem(ctx, create=True)
 483         elif not config.get('only_mon'):
 484             raise RuntimeError(
 485                 "The cluster is NOT operational due to insufficient OSDs")
 486         # create rbd pool
 487         ceph_admin.run(
 488             args=[
 489                 'sudo', 'ceph', '--cluster', 'ceph',
 490                 'osd', 'pool', 'create', 'rbd', '128', '128'],
 491             check_status=False)
 492         ceph_admin.run(
 493             args=[
 494                 'sudo', 'ceph', '--cluster', 'ceph',
 495                 'osd', 'pool', 'application', 'enable',
 496                 'rbd', 'rbd', '--yes-i-really-mean-it'
 497                 ],
 498             check_status=False)
 499         yield
 500
 501     except Exception:
 502         log.info(
 503             "Error encountered, logging exception before tearing down ceph-deploy")
 504         log.info(traceback.format_exc())
 505         raise
 506     finally:
 507         if config.get('keep_running'):
 508             return
 509         log.info('Stopping ceph...')
 510         ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
 511                               'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
 512                               'sudo', 'systemctl', 'stop', 'ceph.target'])
 513
 514         # Are you really not running anymore?
 515         # try first with the init tooling
 516         # ignoring the status so this becomes informational only
 517         ctx.cluster.run(
 518             args=[
 519                 'sudo', 'status', 'ceph-all', run.Raw('||'),
 520                 'sudo', 'service', 'ceph', 'status', run.Raw('||'),
 521                 'sudo', 'systemctl', 'status', 'ceph.target'],
 522             check_status=False)
 523
 524         # and now just check for the processes themselves, as if upstart/sysvinit
 525         # is lying to us. Ignore errors if the grep fails
 526         ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
 527                               'grep', '-v', 'grep', run.Raw('|'),
 528                               'grep', 'ceph'], check_status=False)
 529
 530         if ctx.archive is not None:
 531             # archive mon data, too
 532             log.info('Archiving mon data...')
 533             path = os.path.join(ctx.archive, 'data')
 534             os.makedirs(path)
 535             mons = ctx.cluster.only(teuthology.is_type('mon'))
 536             for remote, roles in mons.remotes.iteritems():
 537                 for role in roles:
 538                     if role.startswith('mon.'):
 539                         teuthology.pull_directory_tarball(
 540                             remote,
 541                             '/var/lib/ceph/mon',
 542                             path + '/' + role + '.tgz')
 543
 544             log.info('Compressing logs...')
 545             run.wait(
 546                 ctx.cluster.run(
 547                     args=[
 548                         'sudo',
 549                         'find',
 550                         '/var/log/ceph',
 551                         '-name',
 552                         '*.log',
 553                         '-print0',
 554                         run.Raw('|'),
 555                         'sudo',
 556                         'xargs',
 557                         '-0',
 558                         '--no-run-if-empty',
 559                         '--',
 560                         'gzip',
 561                         '--',
 562                     ],
 563                     wait=False,
 564                 ),
 565             )
 566
 567             log.info('Archiving logs...')
 568             path = os.path.join(ctx.archive, 'remote')
 569             os.makedirs(path)
 570             for remote in ctx.cluster.remotes.iterkeys():
 571                 sub = os.path.join(path, remote.shortname)
 572                 os.makedirs(sub)
 573                 teuthology.pull_directory(remote, '/var/log/ceph',
 574                                           os.path.join(sub, 'log'))
 575
 576         # Prevent these from being undefined if the try block fails
 577         all_nodes = get_all_nodes(ctx, config)
 578         purge_nodes = './ceph-deploy purge' + " " + all_nodes
 579         purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes
 580
 581         log.info('Purging package...')
 582         execute_ceph_deploy(purge_nodes)
 583         log.info('Purging data...')
 584         execute_ceph_deploy(purgedata_nodes)
 585
 586
 587 @contextlib.contextmanager
 588 def cli_test(ctx, config):
 589     """
 590      ceph-deploy cli to exercise most commonly use cli's and ensure
 591      all commands works and also startup the init system.
 592
 593     """
 594     log.info('Ceph-deploy Test')
 595     if config is None:
 596         config = {}
 597     test_branch = ''
 598     conf_dir = teuthology.get_testdir(ctx) + "/cdtest"
 599
 600     def execute_cdeploy(admin, cmd, path):
 601         """Execute ceph-deploy commands """
 602         """Either use git path or repo path """
 603         args = ['cd', conf_dir, run.Raw(';')]
 604         if path:
 605             args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
 606         else:
 607             args.append('ceph-deploy')
 608         args.append(run.Raw(cmd))
 609         ec = admin.run(args=args, check_status=False).exitstatus
 610         if ec != 0:
 611             raise RuntimeError(
 612                 "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))
 613
 614     if config.get('rhbuild'):
 615         path = None
 616     else:
 617         path = teuthology.get_testdir(ctx)
 618         # test on branch from config eg: wip-* , master or next etc
 619         # packages for all distro's should exist for wip*
 620         if ctx.config.get('branch'):
 621             branch = ctx.config.get('branch')
 622             test_branch = ' --dev={branch} '.format(branch=branch)
 623     mons = ctx.cluster.only(teuthology.is_type('mon'))
 624     for node, role in mons.remotes.iteritems():
 625         admin = node
 626         admin.run(args=['mkdir', conf_dir], check_status=False)
 627         nodename = admin.shortname
 628     system_type = teuthology.get_system_type(admin)
 629     if config.get('rhbuild'):
 630         admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
 631     log.info('system type is %s', system_type)
 632     osds = ctx.cluster.only(teuthology.is_type('osd'))
 633
 634     for remote, roles in osds.remotes.iteritems():
 635         devs = teuthology.get_scratch_devices(remote)
 636         log.info("roles %s", roles)
 637         if (len(devs) < 3):
 638             log.error(
 639                 'Test needs minimum of 3 devices, only found %s',
 640                 str(devs))
 641             raise RuntimeError("Needs minimum of 3 devices ")
 642
 643     conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
 644     new_cmd = 'new ' + nodename
 645     execute_cdeploy(admin, new_cmd, path)
 646     if config.get('conf') is not None:
 647         confp = config.get('conf')
 648         for section, keys in confp.iteritems():
 649             lines = '[{section}]\n'.format(section=section)
 650             teuthology.append_lines_to_file(admin, conf_path, lines,
 651                                             sudo=True)
 652             for key, value in keys.iteritems():
 653                 log.info("[%s] %s = %s" % (section, key, value))
 654                 lines = '{key} = {value}\n'.format(key=key, value=value)
 655                 teuthology.append_lines_to_file(admin, conf_path, lines,
 656                                                 sudo=True)
 657     new_mon_install = 'install {branch} --mon '.format(
 658         branch=test_branch) + nodename
 659     new_mgr_install = 'install {branch} --mgr '.format(
 660         branch=test_branch) + nodename
 661     new_osd_install = 'install {branch} --osd '.format(
 662         branch=test_branch) + nodename
 663     new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
 664     create_initial = 'mon create-initial '
 665     # either use create-keys or push command
 666     push_keys = 'admin ' + nodename
 667     execute_cdeploy(admin, new_mon_install, path)
 668     execute_cdeploy(admin, new_mgr_install, path)
 669     execute_cdeploy(admin, new_osd_install, path)
 670     execute_cdeploy(admin, new_admin, path)
 671     execute_cdeploy(admin, create_initial, path)
 672     execute_cdeploy(admin, push_keys, path)
 673
 674     for i in range(3):
 675         zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
 676         prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
 677         execute_cdeploy(admin, zap_disk, path)
 678         execute_cdeploy(admin, prepare, path)
 679
 680     log.info("list files for debugging purpose to check file permissions")
 681     admin.run(args=['ls', run.Raw('-lt'), conf_dir])
 682     remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
 683     r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
 684     out = r.stdout.getvalue()
 685     log.info('Ceph health: %s', out.rstrip('\n'))
 686     log.info("Waiting for cluster to become healthy")
 687     with contextutil.safe_while(sleep=10, tries=6,
 688                                 action='check health') as proceed:
 689         while proceed():
 690             r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
 691             out = r.stdout.getvalue()
 692             if (out.split(None, 1)[0] == 'HEALTH_OK'):
 693                 break
 694     rgw_install = 'install {branch} --rgw {node}'.format(
 695         branch=test_branch,
 696         node=nodename,
 697     )
 698     rgw_create = 'rgw create ' + nodename
 699     execute_cdeploy(admin, rgw_install, path)
 700     execute_cdeploy(admin, rgw_create, path)
 701     log.info('All ceph-deploy cli tests passed')
 702     try:
 703         yield
 704     finally:
 705         log.info("cleaning up")
 706         ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
 707                               'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
 708                               'sudo', 'systemctl', 'stop', 'ceph.target'],
 709                         check_status=False)
 710         time.sleep(4)
 711         for i in range(3):
 712             umount_dev = "{d}1".format(d=devs[i])
 713             r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
 714         cmd = 'purge ' + nodename
 715         execute_cdeploy(admin, cmd, path)
 716         cmd = 'purgedata ' + nodename
 717         execute_cdeploy(admin, cmd, path)
 718         log.info("Removing temporary dir")
 719         admin.run(
 720             args=[
 721                 'rm',
 722                 run.Raw('-rf'),
 723                 run.Raw(conf_dir)],
 724             check_status=False)
 725         if config.get('rhbuild'):
 726             admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
 727
 728
 729 @contextlib.contextmanager
 730 def single_node_test(ctx, config):
 731     """
 732     - ceph-deploy.single_node_test: null
 733
 734     #rhbuild testing
 735     - ceph-deploy.single_node_test:
 736         rhbuild: 1.2.3
 737
 738     """
 739     log.info("Testing ceph-deploy on single node")
 740     if config is None:
 741         config = {}
 742     overrides = ctx.config.get('overrides', {})
 743     teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))
 744
 745     if config.get('rhbuild'):
 746         log.info("RH Build, Skip Download")
 747         with contextutil.nested(
 748             lambda: cli_test(ctx=ctx, config=config),
 749         ):
 750             yield
 751     else:
 752         with contextutil.nested(
 753             lambda: install_fn.ship_utilities(ctx=ctx, config=None),
 754             lambda: download_ceph_deploy(ctx=ctx, config=config),
 755             lambda: cli_test(ctx=ctx, config=config),
 756         ):
 757             yield
 758
 759
 760 @contextlib.contextmanager
 761 def upgrade(ctx, config):
 762     """
 763      Upgrade using ceph-deploy
 764      eg:
 765        ceph-deploy.upgrade:
 766           # to upgrade to specific branch, use
 767           branch:
 768              stable: jewel
 769            # to setup mgr node, use
 770            setup-mgr-node: True
 771            # to wait for cluster to be healthy after all upgrade, use
 772            wait-for-healthy: True
 773            role: (upgrades the below roles serially)
 774               mon.a
 775               mon.b
 776               osd.0
 777      """
 778     roles = config.get('roles')
 779     # get the roles that are mapped as per ceph-deploy
 780     # roles are mapped for mon/mds eg: mon.a  => mon.host_short_name
 781     mapped_role = ctx.cluster.mapped_role
 782     if config.get('branch'):
 783         branch = config.get('branch')
 784         (var, val) = branch.items()[0]
 785         ceph_branch = '--{var}={val}'.format(var=var, val=val)
 786     else:
 787         # default to wip-branch under test
 788         dev_branch = ctx.config['branch']
 789         ceph_branch = '--dev={branch}'.format(branch=dev_branch)
 790     # get the node used for initial deployment which is mon.a
 791     mon_a = mapped_role.get('mon.a')
 792     (ceph_admin,) = ctx.cluster.only(mon_a).remotes.iterkeys()
 793     testdir = teuthology.get_testdir(ctx)
 794     cmd = './ceph-deploy install ' + ceph_branch
 795     for role in roles:
 796         # check if this role is mapped (mon or mds)
 797         if mapped_role.get(role):
 798             role = mapped_role.get(role)
 799         remotes_and_roles = ctx.cluster.only(role).remotes
 800         for remote, roles in remotes_and_roles.iteritems():
 801             nodename = remote.shortname
 802             cmd = cmd + ' ' + nodename
 803             log.info("Upgrading ceph on  %s", nodename)
 804             ceph_admin.run(
 805                 args=[
 806                     'cd',
 807                     '{tdir}/ceph-deploy'.format(tdir=testdir),
 808                     run.Raw('&&'),
 809                     run.Raw(cmd),
 810                 ],
 811             )
 812             # restart all ceph services, ideally upgrade should but it does not
 813             remote.run(
 814                 args=[
 815                     'sudo', 'systemctl', 'restart', 'ceph.target'
 816                 ]
 817             )
 818             ceph_admin.run(args=['sudo', 'ceph', '-s'])
 819
 820     # workaround for http://tracker.ceph.com/issues/20950
 821     # write the correct mgr key to disk
 822     if config.get('setup-mgr-node', None):
 823         mons = ctx.cluster.only(teuthology.is_type('mon'))
 824         for remote, roles in mons.remotes.iteritems():
 825             remote.run(
 826                 args=[
 827                     run.Raw('sudo ceph auth get client.bootstrap-mgr'),
 828                     run.Raw('|'),
 829                     run.Raw('sudo tee'),
 830                     run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
 831                 ]
 832             )
 833
 834     if config.get('setup-mgr-node', None):
 835         mgr_nodes = get_nodes_using_role(ctx, 'mgr')
 836         mgr_nodes = " ".join(mgr_nodes)
 837         mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
 838         mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
 839         # install mgr
 840         ceph_admin.run(
 841             args=[
 842                 'cd',
 843                 '{tdir}/ceph-deploy'.format(tdir=testdir),
 844                 run.Raw('&&'),
 845                 run.Raw(mgr_install),
 846                 ],
 847             )
 848         # create mgr
 849         ceph_admin.run(
 850             args=[
 851                 'cd',
 852                 '{tdir}/ceph-deploy'.format(tdir=testdir),
 853                 run.Raw('&&'),
 854                 run.Raw(mgr_create),
 855                 ],
 856             )
 857         ceph_admin.run(args=['sudo', 'ceph', '-s'])
 858     if config.get('wait-for-healthy', None):
 859         wait_until_healthy(ctx, ceph_admin, use_sudo=True)
 860     yield
 861
 862
 863 @contextlib.contextmanager
 864 def task(ctx, config):
 865     """
 866     Set up and tear down a Ceph cluster.
 867
 868     For example::
 869
 870         tasks:
 871         - install:
 872              extras: yes
 873         - ssh_keys:
 874         - ceph-deploy:
 875              branch:
 876                 stable: bobtail
 877              mon_initial_members: 1
 878              ceph-deploy-branch: my-ceph-deploy-branch
 879              only_mon: true
 880              keep_running: true
 881              # either choose bluestore or filestore, default is bluestore
 882              bluestore: True
 883              # or
 884              filestore: True
 885              # skip install of mgr for old release using below flag
 886              skip-mgr: True  ( default is False )
 887              # to use ceph-volume instead of ceph-disk
 888              # ceph-disk can only be used with old ceph-deploy release from pypi
 889              use-ceph-volume: true
 890
 891         tasks:
 892         - install:
 893              extras: yes
 894         - ssh_keys:
 895         - ceph-deploy:
 896              branch:
 897                 dev: master
 898              conf:
 899                 mon:
 900                    debug mon = 20
 901
 902         tasks:
 903         - install:
 904              extras: yes
 905         - ssh_keys:
 906         - ceph-deploy:
 907              branch:
 908                 testing:
 909              dmcrypt: yes
 910              separate_journal_disk: yes
 911
 912     """
 913     if config is None:
 914         config = {}
 915
 916     assert isinstance(config, dict), \
 917         "task ceph-deploy only supports a dictionary for configuration"
 918
 919     overrides = ctx.config.get('overrides', {})
 920     teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))
 921
 922     if config.get('branch') is not None:
 923         assert isinstance(
 924             config['branch'], dict), 'branch must be a dictionary'
 925
 926     log.info('task ceph-deploy with config ' + str(config))
 927
 928     # we need to use 1.5.39-stable for testing jewel or master branch with
 929     # ceph-disk
 930     if config.get('use-ceph-volume', False) is False:
 931         # check we are not testing specific branch
 932         if config.get('ceph-deploy-branch', False) is False:
 933             config['ceph-deploy-branch'] = '1.5.39-stable'
 934
 935     with contextutil.nested(
 936         lambda: install_fn.ship_utilities(ctx=ctx, config=None),
 937         lambda: download_ceph_deploy(ctx=ctx, config=config),
 938         lambda: build_ceph_cluster(ctx=ctx, config=config),
 939     ):
 940         yield