ceph/qa/tasks/cephadm.py
1 """
2 Ceph cluster task, deployed via cephadm orchestrator
3 """
4 import argparse
5 import configobj
6 import contextlib
7 import errno
8 import logging
9 import os
10 import json
11 import re
12 import uuid
13 import yaml
14
15 import six
16 import toml
17 from io import BytesIO
18 from six import StringIO
19 from tarfile import ReadError
20 from tasks.ceph_manager import CephManager
21 from teuthology import misc as teuthology
22 from teuthology import contextutil
23 from teuthology.orchestra import run
24 from teuthology.orchestra.daemon import DaemonGroup
25 from teuthology.config import config as teuth_config
26
27 # these items we use from ceph.py should probably eventually move elsewhere
28 from tasks.ceph import get_mons, healthy
29
30 CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus']
31
32 log = logging.getLogger(__name__)
33
34
35 def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs):
36 testdir = teuthology.get_testdir(ctx)
37 return remote.run(
38 args=[
39 'sudo',
40 ctx.cephadm,
41 '--image', ctx.ceph[cluster_name].image,
42 'shell',
43 '-c', '/etc/ceph/{}.conf'.format(cluster_name),
44 '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
45 '--fsid', ctx.ceph[cluster_name].fsid,
46 ] + extra_cephadm_args + [
47 '--',
48 ] + args,
49 **kwargs
50 )
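# For reference, _shell() above roughly composes the following command and
# runs it on the given remote (illustrative; paths and values are placeholders,
# and the cephadm path depends on cephadm_mode):
#
#   sudo <cephadm> --image <image> shell \
#       -c /etc/ceph/<cluster>.conf -k /etc/ceph/<cluster>.client.admin.keyring \
#       --fsid <fsid> [extra cephadm args] -- <args...>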
51
52 def build_initial_config(ctx, config):
53 cluster_name = config['cluster']
54
55 path = os.path.join(os.path.dirname(__file__), 'cephadm.conf')
56 conf = configobj.ConfigObj(path, file_error=True)
57
58 conf.setdefault('global', {})
59 conf['global']['fsid'] = ctx.ceph[cluster_name].fsid
60
61 # overrides
62 for section, keys in config.get('conf', {}).items():
63 for key, value in keys.items():
64 log.info(" override: [%s] %s = %s" % (section, key, value))
65 if section not in conf:
66 conf[section] = {}
67 conf[section][key] = value
68
69 return conf
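# For reference, the 'conf' overrides merged above come from the task config
# and are shaped like the following (option names/values are illustrative):
#
#   conf:
#     global:
#       osd pool default size: 2
#     mon:
#       debug mon: 20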
70
71 @contextlib.contextmanager
72 def normalize_hostnames(ctx):
73 """
74 Ensure we have short hostnames throughout, for consistency between
75 remote.shortname and socket.gethostname() in cephadm.
76 """
77 log.info('Normalizing hostnames...')
78 ctx.cluster.run(args=[
79 'sudo',
80 'hostname',
81 run.Raw('$(hostname -s)'),
82 ])
83
84 try:
85 yield
86 finally:
87 pass
88
89 @contextlib.contextmanager
90 def download_cephadm(ctx, config, ref):
91 cluster_name = config['cluster']
92
93 if config.get('cephadm_mode') != 'cephadm-package':
94 ref = config.get('cephadm_branch', ref)
95 git_url = teuth_config.get_ceph_git_url()
96 log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref))
97 if git_url.startswith('https://github.com/'):
98 # git archive doesn't like https:// URLs, which we use with github.
99 rest = git_url.split('https://github.com/', 1)[1]
100 rest = re.sub(r'\.git/?$', '', rest).strip() # no .git suffix
101 ctx.cluster.run(
102 args=[
103 'curl', '--silent',
104 'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm',
105 run.Raw('>'),
106 ctx.cephadm,
107 run.Raw('&&'),
108 'ls', '-l',
109 ctx.cephadm,
110 ],
111 )
112 else:
113 ctx.cluster.run(
114 args=[
115 'git', 'archive',
116 '--remote=' + git_url,
117 ref,
118 'src/cephadm/cephadm',
119 run.Raw('|'),
120 'tar', '-xO', 'src/cephadm/cephadm',
121 run.Raw('>'),
122 ctx.cephadm,
123 ],
124 )
125 # sanity-check the resulting file and set executable bit
126 cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm)
127 ctx.cluster.run(
128 args=[
129 'test', '-s', ctx.cephadm,
130 run.Raw('&&'),
131 'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'),
132 run.Raw('&&'),
133 'chmod', '+x', ctx.cephadm,
134 ],
135 )
136
137 try:
138 yield
139 finally:
140 log.info('Removing cluster...')
141 ctx.cluster.run(args=[
142 'sudo',
143 ctx.cephadm,
144 'rm-cluster',
145 '--fsid', ctx.ceph[cluster_name].fsid,
146 '--force',
147 ])
148
149 if config.get('cephadm_mode') == 'root':
150 log.info('Removing cephadm ...')
151 ctx.cluster.run(
152 args=[
153 'rm',
154 '-rf',
155 ctx.cephadm,
156 ],
157 )
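# For reference, for a github ceph git url the curl above fetches a URL of the
# form https://raw.githubusercontent.com/<org>/<repo>/<ref>/src/cephadm/cephadm
# (illustrative); for other git servers, `git archive` is piped through tar to
# extract the same src/cephadm/cephadm file.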
158
159 @contextlib.contextmanager
160 def ceph_log(ctx, config):
161 cluster_name = config['cluster']
162 fsid = ctx.ceph[cluster_name].fsid
163
164 # Add logs directory to job's info log file
165 with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
166 info_yaml = yaml.safe_load(info_file)
167 info_file.seek(0)
168 if 'archive' not in info_yaml:
169 info_yaml['archive'] = {'log': '/var/log/ceph'}
170 else:
171 info_yaml['archive']['log'] = '/var/log/ceph'
172 yaml.safe_dump(info_yaml, info_file, default_flow_style=False)
173
174 try:
175 yield
176
177 except Exception:
178 # we need to know this below
179 ctx.summary['success'] = False
180 raise
181
182 finally:
183 log.info('Checking cluster log for badness...')
184 def first_in_ceph_log(pattern, excludes):
185 """
186 Find the first occurrence of the pattern specified in the Ceph log.
187 Returns None if none found.
188
189 :param pattern: Pattern scanned for.
190 :param excludes: Patterns to ignore.
191 :return: First line of text (or None if not found)
192 """
193 args = [
194 'sudo',
195 'egrep', pattern,
196 '/var/log/ceph/{fsid}/ceph.log'.format(
197 fsid=fsid),
198 ]
199 if excludes:
200 for exclude in excludes:
201 args.extend([run.Raw('|'), 'egrep', '-v', exclude])
202 args.extend([
203 run.Raw('|'), 'head', '-n', '1',
204 ])
205 r = ctx.ceph[cluster_name].bootstrap_remote.run(
206 stdout=StringIO(),
207 args=args,
208 )
209 stdout = r.stdout.getvalue()
210 if stdout != '':
211 return stdout
212 return None
213
214 if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]',
215 config.get('log-whitelist')) is not None:
216 log.warning('Found errors (ERR|WRN|SEC) in cluster log')
217 ctx.summary['success'] = False
218 # use the most severe problem as the failure reason
219 if 'failure_reason' not in ctx.summary:
220 for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
221 match = first_in_ceph_log(pattern, config['log-whitelist'])
222 if match is not None:
223 ctx.summary['failure_reason'] = \
224 '"{match}" in cluster log'.format(
225 match=match.rstrip('\n'),
226 )
227 break
228
229 if ctx.archive is not None and \
230 not (ctx.config.get('archive-on-error') and ctx.summary['success']):
231 # and logs
232 log.info('Compressing logs...')
233 run.wait(
234 ctx.cluster.run(
235 args=[
236 'sudo',
237 'find',
238 '/var/log/ceph', # all logs, not just for the cluster
239 '/var/log/rbd-target-api', # ceph-iscsi
240 '-name',
241 '*.log',
242 '-print0',
243 run.Raw('|'),
244 'sudo',
245 'xargs',
246 '-0',
247 '--no-run-if-empty',
248 '--',
249 'gzip',
250 '--',
251 ],
252 wait=False,
253 ),
254 )
255
256 log.info('Archiving logs...')
257 path = os.path.join(ctx.archive, 'remote')
258 try:
259 os.makedirs(path)
260 except OSError:
261 pass
262 for remote in ctx.cluster.remotes.keys():
263 sub = os.path.join(path, remote.name)
264 try:
265 os.makedirs(sub)
266 except OSError:
267 pass
268 try:
269 teuthology.pull_directory(remote, '/var/log/ceph', # everything
270 os.path.join(sub, 'log'))
271 except ReadError:
272 pass
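# For reference, 'log-whitelist' is a list of regexes that are ignored when the
# cluster log is scanned above; a typical override might look like this
# (patterns are illustrative):
#
#   overrides:
#     ceph:
#       log-whitelist:
#         - \(OSD_DOWN\)
#         - overall HEALTH_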
273
274 @contextlib.contextmanager
275 def ceph_crash(ctx, config):
276 """
277 Gather crash dumps from /var/lib/ceph/$fsid/crash
278 """
279 cluster_name = config['cluster']
280 fsid = ctx.ceph[cluster_name].fsid
281
282 # Add logs directory to job's info log file
283 with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
284 info_yaml = yaml.safe_load(info_file)
285 info_file.seek(0)
286 if 'archive' not in info_yaml:
287 info_yaml['archive'] = {'crash': '/var/lib/ceph/%s/crash' % fsid}
288 else:
289 info_yaml['archive']['crash'] = '/var/lib/ceph/%s/crash' % fsid
290 yaml.safe_dump(info_yaml, info_file, default_flow_style=False)
291
292 try:
293 yield
294
295 finally:
296 if ctx.archive is not None:
297 log.info('Archiving crash dumps...')
298 path = os.path.join(ctx.archive, 'remote')
299 try:
300 os.makedirs(path)
301 except OSError:
302 pass
303 for remote in ctx.cluster.remotes.keys():
304 sub = os.path.join(path, remote.name)
305 try:
306 os.makedirs(sub)
307 except OSError:
308 pass
309 try:
310 teuthology.pull_directory(remote,
311 '/var/lib/ceph/%s/crash' % fsid,
312 os.path.join(sub, 'crash'))
313 except ReadError:
314 pass
315
316 @contextlib.contextmanager
317 def ceph_bootstrap(ctx, config, registry):
318 """
319 Bootstrap the ceph cluster; set up the containers' registry mirror
320 before bootstrapping if a registry is provided.
321
322 :param ctx: the argparse.Namespace object
323 :param config: the config dict
324 :param registry: url to containers' mirror registry
325 """
326 cluster_name = config['cluster']
327 testdir = teuthology.get_testdir(ctx)
328 fsid = ctx.ceph[cluster_name].fsid
329
330 bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
331 first_mon = ctx.ceph[cluster_name].first_mon
332 first_mon_role = ctx.ceph[cluster_name].first_mon_role
333 mons = ctx.ceph[cluster_name].mons
334
335 ctx.cluster.run(args=[
336 'sudo', 'mkdir', '-p', '/etc/ceph',
337 ])
338 ctx.cluster.run(args=[
339 'sudo', 'chmod', '777', '/etc/ceph',
340 ])
341 if registry:
342 add_mirror_to_cluster(ctx, registry)
343 try:
344 # write seed config
345 log.info('Writing seed config...')
346 conf_fp = BytesIO()
347 seed_config = build_initial_config(ctx, config)
348 seed_config.write(conf_fp)
349 teuthology.write_file(
350 remote=bootstrap_remote,
351 path='{}/seed.{}.conf'.format(testdir, cluster_name),
352 data=conf_fp.getvalue())
353 log.debug('Final config:\n' + conf_fp.getvalue().decode())
354 ctx.ceph[cluster_name].conf = seed_config
355
356 # register initial daemons
357 ctx.daemons.register_daemon(
358 bootstrap_remote, 'mon', first_mon,
359 cluster=cluster_name,
360 fsid=fsid,
361 logger=log.getChild('mon.' + first_mon),
362 wait=False,
363 started=True,
364 )
365 if not ctx.ceph[cluster_name].roleless:
366 first_mgr = ctx.ceph[cluster_name].first_mgr
367 ctx.daemons.register_daemon(
368 bootstrap_remote, 'mgr', first_mgr,
369 cluster=cluster_name,
370 fsid=fsid,
371 logger=log.getChild('mgr.' + first_mgr),
372 wait=False,
373 started=True,
374 )
375
376 # bootstrap
377 log.info('Bootstrapping...')
378 cmd = [
379 'sudo',
380 ctx.cephadm,
381 '--image', ctx.ceph[cluster_name].image,
382 '-v',
383 'bootstrap',
384 '--fsid', fsid,
385 '--config', '{}/seed.{}.conf'.format(testdir, cluster_name),
386 '--output-config', '/etc/ceph/{}.conf'.format(cluster_name),
387 '--output-keyring',
388 '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
389 '--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name),
390 ]
391 if not ctx.ceph[cluster_name].roleless:
392 cmd += [
393 '--mon-id', first_mon,
394 '--mgr-id', first_mgr,
395 '--orphan-initial-daemons', # we will do it explicitly!
396 '--skip-monitoring-stack', # we'll provision these explicitly
397 ]
398 if mons[first_mon_role].startswith('['):
399 cmd += ['--mon-addrv', mons[first_mon_role]]
400 else:
401 cmd += ['--mon-ip', mons[first_mon_role]]
402 if config.get('skip_dashboard'):
403 cmd += ['--skip-dashboard']
404 # bootstrap makes the keyring root 0600, so +r it for our purposes
405 cmd += [
406 run.Raw('&&'),
407 'sudo', 'chmod', '+r',
408 '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
409 ]
410 bootstrap_remote.run(args=cmd)
411
412 # fetch keys and configs
413 log.info('Fetching config...')
414 ctx.ceph[cluster_name].config_file = teuthology.get_file(
415 remote=bootstrap_remote,
416 path='/etc/ceph/{}.conf'.format(cluster_name))
417 log.info('Fetching client.admin keyring...')
418 ctx.ceph[cluster_name].admin_keyring = teuthology.get_file(
419 remote=bootstrap_remote,
420 path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name))
421 log.info('Fetching mon keyring...')
422 ctx.ceph[cluster_name].mon_keyring = teuthology.get_file(
423 remote=bootstrap_remote,
424 path='/var/lib/ceph/%s/mon.%s/keyring' % (fsid, first_mon),
425 sudo=True)
426
427 # fetch ssh key, distribute to additional nodes
428 log.info('Fetching pub ssh key...')
429 ssh_pub_key = teuthology.get_file(
430 remote=bootstrap_remote,
431 path='{}/{}.pub'.format(testdir, cluster_name)
432 ).decode('ascii').strip()
433
434 log.info('Installing pub ssh key for root users...')
435 ctx.cluster.run(args=[
436 'sudo', 'install', '-d', '-m', '0700', '/root/.ssh',
437 run.Raw('&&'),
438 'echo', ssh_pub_key,
439 run.Raw('|'),
440 'sudo', 'tee', '-a', '/root/.ssh/authorized_keys',
441 run.Raw('&&'),
442 'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys',
443 ])
444
445 # set options
446 _shell(ctx, cluster_name, bootstrap_remote,
447 ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true'])
448
449 # add other hosts
450 for remote in ctx.cluster.remotes.keys():
451 if remote == bootstrap_remote:
452 continue
453 log.info('Writing (initial) conf and keyring to %s' % remote.shortname)
454 teuthology.write_file(
455 remote=remote,
456 path='/etc/ceph/{}.conf'.format(cluster_name),
457 data=ctx.ceph[cluster_name].config_file)
458 teuthology.write_file(
459 remote=remote,
460 path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
461 data=ctx.ceph[cluster_name].admin_keyring)
462
463 log.info('Adding host %s to orchestrator...' % remote.shortname)
464 _shell(ctx, cluster_name, remote, [
465 'ceph', 'orch', 'host', 'add',
466 remote.shortname
467 ])
468 r = _shell(ctx, cluster_name, remote,
469 ['ceph', 'orch', 'host', 'ls', '--format=json'],
470 stdout=StringIO())
471 hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())]
472 assert remote.shortname in hosts
473
474 yield
475
476 finally:
477 log.info('Cleaning up testdir ceph.* files...')
478 ctx.cluster.run(args=[
479 'rm', '-f',
480 '{}/seed.{}.conf'.format(testdir, cluster_name),
481 '{}/{}.pub'.format(testdir, cluster_name),
482 ])
483
484 log.info('Stopping all daemons...')
485
486 # this doesn't block until they are all stopped...
487 #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])
488
489 # so, stop them individually
490 for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES, True):
491 cluster, type_, id_ = teuthology.split_role(role)
492 try:
493 ctx.daemons.get_daemon(type_, id_, cluster).stop()
494 except Exception:
495 log.exception('Failed to stop "{role}"'.format(role=role))
496 raise
497
498 # clean up /etc/ceph
499 ctx.cluster.run(args=[
500 'sudo', 'rm', '-f',
501 '/etc/ceph/{}.conf'.format(cluster_name),
502 '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
503 ])
504
505 @contextlib.contextmanager
506 def ceph_mons(ctx, config):
507 """
508 Deploy any additional mons
509 """
510 cluster_name = config['cluster']
511 fsid = ctx.ceph[cluster_name].fsid
512 num_mons = 1
513
514 try:
515 for remote, roles in ctx.cluster.remotes.items():
516 for mon in [r for r in roles
517 if teuthology.is_type('mon', cluster_name)(r)]:
518 c_, _, id_ = teuthology.split_role(mon)
519 if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
520 continue
521 log.info('Adding %s on %s' % (mon, remote.shortname))
522 num_mons += 1
523 _shell(ctx, cluster_name, remote, [
524 'ceph', 'orch', 'daemon', 'add', 'mon',
525 remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
526 ])
527 ctx.daemons.register_daemon(
528 remote, 'mon', id_,
529 cluster=cluster_name,
530 fsid=fsid,
531 logger=log.getChild(mon),
532 wait=False,
533 started=True,
534 )
535
536 with contextutil.safe_while(sleep=1, tries=180) as proceed:
537 while proceed():
538 log.info('Waiting for %d mons in monmap...' % (num_mons))
539 r = _shell(
540 ctx=ctx,
541 cluster_name=cluster_name,
542 remote=remote,
543 args=[
544 'ceph', 'mon', 'dump', '-f', 'json',
545 ],
546 stdout=StringIO(),
547 )
548 j = json.loads(r.stdout.getvalue())
549 if len(j['mons']) == num_mons:
550 break
551
552 # refresh our (final) ceph.conf file
553 bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
554 log.info('Generating final ceph.conf file...')
555 r = _shell(
556 ctx=ctx,
557 cluster_name=cluster_name,
558 remote=bootstrap_remote,
559 args=[
560 'ceph', 'config', 'generate-minimal-conf',
561 ],
562 stdout=StringIO(),
563 )
564 ctx.ceph[cluster_name].config_file = r.stdout.getvalue()
565
566 yield
567
568 finally:
569 pass
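# For reference, each additional mon above is added with a command of the form
#   ceph orch daemon add mon <host>:<ip-or-addrvec>=<mon-id>
# (num_mons starts at 1 for the bootstrap mon), and the loop then polls
# `ceph mon dump -f json` until the monmap contains the expected count.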
570
571 @contextlib.contextmanager
572 def ceph_mgrs(ctx, config):
573 """
574 Deploy any additional mgrs
575 """
576 cluster_name = config['cluster']
577 fsid = ctx.ceph[cluster_name].fsid
578
579 try:
580 nodes = []
581 daemons = {}
582 for remote, roles in ctx.cluster.remotes.items():
583 for mgr in [r for r in roles
584 if teuthology.is_type('mgr', cluster_name)(r)]:
585 c_, _, id_ = teuthology.split_role(mgr)
586 if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr:
587 continue
588 log.info('Adding %s on %s' % (mgr, remote.shortname))
589 nodes.append(remote.shortname + '=' + id_)
590 daemons[mgr] = (remote, id_)
591 if nodes:
592 _shell(ctx, cluster_name, remote, [
593 'ceph', 'orch', 'apply', 'mgr',
594 str(len(nodes) + 1) + ';' + ';'.join(nodes)]
595 )
596 for mgr, i in daemons.items():
597 remote, id_ = i
598 ctx.daemons.register_daemon(
599 remote, 'mgr', id_,
600 cluster=cluster_name,
601 fsid=fsid,
602 logger=log.getChild(mgr),
603 wait=False,
604 started=True,
605 )
606
607 yield
608
609 finally:
610 pass
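# For reference, the placement argument built above is a single string of the
# form '<count>;<host1>=<id1>;<host2>=<id2>' (hostnames illustrative), where
# the count is len(nodes) + 1 to account for the mgr created at bootstrap.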
611
612 @contextlib.contextmanager
613 def ceph_osds(ctx, config):
614 """
615 Deploy OSDs
616 """
617 cluster_name = config['cluster']
618 fsid = ctx.ceph[cluster_name].fsid
619
620 try:
621 log.info('Deploying OSDs...')
622
623 # provision OSDs in numeric order
624 id_to_remote = {}
625 devs_by_remote = {}
626 for remote, roles in ctx.cluster.remotes.items():
627 devs_by_remote[remote] = teuthology.get_scratch_devices(remote)
628 for osd in [r for r in roles
629 if teuthology.is_type('osd', cluster_name)(r)]:
630 _, _, id_ = teuthology.split_role(osd)
631 id_to_remote[int(id_)] = (osd, remote)
632
633 cur = 0
634 for osd_id in sorted(id_to_remote.keys()):
635 osd, remote = id_to_remote[osd_id]
636 _, _, id_ = teuthology.split_role(osd)
637 assert int(id_) == cur
638 devs = devs_by_remote[remote]
639 assert devs ## FIXME ##
640 dev = devs.pop()
641 if all(_ in dev for _ in ('lv', 'vg')):
642 short_dev = dev.replace('/dev/', '')
643 else:
644 short_dev = dev
645 log.info('Deploying %s on %s with %s...' % (
646 osd, remote.shortname, dev))
647 _shell(ctx, cluster_name, remote, [
648 'ceph-volume', 'lvm', 'zap', dev])
649 _shell(ctx, cluster_name, remote, [
650 'ceph', 'orch', 'daemon', 'add', 'osd',
651 remote.shortname + ':' + short_dev
652 ])
653 ctx.daemons.register_daemon(
654 remote, 'osd', id_,
655 cluster=cluster_name,
656 fsid=fsid,
657 logger=log.getChild(osd),
658 wait=False,
659 started=True,
660 )
661 cur += 1
662
663 yield
664 finally:
665 pass
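# For reference, each OSD above is created by zapping one scratch device and
# then running a command of the form
#   ceph orch daemon add osd <host>:<device>
# where <device> is the popped scratch device (with the '/dev/' prefix stripped
# for LVM-style vg/lv paths); OSD ids are expected to be numbered consecutively,
# hence the `assert int(id_) == cur` check.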
666
667 @contextlib.contextmanager
668 def ceph_mdss(ctx, config):
669 """
670 Deploy MDSs
671 """
672 cluster_name = config['cluster']
673 fsid = ctx.ceph[cluster_name].fsid
674
675 nodes = []
676 daemons = {}
677 for remote, roles in ctx.cluster.remotes.items():
678 for role in [r for r in roles
679 if teuthology.is_type('mds', cluster_name)(r)]:
680 c_, _, id_ = teuthology.split_role(role)
681 log.info('Adding %s on %s' % (role, remote.shortname))
682 nodes.append(remote.shortname + '=' + id_)
683 daemons[role] = (remote, id_)
684 if nodes:
685 _shell(ctx, cluster_name, remote, [
686 'ceph', 'orch', 'apply', 'mds',
687 'all',
688 str(len(nodes)) + ';' + ';'.join(nodes)]
689 )
690 for role, i in daemons.items():
691 remote, id_ = i
692 ctx.daemons.register_daemon(
693 remote, 'mds', id_,
694 cluster=cluster_name,
695 fsid=fsid,
696 logger=log.getChild(role),
697 wait=False,
698 started=True,
699 )
700
701 yield
702
703 @contextlib.contextmanager
704 def ceph_monitoring(daemon_type, ctx, config):
705 """
706 Deploy prometheus, node-exporter, etc.
707 """
708 cluster_name = config['cluster']
709 fsid = ctx.ceph[cluster_name].fsid
710
711 nodes = []
712 daemons = {}
713 for remote, roles in ctx.cluster.remotes.items():
714 for role in [r for r in roles
715 if teuthology.is_type(daemon_type, cluster_name)(r)]:
716 c_, _, id_ = teuthology.split_role(role)
717 log.info('Adding %s on %s' % (role, remote.shortname))
718 nodes.append(remote.shortname + '=' + id_)
719 daemons[role] = (remote, id_)
720 if nodes:
721 _shell(ctx, cluster_name, remote, [
722 'ceph', 'orch', 'apply', daemon_type,
723 str(len(nodes)) + ';' + ';'.join(nodes)]
724 )
725 for role, i in daemons.items():
726 remote, id_ = i
727 ctx.daemons.register_daemon(
728 remote, daemon_type, id_,
729 cluster=cluster_name,
730 fsid=fsid,
731 logger=log.getChild(role),
732 wait=False,
733 started=True,
734 )
735
736 yield
737
738 @contextlib.contextmanager
739 def ceph_rgw(ctx, config):
740 """
741 Deploy rgw
742 """
743 cluster_name = config['cluster']
744 fsid = ctx.ceph[cluster_name].fsid
745
746 nodes = {}
747 daemons = {}
748 for remote, roles in ctx.cluster.remotes.items():
749 for role in [r for r in roles
750 if teuthology.is_type('rgw', cluster_name)(r)]:
751 c_, _, id_ = teuthology.split_role(role)
752 log.info('Adding %s on %s' % (role, remote.shortname))
753 realmzone = '.'.join(id_.split('.')[0:2])
754 if realmzone not in nodes:
755 nodes[realmzone] = []
756 nodes[realmzone].append(remote.shortname + '=' + id_)
757 daemons[role] = (remote, id_)
758
759 for realmzone in nodes.keys():
760 (realm, zone) = realmzone.split('.', 1)
761
762 # TODO: those should be moved to mgr/cephadm
763 _shell(ctx, cluster_name, remote,
764 ['radosgw-admin', 'realm', 'create', '--rgw-realm', realm, '--default']
765 )
766 _shell(ctx, cluster_name, remote,
767 ['radosgw-admin', 'zonegroup', 'create', '--rgw-zonegroup=default', '--master', '--default']
768 )
769 _shell(ctx, cluster_name, remote,
770 ['radosgw-admin', 'zone', 'create', '--rgw-zonegroup=default', '--rgw-zone', zone, '--master', '--default']
771 )
772
773 for realmzone, rz_nodes in nodes.items():
774 (realm, zone) = realmzone.split('.', 1)
775 _shell(ctx, cluster_name, remote, [
776 'ceph', 'orch', 'apply', 'rgw', realm, zone,
777 '--placement',
778 str(len(rz_nodes)) + ';' + ';'.join(rz_nodes)]
779 )
780 for role, i in daemons.items():
781 remote, id_ = i
782 ctx.daemons.register_daemon(
783 remote, 'rgw', id_,
784 cluster=cluster_name,
785 fsid=fsid,
786 logger=log.getChild(role),
787 wait=False,
788 started=True,
789 )
790
791 yield
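# For reference, rgw role ids are expected to encode the realm and zone as their
# first two dot-separated components, e.g. a role like 'rgw.myrealm.myzone.a'
# (illustrative) yields realm 'myrealm' and zone 'myzone'; a realm and zone are
# created (in the 'default' zonegroup) for each such pair before the rgw
# daemons are applied.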
792
793
794 @contextlib.contextmanager
795 def ceph_iscsi(ctx, config):
796 """
797 Deploy iSCSI gateways
798 """
799 cluster_name = config['cluster']
800 fsid = ctx.ceph[cluster_name].fsid
801
802 nodes = []
803 daemons = {}
804 for remote, roles in ctx.cluster.remotes.items():
805 for role in [r for r in roles
806 if teuthology.is_type('iscsi', cluster_name)(r)]:
807 c_, _, id_ = teuthology.split_role(role)
808 log.info('Adding %s on %s' % (role, remote.shortname))
809 nodes.append(remote.shortname + '=' + id_)
810 daemons[role] = (remote, id_)
811 if nodes:
812 poolname = 'iscsi'
813 # ceph osd pool create iscsi 3 3 replicated
814 _shell(ctx, cluster_name, remote, [
815 'ceph', 'osd', 'pool', 'create',
816 poolname, '3', '3', 'replicated']
817 )
818
819 _shell(ctx, cluster_name, remote, [
820 'ceph', 'osd', 'pool', 'application', 'enable',
821 poolname, 'rbd']
822 )
823
824 # ceph orch apply iscsi iscsi user password
825 _shell(ctx, cluster_name, remote, [
826 'ceph', 'orch', 'apply', 'iscsi',
827 poolname, 'user', 'password',
828 '--placement', str(len(nodes)) + ';' + ';'.join(nodes)]
829 )
830 for role, i in daemons.items():
831 remote, id_ = i
832 ctx.daemons.register_daemon(
833 remote, 'iscsi', id_,
834 cluster=cluster_name,
835 fsid=fsid,
836 logger=log.getChild(role),
837 wait=False,
838 started=True,
839 )
840
841 yield
842
843 @contextlib.contextmanager
844 def ceph_clients(ctx, config):
845 cluster_name = config['cluster']
846 testdir = teuthology.get_testdir(ctx)
847
848 log.info('Setting up client nodes...')
849 clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
850 testdir = teuthology.get_testdir(ctx)
851 coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
852 for remote, roles_for_host in clients.remotes.items():
853 for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
854 cluster_name):
855 name = teuthology.ceph_role(role)
856 client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name,
857 name)
858 r = _shell(
859 ctx=ctx,
860 cluster_name=cluster_name,
861 remote=remote,
862 args=[
863 'ceph', 'auth',
864 'get-or-create', name,
865 'mon', 'allow *',
866 'osd', 'allow *',
867 'mds', 'allow *',
868 'mgr', 'allow *',
869 ],
870 stdout=StringIO(),
871 )
872 keyring = r.stdout.getvalue()
873 teuthology.sudo_write_file(
874 remote=remote,
875 path=client_keyring,
876 data=keyring,
877 perms='0644'
878 )
879 yield
880
881 @contextlib.contextmanager
882 def ceph_initial():
883 try:
884 yield
885 finally:
886 log.info('Teardown complete')
887
888 ## public methods
889 @contextlib.contextmanager
890 def stop(ctx, config):
891 """
892 Stop ceph daemons
893
894 For example::
895 tasks:
896 - ceph.stop: [mds.*]
897
898 tasks:
899 - ceph.stop: [osd.0, osd.2]
900
901 tasks:
902 - ceph.stop:
903 daemons: [osd.0, osd.2]
904
905 """
906 if config is None:
907 config = {}
908 elif isinstance(config, list):
909 config = {'daemons': config}
910
911 daemons = ctx.daemons.resolve_role_list(
912 config.get('daemons', None), CEPH_ROLE_TYPES, True)
913 clusters = set()
914
915 for role in daemons:
916 cluster, type_, id_ = teuthology.split_role(role)
917 ctx.daemons.get_daemon(type_, id_, cluster).stop()
918 clusters.add(cluster)
919
920 # for cluster in clusters:
921 # ctx.ceph[cluster].watchdog.stop()
922 # ctx.ceph[cluster].watchdog.join()
923
924 yield
925
926 def shell(ctx, config):
927 """
928 Execute (shell) commands
929 """
930 cluster_name = config.get('cluster', 'ceph')
931
932 env = []
933 if 'env' in config:
934 for k in config['env']:
935 env.extend(['-e', k + '=' + ctx.config.get(k, '')])
936 del config['env']
937
938 if 'all' in config and len(config) == 1:
939 a = config['all']
940 roles = teuthology.all_roles(ctx.cluster)
941 config = dict((id_, a) for id_ in roles)
942
943 for role, ls in config.items():
944 (remote,) = ctx.cluster.only(role).remotes.keys()
945 log.info('Running commands on role %s host %s', role, remote.name)
946 for c in ls:
947 _shell(ctx, cluster_name, remote,
948 ['bash', '-c', c],
949 extra_cephadm_args=env)
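# For reference, this is driven by a task block of roughly the following shape
# (role names and commands are illustrative); 'env' lists keys to forward from
# the job config, and the special key 'all' applies the commands to every role:
#
#   tasks:
#   - cephadm.shell:
#       env: [sha1]
#       mon.a:
#         - ceph orch status
#         - ceph orch ps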
950
951 @contextlib.contextmanager
952 def tweaked_option(ctx, config):
953 """
954 set an option, and then restore it with its original value
955
956 Note: because of the way tasks are executed/nested, it is not recommended
957 to use this method as a standalone task; otherwise it is likely to restore
958 the tweaked option at the /end/ of the 'tasks' block.
959 """
960 saved_options = {}
961 # we can complicate this when necessary
962 options = ['mon-health-to-clog']
963 type_, id_ = 'mon', '*'
964 cluster = config.get('cluster', 'ceph')
965 manager = ctx.managers[cluster]
966 if id_ == '*':
967 get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_))
968 else:
969 get_from = id_
970 for option in options:
971 if option not in config:
972 continue
973 value = 'true' if config[option] else 'false'
974 option = option.replace('-', '_')
975 old_value = manager.get_config(type_, get_from, option)
976 if value != old_value:
977 saved_options[option] = old_value
978 manager.inject_args(type_, id_, option, value)
979 yield
980 for option, value in saved_options.items():
981 manager.inject_args(type_, id_, option, value)
982
983 @contextlib.contextmanager
984 def restart(ctx, config):
985 """
986 restart ceph daemons
987
988 For example::
989 tasks:
990 - ceph.restart: [all]
991
992 For example::
993 tasks:
994 - ceph.restart: [osd.0, mon.1, mds.*]
995
996 or::
997
998 tasks:
999 - ceph.restart:
1000 daemons: [osd.0, mon.1]
1001 wait-for-healthy: false
1002 wait-for-osds-up: true
1003
1004 :param ctx: Context
1005 :param config: Configuration
1006 """
1007 if config is None:
1008 config = {}
1009 elif isinstance(config, list):
1010 config = {'daemons': config}
1011
1012 daemons = ctx.daemons.resolve_role_list(
1013 config.get('daemons', None), CEPH_ROLE_TYPES, True)
1014 clusters = set()
1015
1016 log.info('daemons %s' % daemons)
1017 with tweaked_option(ctx, config):
1018 for role in daemons:
1019 cluster, type_, id_ = teuthology.split_role(role)
1020 d = ctx.daemons.get_daemon(type_, id_, cluster)
1021 assert d, 'daemon %s does not exist' % role
1022 d.stop()
1023 if type_ == 'osd':
1024 ctx.managers[cluster].mark_down_osd(id_)
1025 d.restart()
1026 clusters.add(cluster)
1027
1028 if config.get('wait-for-healthy', True):
1029 for cluster in clusters:
1030 healthy(ctx=ctx, config=dict(cluster=cluster))
1031 if config.get('wait-for-osds-up', False):
1032 for cluster in clusters:
1033 ctx.managers[cluster].wait_for_all_osds_up()
1034 yield
1035
1036 @contextlib.contextmanager
1037 def distribute_config_and_admin_keyring(ctx, config):
1038 """
1039 Distribute a sufficient config and keyring for clients
1040 """
1041 cluster_name = config['cluster']
1042 log.info('Distributing (final) config and client.admin keyring...')
1043 for remote, roles in ctx.cluster.remotes.items():
1044 teuthology.sudo_write_file(
1045 remote=remote,
1046 path='/etc/ceph/{}.conf'.format(cluster_name),
1047 data=ctx.ceph[cluster_name].config_file)
1048 teuthology.sudo_write_file(
1049 remote=remote,
1050 path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
1051 data=ctx.ceph[cluster_name].admin_keyring)
1052 try:
1053 yield
1054 finally:
1055 ctx.cluster.run(args=[
1056 'sudo', 'rm', '-f',
1057 '/etc/ceph/{}.conf'.format(cluster_name),
1058 '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
1059 ])
1060
1061 @contextlib.contextmanager
1062 def crush_setup(ctx, config):
1063 cluster_name = config['cluster']
1064
1065 profile = config.get('crush_tunables', 'default')
1066 log.info('Setting crush tunables to %s', profile)
1067 _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
1068 args=['ceph', 'osd', 'crush', 'tunables', profile])
1069 yield
1070
1071 @contextlib.contextmanager
1072 def _bypass():
1073 yield
1074
1075 @contextlib.contextmanager
1076 def initialize_config(ctx, config):
1077 cluster_name = config['cluster']
1078 testdir = teuthology.get_testdir(ctx)
1079
1080 ctx.ceph[cluster_name].thrashers = []
1081 # fixme: setup watchdog, ala ceph.py
1082
1083 ctx.ceph[cluster_name].roleless = False # see below
1084
1085 first_ceph_cluster = False
1086 if not hasattr(ctx, 'daemons'):
1087 first_ceph_cluster = True
1088
1089 # cephadm mode?
1090 if 'cephadm_mode' not in config:
1091 config['cephadm_mode'] = 'root'
1092 assert config['cephadm_mode'] in ['root', 'cephadm-package']
1093 if config['cephadm_mode'] == 'root':
1094 ctx.cephadm = testdir + '/cephadm'
1095 else:
1096 ctx.cephadm = 'cephadm' # in the path
1097
1098 if first_ceph_cluster:
1099 # FIXME: this is global for all clusters
1100 ctx.daemons = DaemonGroup(
1101 use_cephadm=ctx.cephadm)
1102
1103 # uuid
1104 fsid = str(uuid.uuid1())
1105 log.info('Cluster fsid is %s' % fsid)
1106 ctx.ceph[cluster_name].fsid = fsid
1107
1108 # mon ips
1109 log.info('Choosing monitor IPs and ports...')
1110 remotes_and_roles = ctx.cluster.remotes.items()
1111 ips = [host for (host, port) in
1112 (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
1113
1114 if config.get('roleless', False):
1115 # mons will be named after hosts
1116 first_mon = None
1117 for remote, _ in remotes_and_roles:
1118 ctx.cluster.remotes[remote].append('mon.' + remote.shortname)
1119 if not first_mon:
1120 first_mon = remote.shortname
1121 bootstrap_remote = remote
1122 log.info('No mon roles; fabricating mons')
1123
1124 roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()]
1125
1126 ctx.ceph[cluster_name].mons = get_mons(
1127 roles, ips, cluster_name,
1128 mon_bind_msgr2=config.get('mon_bind_msgr2', True),
1129 mon_bind_addrvec=config.get('mon_bind_addrvec', True),
1130 )
1131 log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)
1132
1133 if config.get('roleless', False):
1134 ctx.ceph[cluster_name].roleless = True
1135 ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
1136 ctx.ceph[cluster_name].first_mon = first_mon
1137 ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
1138 else:
1139 first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
1140 _, _, first_mon = teuthology.split_role(first_mon_role)
1141 (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
1142 log.info('First mon is mon.%s on %s' % (first_mon,
1143 bootstrap_remote.shortname))
1144 ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
1145 ctx.ceph[cluster_name].first_mon = first_mon
1146 ctx.ceph[cluster_name].first_mon_role = first_mon_role
1147
1148 others = ctx.cluster.remotes[bootstrap_remote]
1149 mgrs = sorted([r for r in others
1150 if teuthology.is_type('mgr', cluster_name)(r)])
1151 if not mgrs:
1152 raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon)
1153 _, _, first_mgr = teuthology.split_role(mgrs[0])
1154 log.info('First mgr is %s' % (first_mgr))
1155 ctx.ceph[cluster_name].first_mgr = first_mgr
1156 yield
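# For reference, initialize_config() above handles (among others) these config
# keys: 'cephadm_mode' ('root' downloads a cephadm script into the testdir,
# 'cephadm-package' expects cephadm already on the PATH), 'roleless' (fabricate
# one mon per host, named after its shortname), and 'mon_bind_msgr2' /
# 'mon_bind_addrvec', which are passed through to get_mons().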
1157
1158 @contextlib.contextmanager
1159 def task(ctx, config):
1160 """
1161 Deploy ceph cluster using cephadm
1162
1163 Set up the containers' registry mirrors before the bootstrap, if the
1164 corresponding config is provided in the teuthology server config yaml file.
1165
1166 For example, teuthology.yaml can contain the 'defaults' section:
1167
1168 defaults:
1169 cephadm:
1170 containers:
1171 registry_mirrors:
1172 docker.io: 'registry.mirror.example.com:5000'
1173 image: 'quay.io/ceph-ci/ceph'
1174
1175 Using overrides makes it possible to customize these settings per run.
1176 The equivalent 'overrides' section looks like:
1177
1178 overrides:
1179 cephadm:
1180 containers:
1181 registry_mirrors:
1182 docker.io: 'registry.mirror.example.com:5000'
1183 image: 'quay.io/ceph-ci/ceph'
1184
1185 :param ctx: the argparse.Namespace object
1186 :param config: the config dict
1187 """
1188 if config is None:
1189 config = {}
1190
1191 assert isinstance(config, dict), \
1192 "task only supports a dictionary for configuration"
1193
1194 overrides = ctx.config.get('overrides', {})
1195 teuthology.deep_merge(config, overrides.get('ceph', {}))
1196 teuthology.deep_merge(config, overrides.get('cephadm', {}))
1197 log.info('Config: ' + str(config))
1198
1199 testdir = teuthology.get_testdir(ctx)
1200
1201 # set up cluster context
1202 if not hasattr(ctx, 'ceph'):
1203 ctx.ceph = {}
1204 ctx.managers = {}
1205 if 'cluster' not in config:
1206 config['cluster'] = 'ceph'
1207 cluster_name = config['cluster']
1208 if cluster_name not in ctx.ceph:
1209 ctx.ceph[cluster_name] = argparse.Namespace()
1210 ctx.ceph[cluster_name].bootstrapped = False
1211
1212 # image
1213 teuth_defaults = teuth_config.get('defaults', {})
1214 cephadm_defaults = teuth_defaults.get('cephadm', {})
1215 containers_defaults = cephadm_defaults.get('containers', {})
1216 mirrors_defaults = containers_defaults.get('registry_mirrors', {})
1217 container_registry_mirror = mirrors_defaults.get('docker.io', None)
1218 container_image_name = containers_defaults.get('image', None)
1219
1220 containers = config.get('containers', {})
1221 mirrors = containers.get('registry_mirrors', {})
1222 container_image_name = containers.get('image', container_image_name)
1223 container_registry_mirror = mirrors.get('docker.io',
1224 container_registry_mirror)
1225
1226
1227 if not hasattr(ctx.ceph[cluster_name], 'image'):
1228 ctx.ceph[cluster_name].image = config.get('image')
1229 ref = None
1230 if not ctx.ceph[cluster_name].image:
1231 if not container_image_name:
1232 raise Exception("Configuration error occurred. "
1233 "The 'image' value is undefined for 'cephadm' task. "
1234 "Please provide corresponding options in the task's "
1235 "config, task 'overrides', or teuthology 'defaults' "
1236 "section.")
1237 sha1 = config.get('sha1')
1238 flavor = config.get('flavor', 'default')
1239
1240 if sha1:
1241 if flavor == "crimson":
1242 ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor
1243 else:
1244 ctx.ceph[cluster_name].image = container_image_name + ':' + sha1
1245 ref = sha1
1246 else:
1247 # hmm, fall back to branch?
1248 branch = config.get('branch', 'master')
1249 ref = branch
1250 ctx.ceph[cluster_name].image = container_image_name + ':' + branch
1251 log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)
1252
1253
1254 with contextutil.nested(
1255 # if the cluster is already bootstrapped, bypass the corresponding methods
1256 lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
1257 else initialize_config(ctx=ctx, config=config),
1258 lambda: ceph_initial(),
1259 lambda: normalize_hostnames(ctx=ctx),
1260 lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
1261 else download_cephadm(ctx=ctx, config=config, ref=ref),
1262 lambda: ceph_log(ctx=ctx, config=config),
1263 lambda: ceph_crash(ctx=ctx, config=config),
1264 lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\
1265 else ceph_bootstrap(ctx, config,
1266 container_registry_mirror),
1267 lambda: crush_setup(ctx=ctx, config=config),
1268 lambda: ceph_mons(ctx=ctx, config=config),
1269 lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
1270 lambda: ceph_mgrs(ctx=ctx, config=config),
1271 lambda: ceph_osds(ctx=ctx, config=config),
1272 lambda: ceph_mdss(ctx=ctx, config=config),
1273 lambda: ceph_rgw(ctx=ctx, config=config),
1274 lambda: ceph_iscsi(ctx=ctx, config=config),
1275 lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
1276 lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
1277 lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
1278 lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
1279 lambda: ceph_clients(ctx=ctx, config=config),
1280 ):
1281 ctx.managers[cluster_name] = CephManager(
1282 ctx.ceph[cluster_name].bootstrap_remote,
1283 ctx=ctx,
1284 logger=log.getChild('ceph_manager.' + cluster_name),
1285 cluster=cluster_name,
1286 cephadm=True,
1287 )
1288
1289 try:
1290 if config.get('wait-for-healthy', True):
1291 healthy(ctx=ctx, config=config)
1292
1293 log.info('Setup complete, yielding')
1294 yield
1295
1296 finally:
1297 log.info('Teardown begin')
1298
1299
1300 def registries_add_mirror_to_docker_io(conf, mirror):
1301 config = toml.loads(conf)
1302 is_v1 = 'registries' in config
1303 if is_v1:
1304 search = config.get('registries', {}).get('search', {}).get('registries', [])
1305 insecure = config.get('registries', {}).get('search', {}).get('insecure', [])
1306 # ideally `v2: MutableMapping[str, Any] = {`, but the annotation needs Python 3
1307 v2 = {
1308 'unqualified-search-registries': search,
1309 'registry': [
1310 {
1311 'prefix': reg,
1312 'location': reg,
1313 'insecure': reg in insecure,
1314 'blocked': False,
1315 } for reg in search
1316 ]
1317 }
1318 else:
1319 v2 = config # type: ignore
1320 dockers = [
1321 r for r in v2['registry'] if
1322 r.get('prefix') == 'docker.io' or r.get('location') == 'docker.io'
1323 ]
1324 if dockers:
1325 docker = dockers[0]
1326 if 'mirror' not in docker:
1327 docker['mirror'] = [{
1328 "location": mirror,
1329 "insecure": True,
1330 }]
1331 return v2
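# For reference, a v1-style registries.conf handled above looks roughly like
#
#   [registries.search]
#   registries = ['docker.io']
#
# while the v2-style structure returned here serializes (via toml.dumps) to
# roughly (mirror location illustrative):
#
#   unqualified-search-registries = ['docker.io']
#   [[registry]]
#   prefix = 'docker.io'
#   location = 'docker.io'
#   insecure = false
#   blocked = false
#   [[registry.mirror]]
#   location = 'my.mirror.example.com:5000'
#   insecure = true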
1332
1333
1334 def add_mirror_to_cluster(ctx, mirror):
1335 log.info('Adding local image mirror %s' % mirror)
1336
1337 registries_conf = '/etc/containers/registries.conf'
1338
1339 for remote in ctx.cluster.remotes.keys():
1340 try:
1341 config = teuthology.get_file(
1342 remote=remote,
1343 path=registries_conf
1344 )
1345 new_config = toml.dumps(registries_add_mirror_to_docker_io(config.decode('utf-8'), mirror))
1346
1347 teuthology.sudo_write_file(
1348 remote=remote,
1349 path=registries_conf,
1350 data=six.ensure_str(new_config),
1351 )
1352 except IOError as e: # py3: use FileNotFoundError instead.
1353 if e.errno != errno.ENOENT:
1354 raise
1355
1356 # Docker doesn't ship a registries.conf
1357 log.info('Failed to add mirror: %s' % str(e))