# ceph/qa/tasks/mgr/mgr_test_case.py
import json
import logging

from unittest import SkipTest

from teuthology import misc
from tasks.ceph_test_case import CephTestCase

# TODO move definition of CephCluster away from the CephFS stuff
from tasks.cephfs.filesystem import CephCluster


log = logging.getLogger(__name__)


class MgrCluster(CephCluster):
    def __init__(self, ctx):
        super(MgrCluster, self).__init__(ctx)
        self.mgr_ids = list(misc.all_roles_of_type(ctx.cluster, 'mgr'))

        if len(self.mgr_ids) == 0:
            raise RuntimeError(
                "This task requires at least one manager daemon")

        self.mgr_daemons = {mgr_id: self._ctx.daemons.get_daemon('mgr', mgr_id)
                            for mgr_id in self.mgr_ids}

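    # mgr_stop()/mgr_restart() act on the daemon process through teuthology's
    # daemon handles; mgr_fail() instead asks the monitors to drop the daemon
    # from the mgr map, forcing a standby to take over if it was the active.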
    def mgr_stop(self, mgr_id):
        self.mgr_daemons[mgr_id].stop()

    def mgr_fail(self, mgr_id):
        self.mon_manager.raw_cluster_cmd("mgr", "fail", mgr_id)

    def mgr_restart(self, mgr_id):
        self.mgr_daemons[mgr_id].restart()

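    # The mgr map (from "ceph mgr dump") is the source of truth for the
    # helpers below; the fields used in this file are "active_name",
    # "active_gid", "available", "standbys" and "services".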
    def get_mgr_map(self):
        return json.loads(
            self.mon_manager.raw_cluster_cmd("mgr", "dump", "--format=json-pretty"))

    def get_active_id(self):
        return self.get_mgr_map()["active_name"]

    def get_standby_ids(self):
        return [s['name'] for s in self.get_mgr_map()["standbys"]]

    def set_module_conf(self, module, key, val):
        self.mon_manager.raw_cluster_cmd("config", "set", "mgr",
                                         "mgr/{0}/{1}".format(module, key),
                                         val)

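    # Localized module options use keys of the form mgr/<module>/<mgr_id>/<key>;
    # passing --force lets "ceph config set" accept a key that is not (yet) a
    # registered option, e.g. when the module has not been loaded at this point.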
    def set_module_localized_conf(self, module, mgr_id, key, val, force):
        cmd = ["config", "set", "mgr",
               "/".join(["mgr", module, mgr_id, key]),
               val]
        if force:
            cmd.append("--force")
        self.mon_manager.raw_cluster_cmd(*cmd)


class MgrTestCase(CephTestCase):
    MGRS_REQUIRED = 1

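    # setup_mgrs() resets the mgr side of the cluster to a known state:
    # every daemon is bounced and the module list is trimmed back to the
    # defaults, so one test class cannot leak enabled modules into the next.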
    @classmethod
    def setup_mgrs(cls):
        # Stop all the daemons
        for daemon in cls.mgr_cluster.mgr_daemons.values():
            daemon.stop()

        for mgr_id in cls.mgr_cluster.mgr_ids:
            cls.mgr_cluster.mgr_fail(mgr_id)

        # Unload all non-default plugins
        loaded = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "module", "ls"))['enabled_modules']
        unload_modules = set(loaded) - {"cephadm", "restful"}
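        # "cephadm" and "restful" are deliberately left alone: they are treated
        # as part of the default deployment here, and disabling cephadm would
        # break clusters that were deployed with it.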

        for m in unload_modules:
            cls.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "module", "disable", m)

        # Start all the daemons
        for daemon in cls.mgr_cluster.mgr_daemons.values():
            daemon.restart()

        # Wait for an active to come up
        cls.wait_until_true(lambda: cls.mgr_cluster.get_active_id() != "",
                            timeout=20)

        expect_standbys = set(cls.mgr_cluster.mgr_ids) \
            - {cls.mgr_cluster.get_active_id()}
        cls.wait_until_true(
            lambda: set(cls.mgr_cluster.get_standby_ids()) == expect_standbys,
            timeout=20)


    @classmethod
    def setUpClass(cls):
        # The test runner should have populated this
        assert cls.mgr_cluster is not None

        if len(cls.mgr_cluster.mgr_ids) < cls.MGRS_REQUIRED:
            raise SkipTest(
                "Only have {0} manager daemons, {1} are required".format(
                    len(cls.mgr_cluster.mgr_ids), cls.MGRS_REQUIRED))

        cls.setup_mgrs()

    @classmethod
    def _unload_module(cls, module_name):
        def is_disabled():
            enabled_modules = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd(
                'mgr', 'module', 'ls'))['enabled_modules']
            return module_name not in enabled_modules

        if is_disabled():
            return

        log.info("Unloading Mgr module %s ...", module_name)
        cls.mgr_cluster.mon_manager.raw_cluster_cmd(
            'mgr', 'module', 'disable', module_name)
        cls.wait_until_true(is_disabled, timeout=30)

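    # Enabling a module makes the active mgr respawn, so _load_module() waits
    # for the mgr map to report a new active_gid before returning; always-on
    # modules never trigger that respawn, hence the early returns below.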
    @classmethod
    def _load_module(cls, module_name):
        loaded = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "module", "ls"))['enabled_modules']
        if module_name in loaded:
            # The enable command is idempotent, but our wait for a restart
            # isn't, so return now if the module is already loaded
            return

        initial_mgr_map = cls.mgr_cluster.get_mgr_map()

        # Check if the module is configured as an always-on module for the
        # active mgr's release; if so it is already running, and enabling it
        # would not restart anything.
        mgr_daemons = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "metadata"))

        for daemon in mgr_daemons:
            if daemon["name"] == initial_mgr_map["active_name"]:
                ceph_version = daemon["ceph_release"]
                always_on = initial_mgr_map["always_on_modules"].get(ceph_version, [])
                if module_name in always_on:
                    return

        log.info("Loading Mgr module %s ...", module_name)
        initial_gid = initial_mgr_map['active_gid']
        cls.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "module", "enable", module_name, "--force")

        # Wait for the module to load
        def has_restarted():
            mgr_map = cls.mgr_cluster.get_mgr_map()
            done = mgr_map['active_gid'] != initial_gid and mgr_map['available']
            if done:
                log.info("Restarted after module load (new active {0}/{1})".format(
                    mgr_map['active_name'], mgr_map['active_gid']))
            return done
        cls.wait_until_true(has_restarted, timeout=30)

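    # Modules that serve something over the network (dashboard, restful, etc.)
    # publish their endpoint via MgrModule.set_uri(); it then appears in the
    # "services" dict of the mgr map, which is what _get_uri() polls.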
    @classmethod
    def _get_uri(cls, service_name):
        # Little dict hack so that we can assign into this from
        # the _get_or_none function
        mgr_map = {'x': None}

        def _get_or_none():
            mgr_map['x'] = cls.mgr_cluster.get_mgr_map()
            return mgr_map['x']['services'].get(service_name, None)

        cls.wait_until_true(lambda: _get_or_none() is not None, timeout=30)

        uri = mgr_map['x']['services'][service_name]

        log.info("Found {0} at {1} (daemon {2}/{3})".format(
            service_name, uri, mgr_map['x']['active_name'],
            mgr_map['x']['active_gid']))

        return uri

    @classmethod
    def _assign_ports(cls, module_name, config_name, min_port=7789):
        """
        To avoid the need to run lots of hosts in teuthology tests to
        get different URLs per mgr, we hand out a different port to
        each mgr here.

        This is already taken care of for us when running in a vstart
        environment.
        """
        # Start handing out ports well above Ceph's default range
        # (daemons bind in 6800-7300 by default).
        assign_port = min_port

        for mgr_id in cls.mgr_cluster.mgr_ids:
            cls.mgr_cluster.mgr_stop(mgr_id)
            cls.mgr_cluster.mgr_fail(mgr_id)

        for mgr_id in cls.mgr_cluster.mgr_ids:
            log.info("Using port {0} for {1} on mgr.{2}".format(
                assign_port, module_name, mgr_id))
            cls.mgr_cluster.set_module_localized_conf(module_name, mgr_id,
                                                      config_name,
                                                      str(assign_port),
                                                      force=True)
            assign_port += 1

        for mgr_id in cls.mgr_cluster.mgr_ids:
            cls.mgr_cluster.mgr_restart(mgr_id)

        def is_available():
            mgr_map = cls.mgr_cluster.get_mgr_map()
            done = mgr_map['available']
            if done:
                log.info("Available after assign ports (new active {0}/{1})".format(
                    mgr_map['active_name'], mgr_map['active_gid']))
            return done
        cls.wait_until_true(is_available, timeout=30)
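

# A minimal sketch of how a concrete test class typically builds on
# MgrTestCase. This example is illustrative, not part of the original file;
# the class name is an assumption, and it is commented out so that test
# collection is unaffected. It assumes a cluster with at least two mgrs.
#
# class TestFailoverExample(MgrTestCase):
#     MGRS_REQUIRED = 2
#
#     def test_standby_takeover(self):
#         original_active = self.mgr_cluster.get_active_id()
#         original_standbys = self.mgr_cluster.get_standby_ids()
#
#         # Failing the active mgr should promote one of the standbys.
#         self.mgr_cluster.mgr_fail(original_active)
#         self.wait_until_true(
#             lambda: self.mgr_cluster.get_active_id() in original_standbys,
#             timeout=60)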