ceph/qa/tasks/mgr/test_module_selftest.py

   1
   2 import time
   3 import requests
   4 import errno
   5 import logging
   6 from teuthology.exceptions import CommandFailedError
   7
   8 from tasks.mgr.mgr_test_case import MgrTestCase
   9
  10 log = logging.getLogger(__name__)
  11
  12
  13 class TestModuleSelftest(MgrTestCase):
  14     """
  15     That modules with a self-test command can be loaded and execute it
  16     without errors.
  17
  18     This is not a substitute for really testing the modules, but it
  19     is quick and is designed to catch regressions that could occur
  20     if data structures change in a way that breaks how the modules
  21     touch them.
  22     """
  23     MGRS_REQUIRED = 1
  24
  25     def setUp(self):
  26         self.setup_mgrs()
  27
  28     def _selftest_plugin(self, module_name):
  29         self._load_module("selftest")
  30         self._load_module(module_name)
  31
  32         # Execute the module's self_test() method
  33         self.mgr_cluster.mon_manager.raw_cluster_cmd(
  34                 "mgr", "self-test", "module", module_name)
  35
  36     def test_zabbix(self):
  37         # Set these mandatory config fields so that the zabbix module
  38         # won't trigger health/log errors on load/serve.
  39         self.mgr_cluster.set_module_conf("zabbix", "zabbix_host", "localhost")
  40         self.mgr_cluster.set_module_conf("zabbix", "identifier", "foo")
  41         self._selftest_plugin("zabbix")
  42
  43     def test_prometheus(self):
  44         self._assign_ports("prometheus", "server_port", min_port=8100)
  45         self._selftest_plugin("prometheus")
  46
  47     def test_influx(self):
  48         self._selftest_plugin("influx")
  49
  50     def test_diskprediction_local(self):
  51         self._selftest_plugin("diskprediction_local")
  52
  53     def test_diskprediction_cloud(self):
  54         self._selftest_plugin("diskprediction_cloud")
  55
  56     def test_telegraf(self):
  57         self._selftest_plugin("telegraf")
  58
  59     def test_iostat(self):
  60         self._selftest_plugin("iostat")
  61
  62     def test_devicehealth(self):
  63         self._selftest_plugin("devicehealth")
  64         # Clean up the pool that the module creates, because otherwise
  65         # it's low PG count causes test failures.
  66         pool_name = "device_health_metrics"
  67         self.mgr_cluster.mon_manager.raw_cluster_cmd(
  68                 "osd", "pool", "delete", pool_name, pool_name,
  69                 "--yes-i-really-really-mean-it")
  70
  71     def test_selftest_run(self):
  72         self._load_module("selftest")
  73         self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", "run")
  74
  75     def test_telemetry(self):
  76         self._selftest_plugin("telemetry")
  77
  78     def test_crash(self):
  79         self._selftest_plugin("crash")
  80
  81     def test_selftest_config_update(self):
  82         """
  83         That configuration updates are seen by running mgr modules
  84         """
  85         self._load_module("selftest")
  86
  87         def get_value():
  88             return self.mgr_cluster.mon_manager.raw_cluster_cmd(
  89                 "mgr", "self-test", "config", "get", "testkey").strip()
  90
  91         self.assertEqual(get_value(), "None")
  92         self.mgr_cluster.mon_manager.raw_cluster_cmd(
  93             "config", "set", "mgr", "mgr/selftest/testkey", "foo")
  94         self.wait_until_equal(get_value, "foo", timeout=10)
  95
  96         def get_localized_value():
  97             return self.mgr_cluster.mon_manager.raw_cluster_cmd(
  98                 "mgr", "self-test", "config", "get_localized", "testkey").strip()
  99
 100         self.assertEqual(get_localized_value(), "foo")
 101         self.mgr_cluster.mon_manager.raw_cluster_cmd(
 102             "config", "set", "mgr", "mgr/selftest/{}/testkey".format(
 103                 self.mgr_cluster.get_active_id()),
 104             "bar")
 105         self.wait_until_equal(get_localized_value, "bar", timeout=10)
 106
 107     def test_selftest_config_upgrade(self):
 108         """
 109         That pre-mimic config-key config settings are migrated into
 110         mimic-style config settings and visible from mgr modules.
 111         """
 112         self._load_module("selftest")
 113
 114         def get_value():
 115             return self.mgr_cluster.mon_manager.raw_cluster_cmd(
 116                     "mgr", "self-test", "config", "get", "testkey").strip()
 117
 118         def get_config():
 119             lines = self.mgr_cluster.mon_manager.raw_cluster_cmd(
 120                     "config", "dump")\
 121                             .strip().split("\n")
 122             result = []
 123             for line in lines[1:]:
 124                 tokens = line.strip().split()
 125                 log.info("tokens: {0}".format(tokens))
 126                 subsys, key, value = tokens[0], tokens[2], tokens[3]
 127                 result.append((subsys, key, value))
 128
 129             return result
 130
 131         # Stop ceph-mgr while we synthetically create a pre-mimic
 132         # configuration scenario
 133         for mgr_id in self.mgr_cluster.mgr_daemons.keys():
 134             self.mgr_cluster.mgr_stop(mgr_id)
 135             self.mgr_cluster.mgr_fail(mgr_id)
 136
 137         # Blow away any modern-style mgr module config options
 138         # (the ceph-mgr implementation may only do the upgrade if
 139         #  it doesn't see new style options)
 140         stash = []
 141         for subsys, key, value in get_config():
 142             if subsys == "mgr" and key.startswith("mgr/"):
 143                 log.info("Removing config key {0} ahead of upgrade".format(
 144                     key))
 145                 self.mgr_cluster.mon_manager.raw_cluster_cmd(
 146                         "config", "rm", subsys, key)
 147                 stash.append((subsys, key, value))
 148
 149         # Inject an old-style configuration setting in config-key
 150         self.mgr_cluster.mon_manager.raw_cluster_cmd(
 151                 "config-key", "set", "mgr/selftest/testkey", "testvalue")
 152
 153         # Inject configuration settings that looks data-ish and should
 154         # not be migrated to a config key
 155         self.mgr_cluster.mon_manager.raw_cluster_cmd(
 156                 "config-key", "set", "mgr/selftest/testnewline", "foo\nbar")
 157
 158         # Inject configuration setting that does not appear in the
 159         # module's config schema
 160         self.mgr_cluster.mon_manager.raw_cluster_cmd(
 161                 "config-key", "set", "mgr/selftest/kvitem", "foo\nbar")
 162
 163         # Bring mgr daemons back online, the one that goes active
 164         # should be doing the upgrade.
 165         for mgr_id in self.mgr_cluster.mgr_daemons.keys():
 166             self.mgr_cluster.mgr_restart(mgr_id)
 167
 168         # Wait for a new active
 169         self.wait_until_true(
 170                 lambda: self.mgr_cluster.get_active_id() != "", timeout=30)
 171
 172         # Check that the selftest module sees the upgraded value
 173         self.assertEqual(get_value(), "testvalue")
 174
 175         # Check that the upgraded value is visible in the configuration
 176         seen_keys = [k for s,k,v in get_config()]
 177         self.assertIn("mgr/selftest/testkey", seen_keys)
 178
 179         # ...and that the non-config-looking one isn't
 180         self.assertNotIn("mgr/selftest/testnewline", seen_keys)
 181
 182         # ...and that the not-in-schema one isn't
 183         self.assertNotIn("mgr/selftest/kvitem", seen_keys)
 184
 185         # Restore previous configuration
 186         for subsys, key, value in stash:
 187             self.mgr_cluster.mon_manager.raw_cluster_cmd(
 188                     "config", "set", subsys, key, value)
 189
 190     def test_selftest_command_spam(self):
 191         # Use the selftest module to stress the mgr daemon
 192         self._load_module("selftest")
 193
 194         # Use the dashboard to test that the mgr is still able to do its job
 195         self._assign_ports("dashboard", "ssl_server_port")
 196         self._load_module("dashboard")
 197         self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard",
 198                                                      "create-self-signed-cert")
 199
 200         original_active = self.mgr_cluster.get_active_id()
 201         original_standbys = self.mgr_cluster.get_standby_ids()
 202
 203         self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
 204                                                      "background", "start",
 205                                                      "command_spam")
 206
 207         dashboard_uri = self._get_uri("dashboard")
 208
 209         delay = 10
 210         periods = 10
 211         for i in range(0, periods):
 212             t1 = time.time()
 213             # Check that an HTTP module remains responsive
 214             r = requests.get(dashboard_uri, verify=False)
 215             self.assertEqual(r.status_code, 200)
 216
 217             # Check that a native non-module command remains responsive
 218             self.mgr_cluster.mon_manager.raw_cluster_cmd("osd", "df")
 219
 220             time.sleep(delay - (time.time() - t1))
 221
 222         self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
 223                                                      "background", "stop")
 224
 225         # Check that all mgr daemons are still running
 226         self.assertEqual(original_active, self.mgr_cluster.get_active_id())
 227         self.assertEqual(original_standbys, self.mgr_cluster.get_standby_ids())
 228
 229     def test_module_commands(self):
 230         """
 231         That module-handled commands have appropriate  behavior on
 232         disabled/failed/recently-enabled modules.
 233         """
 234
 235         # Calling a command on a disabled module should return the proper
 236         # error code.
 237         self._load_module("selftest")
 238         self.mgr_cluster.mon_manager.raw_cluster_cmd(
 239             "mgr", "module", "disable", "selftest")
 240         with self.assertRaises(CommandFailedError) as exc_raised:
 241             self.mgr_cluster.mon_manager.raw_cluster_cmd(
 242                 "mgr", "self-test", "run")
 243
 244         self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)
 245
 246         # Calling a command that really doesn't exist should give me EINVAL.
 247         with self.assertRaises(CommandFailedError) as exc_raised:
 248             self.mgr_cluster.mon_manager.raw_cluster_cmd(
 249                 "osd", "albatross")
 250
 251         self.assertEqual(exc_raised.exception.exitstatus, errno.EINVAL)
 252
 253         # Enabling a module and then immediately using ones of its commands
 254         # should work (#21683)
 255         self._load_module("selftest")
 256         self.mgr_cluster.mon_manager.raw_cluster_cmd(
 257             "mgr", "self-test", "config", "get", "testkey")
 258
 259         # Calling a command for a failed module should return the proper
 260         # error code.
 261         self.mgr_cluster.mon_manager.raw_cluster_cmd(
 262             "mgr", "self-test", "background", "start", "throw_exception")
 263         with self.assertRaises(CommandFailedError) as exc_raised:
 264             self.mgr_cluster.mon_manager.raw_cluster_cmd(
 265                 "mgr", "self-test", "run"
 266             )
 267         self.assertEqual(exc_raised.exception.exitstatus, errno.EIO)
 268
 269         # A health alert should be raised for a module that has thrown
 270         # an exception from its serve() method
 271         self.wait_for_health(
 272             "Module 'selftest' has failed: Synthetic exception in serve",
 273             timeout=30)
 274
 275         self.mgr_cluster.mon_manager.raw_cluster_cmd(
 276             "mgr", "module", "disable", "selftest")
 277
 278         self.wait_for_health_clear(timeout=30)
 279
 280     def test_module_remote(self):
 281         """
 282         Use the selftest module to exercise inter-module communication
 283         """
 284         self._load_module("selftest")
 285         # The "self-test remote" operation just happens to call into
 286         # influx.
 287         self._load_module("influx")
 288
 289         self.mgr_cluster.mon_manager.raw_cluster_cmd(
 290             "mgr", "self-test", "remote")
 291
 292     def test_selftest_cluster_log(self):
 293         """
 294         Use the selftest module to test the cluster/audit log interface.
 295         """
 296         priority_map = {
 297             "info": "INF",
 298             "security": "SEC",
 299             "warning": "WRN",
 300             "error": "ERR"
 301         }
 302         self._load_module("selftest")
 303         for priority in priority_map.keys():
 304             message = "foo bar {}".format(priority)
 305             log_message = "[{}] {}".format(priority_map[priority], message)
 306             # Check for cluster/audit logs:
 307             # 2018-09-24 09:37:10.977858 mgr.x [INF] foo bar info
 308             # 2018-09-24 09:37:10.977860 mgr.x [SEC] foo bar security
 309             # 2018-09-24 09:37:10.977863 mgr.x [WRN] foo bar warning
 310             # 2018-09-24 09:37:10.977866 mgr.x [ERR] foo bar error
 311             with self.assert_cluster_log(log_message):
 312                 self.mgr_cluster.mon_manager.raw_cluster_cmd(
 313                     "mgr", "self-test", "cluster-log", "cluster",
 314                     priority, message)
 315             with self.assert_cluster_log(log_message, watch_channel="audit"):
 316                 self.mgr_cluster.mon_manager.raw_cluster_cmd(
 317                     "mgr", "self-test", "cluster-log", "audit",
 318                     priority, message)
 319
 320     def test_selftest_cluster_log_unknown_channel(self):
 321         """
 322         Use the selftest module to test the cluster/audit log interface.
 323         """
 324         with self.assertRaises(CommandFailedError) as exc_raised:
 325             self.mgr_cluster.mon_manager.raw_cluster_cmd(
 326                 "mgr", "self-test", "cluster-log", "xyz",
 327                 "ERR", "The channel does not exist")
 328         self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)