]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/mgr/test_module_selftest.py
import 15.2.4
[ceph.git] / ceph / qa / tasks / mgr / test_module_selftest.py
CommitLineData
3efd9988
FG
1
2import time
3import requests
11fdf7f2
TL
4import errno
5import logging
6from teuthology.exceptions import CommandFailedError
3efd9988
FG
7
8from tasks.mgr.mgr_test_case import MgrTestCase
9
11fdf7f2
TL
10log = logging.getLogger(__name__)
11
3efd9988
FG
12
class TestModuleSelftest(MgrTestCase):
    """
    That modules with a self-test command can be loaded and execute it
    without errors.

    This is not a substitute for really testing the modules, but it
    is quick and is designed to catch regressions that could occur
    if data structures change in a way that breaks how the modules
    touch them.
    """
    # Read by the test harness, not by this class; presumably the minimum
    # number of mgr daemons the tests need -- confirm against MgrTestCase.
    MGRS_REQUIRED = 1

    def setUp(self):
        """
        Run the base class setUp, then (re)initialise the mgr daemons
        via setup_mgrs() before every test.
        """
        super(TestModuleSelftest, self).setUp()
        self.setup_mgrs()

    def _selftest_plugin(self, module_name):
        """
        Load the "selftest" module and `module_name`, then ask the
        cluster to run `mgr self-test module <module_name>`.

        A failure of that command propagates to the calling test as
        whatever raw_cluster_cmd raises.
        """
        self._load_module("selftest")
        self._load_module(module_name)

        # Execute the module's self_test() method
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "module", module_name)

    def test_zabbix(self):
        """
        That the zabbix module passes its self-test.
        """
        # Set these mandatory config fields so that the zabbix module
        # won't trigger health/log errors on load/serve.
        self.mgr_cluster.set_module_conf("zabbix", "zabbix_host", "localhost")
        self.mgr_cluster.set_module_conf("zabbix", "identifier", "foo")
        self._selftest_plugin("zabbix")

    def test_prometheus(self):
        """
        That the prometheus module passes its self-test.
        """
        # Assign the module's "server_port" option before it is loaded.
        self._assign_ports("prometheus", "server_port", min_port=8100)
        self._selftest_plugin("prometheus")

    def test_influx(self):
        """
        That the influx module passes its self-test.
        """
        self._selftest_plugin("influx")

    def test_diskprediction_local(self):
        """
        That the diskprediction_local module passes its self-test.
        """
        self._selftest_plugin("diskprediction_local")

    # Not included in qa/packages/packages.yaml
    #def test_diskprediction_cloud(self):
    #    self._selftest_plugin("diskprediction_cloud")

    def test_telegraf(self):
        """
        That the telegraf module passes its self-test.
        """
        self._selftest_plugin("telegraf")

    def test_iostat(self):
        """
        That the iostat module passes its self-test.
        """
        self._selftest_plugin("iostat")

    def test_devicehealth(self):
        """
        That the devicehealth module passes its self-test.
        """
        self._selftest_plugin("devicehealth")
        # Clean up the pool that the module creates, because otherwise
        # its low PG count causes test failures.
        pool_name = "device_health_metrics"
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "osd", "pool", "delete", pool_name, pool_name,
            "--yes-i-really-really-mean-it")

    def test_selftest_run(self):
        """
        That the selftest module's own `mgr self-test run` succeeds.
        """
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", "run")

    def test_telemetry(self):
        """
        That the telemetry module passes its self-test.
        """
        self._selftest_plugin("telemetry")

    def test_crash(self):
        """
        That the crash module passes its self-test.
        """
        self._selftest_plugin("crash")

    def test_orchestrator(self):
        """
        That the orchestrator module passes its self-test.
        """
        self._selftest_plugin("orchestrator")

    def test_selftest_config_update(self):
        """
        That configuration updates are seen by running mgr modules
        """
        self._load_module("selftest")

        def get_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get", "testkey").strip()

        # Unset option is reported by the command as the string "None".
        self.assertEqual(get_value(), "None")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "config", "set", "mgr", "mgr/selftest/testkey", "foo")
        self.wait_until_equal(get_value, "foo", timeout=10)

        def get_localized_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get_localized", "testkey").strip()

        # Before a per-daemon override exists, the localized read is
        # expected to return the value set above.
        self.assertEqual(get_localized_value(), "foo")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "config", "set", "mgr", "mgr/selftest/{}/testkey".format(
                self.mgr_cluster.get_active_id()),
            "bar")
        self.wait_until_equal(get_localized_value, "bar", timeout=10)

    def test_selftest_config_upgrade(self):
        """
        That pre-mimic config-key config settings are migrated into
        mimic-style config settings and visible from mgr modules.
        """
        self._load_module("selftest")

        def get_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get", "testkey").strip()

        def get_config():
            """
            Parse `config dump` into a list of (subsys, key, value).

            The first output line is skipped as a header.  Fields are
            taken positionally from the whitespace-split row (tokens 0,
            2 and 3), so `value` is only the first whitespace-delimited
            token of the value column.
            """
            lines = self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config", "dump").strip().split("\n")
            result = []
            for line in lines[1:]:
                tokens = line.strip().split()
                log.info("tokens: %s", tokens)
                subsys, key, value = tokens[0], tokens[2], tokens[3]
                result.append((subsys, key, value))

            return result

        # Stop ceph-mgr while we synthetically create a pre-mimic
        # configuration scenario
        for mgr_id in self.mgr_cluster.mgr_daemons.keys():
            self.mgr_cluster.mgr_stop(mgr_id)
            self.mgr_cluster.mgr_fail(mgr_id)

        # Options removed below are remembered here so that they can be
        # put back afterwards, whether or not the checks pass.
        stash = []
        try:
            # Blow away any modern-style mgr module config options
            # (the ceph-mgr implementation may only do the upgrade if
            # it doesn't see new style options)
            for subsys, key, value in get_config():
                if subsys == "mgr" and key.startswith("mgr/"):
                    log.info("Removing config key %s ahead of upgrade", key)
                    self.mgr_cluster.mon_manager.raw_cluster_cmd(
                        "config", "rm", subsys, key)
                    stash.append((subsys, key, value))

            # Inject an old-style configuration setting in config-key
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/testkey", "testvalue")

            # Inject configuration settings that looks data-ish and should
            # not be migrated to a config key
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/testnewline", "foo\nbar")

            # Inject configuration setting that does not appear in the
            # module's config schema
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/kvitem", "foo\nbar")

            # Bring mgr daemons back online, the one that goes active
            # should be doing the upgrade.
            for mgr_id in self.mgr_cluster.mgr_daemons.keys():
                self.mgr_cluster.mgr_restart(mgr_id)

            # Wait for a new active
            self.wait_until_true(
                lambda: self.mgr_cluster.get_active_id() != "", timeout=30)

            # Check that the selftest module sees the upgraded value
            self.assertEqual(get_value(), "testvalue")

            # Check that the upgraded value is visible in the configuration
            seen_keys = [key for _, key, _ in get_config()]
            self.assertIn("mgr/selftest/testkey", seen_keys)

            # ...and that the non-config-looking one isn't
            self.assertNotIn("mgr/selftest/testnewline", seen_keys)

            # ...and that the not-in-schema one isn't
            self.assertNotIn("mgr/selftest/kvitem", seen_keys)
        finally:
            # Restore previous configuration, even when a step above
            # failed, so a failure here does not leave the cluster
            # without the options that were removed.
            for subsys, key, value in stash:
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "config", "set", subsys, key, value)

    def test_selftest_command_spam(self):
        """
        That the mgr stays responsive, and no daemon fails over, while
        the selftest module runs its "command_spam" background workload.
        """
        # Use the selftest module to stress the mgr daemon
        self._load_module("selftest")

        # Use the dashboard to test that the mgr is still able to do its job
        self._assign_ports("dashboard", "ssl_server_port")
        self._load_module("dashboard")
        self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard",
                                                     "create-self-signed-cert")

        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
                                                     "background", "start",
                                                     "command_spam")

        dashboard_uri = self._get_uri("dashboard")

        delay = 10
        periods = 10
        for _ in range(periods):
            t1 = time.time()
            # Check that an HTTP module remains responsive
            # (verify=False: the certificate created above is self-signed)
            r = requests.get(dashboard_uri, verify=False)
            self.assertEqual(r.status_code, 200)

            # Check that a native non-module command remains responsive
            self.mgr_cluster.mon_manager.raw_cluster_cmd("osd", "df")

            # Pace the loop at one iteration per `delay` seconds.  When
            # the checks themselves took longer than `delay` the
            # remainder is negative, and time.sleep() raises ValueError
            # for a negative argument, so clamp it at zero.
            time.sleep(max(0.0, delay - (time.time() - t1)))

        self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
                                                     "background", "stop")

        # Check that all mgr daemons are still running
        self.assertEqual(original_active, self.mgr_cluster.get_active_id())
        self.assertEqual(original_standbys, self.mgr_cluster.get_standby_ids())

    def test_module_commands(self):
        """
        That module-handled commands have appropriate behavior on
        disabled/failed/recently-enabled modules.
        """

        # Calling a command on a disabled module should return the proper
        # error code.
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "module", "disable", "selftest")
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "run")

        self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)

        # Calling a command that really doesn't exist should give me EINVAL.
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "osd", "albatross")

        self.assertEqual(exc_raised.exception.exitstatus, errno.EINVAL)

        # Enabling a module and then immediately using ones of its commands
        # should work (#21683)
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "config", "get", "testkey")

        # Calling a command for a failed module should return the proper
        # error code.
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "background", "start", "throw_exception")
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "run"
            )
        self.assertEqual(exc_raised.exception.exitstatus, errno.EIO)

        # A health alert should be raised for a module that has thrown
        # an exception from its serve() method
        self.wait_for_health(
            "Module 'selftest' has failed: Synthetic exception in serve",
            timeout=30)

        # Disabling the failed module is expected to clear the alert.
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "module", "disable", "selftest")

        self.wait_for_health_clear(timeout=30)

    def test_module_remote(self):
        """
        Use the selftest module to exercise inter-module communication
        """
        self._load_module("selftest")
        # The "self-test remote" operation just happens to call into
        # influx.
        self._load_module("influx")

        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "remote")

    def test_selftest_cluster_log(self):
        """
        Use the selftest module to test the cluster/audit log interface.
        """
        # Priority name passed to the command -> tag expected in the log.
        priority_map = {
            "info": "INF",
            "security": "SEC",
            "warning": "WRN",
            "error": "ERR"
        }
        self._load_module("selftest")
        for priority, tag in priority_map.items():
            message = "foo bar {}".format(priority)
            log_message = "[{}] {}".format(tag, message)
            # Check for cluster/audit logs:
            # 2018-09-24 09:37:10.977858 mgr.x [INF] foo bar info
            # 2018-09-24 09:37:10.977860 mgr.x [SEC] foo bar security
            # 2018-09-24 09:37:10.977863 mgr.x [WRN] foo bar warning
            # 2018-09-24 09:37:10.977866 mgr.x [ERR] foo bar error
            with self.assert_cluster_log(log_message):
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "mgr", "self-test", "cluster-log", "cluster",
                    priority, message)
            with self.assert_cluster_log(log_message, watch_channel="audit"):
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "mgr", "self-test", "cluster-log", "audit",
                    priority, message)

    def test_selftest_cluster_log_unknown_channel(self):
        """
        That `mgr self-test cluster-log` on a channel other than
        "cluster"/"audit" fails with EOPNOTSUPP.
        """
        # NOTE(review): unlike the other tests, this one never calls
        # self._load_module("selftest").  test_module_commands expects
        # EOPNOTSUPP from a command sent to a *disabled* module, so if
        # selftest is not enabled at this point the assertion below may
        # pass without the channel ever being examined -- confirm.
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "cluster-log", "xyz",
                "ERR", "The channel does not exist")
        self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)