]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/mgr/test_module_selftest.py
import ceph 15.2.14
[ceph.git] / ceph / qa / tasks / mgr / test_module_selftest.py
CommitLineData
3efd9988
FG
1
2import time
3import requests
11fdf7f2
TL
4import errno
5import logging
6d8e3169 6
11fdf7f2 7from teuthology.exceptions import CommandFailedError
3efd9988
FG
8
9from tasks.mgr.mgr_test_case import MgrTestCase
10
11fdf7f2
TL
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
12
3efd9988
FG
13
class TestModuleSelftest(MgrTestCase):
    """
    That modules with a self-test command can be loaded and execute it
    without errors.

    This is not a substitute for really testing the modules, but it
    is quick and is designed to catch regressions that could occur
    if data structures change in a way that breaks how the modules
    touch them.
    """
    MGRS_REQUIRED = 1

    def setUp(self):
        """Run the base-class setup, then call ``setup_mgrs()``."""
        super().setUp()
        self.setup_mgrs()

    def _selftest_plugin(self, module_name):
        """
        Load the ``selftest`` module and ``module_name`` (via
        ``_load_module``), then run ``mgr self-test module <module_name>``.

        :param module_name: name of the mgr module whose self-test to run
        """
        self._load_module("selftest")
        self._load_module(module_name)

        # Execute the module's self_test() method
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "module", module_name)

    def test_zabbix(self):
        """Run the zabbix self-test with its mandatory config set."""
        # Set these mandatory config fields so that the zabbix module
        # won't trigger health/log errors on load/serve.
        self.mgr_cluster.set_module_conf("zabbix", "zabbix_host", "localhost")
        self.mgr_cluster.set_module_conf("zabbix", "identifier", "foo")
        self._selftest_plugin("zabbix")

    def test_prometheus(self):
        """Run the prometheus self-test after assigning its server port."""
        self._assign_ports("prometheus", "server_port", min_port=8100)
        self._selftest_plugin("prometheus")

    def test_influx(self):
        """Run the influx self-test."""
        self._selftest_plugin("influx")

    def test_diskprediction_local(self):
        """
        Run the diskprediction_local self-test, skipping it when the mgr
        reports a python version of 3.8 or newer.
        """
        self._load_module("selftest")
        # Strip surrounding whitespace (as is done for other command output
        # in this class) so the skip message below stays on a single line.
        python_version = self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "python-version").strip()
        if tuple(int(v) for v in python_version.split('.')) >= (3, 8):
            # https://tracker.ceph.com/issues/45147
            self.skipTest(f'python {python_version} not compatible with '
                          'diskprediction_local')
        self._selftest_plugin("diskprediction_local")

    # Not included in qa/packages/packages.yaml
    #def test_diskprediction_cloud(self):
    #    self._selftest_plugin("diskprediction_cloud")

    def test_telegraf(self):
        """Run the telegraf self-test."""
        self._selftest_plugin("telegraf")

    def test_iostat(self):
        """Run the iostat self-test."""
        self._selftest_plugin("iostat")

    def test_devicehealth(self):
        """Run the devicehealth self-test, then delete the pool it creates."""
        self._selftest_plugin("devicehealth")
        # Clean up the pool that the module creates, because otherwise
        # its low PG count causes test failures.
        pool_name = "device_health_metrics"
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "osd", "pool", "delete", pool_name, pool_name,
            "--yes-i-really-really-mean-it")

    def test_selftest_run(self):
        """Run ``mgr self-test run`` with the selftest module loaded."""
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", "run")

    def test_telemetry(self):
        """Run the telemetry self-test."""
        self._selftest_plugin("telemetry")

    def test_crash(self):
        """Run the crash self-test."""
        self._selftest_plugin("crash")

    def test_orchestrator(self):
        """Run the orchestrator self-test."""
        self._selftest_plugin("orchestrator")

    def test_selftest_config_update(self):
        """
        That configuration updates are seen by running mgr modules
        """
        self._load_module("selftest")

        def get_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get", "testkey").strip()

        self.assertEqual(get_value(), "None")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "config", "set", "mgr", "mgr/selftest/testkey", "foo")
        self.wait_until_equal(get_value, "foo", timeout=10)

        def get_localized_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get_localized", "testkey"
            ).strip()

        # Before a per-daemon value is set, the localized getter is expected
        # to report the same value as the plain one.
        self.assertEqual(get_localized_value(), "foo")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "config", "set", "mgr", "mgr/selftest/{}/testkey".format(
                self.mgr_cluster.get_active_id()),
            "bar")
        self.wait_until_equal(get_localized_value, "bar", timeout=10)

    def test_selftest_config_upgrade(self):
        """
        That pre-mimic config-key config settings are migrated into
        mimic-style config settings and visible from mgr modules.
        """
        self._load_module("selftest")

        def get_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get", "testkey").strip()

        def get_config():
            """
            Parse ``config dump`` into ``(subsys, key, value)`` tuples,
            taken from whitespace-separated columns 0, 2 and 3 of every
            line after the first (header) line.
            """
            lines = self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config", "dump").strip().split("\n")
            result = []
            for line in lines[1:]:
                tokens = line.strip().split()
                log.info("tokens: %s", tokens)
                subsys, key, value = tokens[0], tokens[2], tokens[3]
                result.append((subsys, key, value))

            return result

        # Stop ceph-mgr while we synthetically create a pre-mimic
        # configuration scenario
        for mgr_id in self.mgr_cluster.mgr_daemons.keys():
            self.mgr_cluster.mgr_stop(mgr_id)
            self.mgr_cluster.mgr_fail(mgr_id)

        # Options removed below are remembered here so they can be put back.
        stash = []
        try:
            # Blow away any modern-style mgr module config options
            # (the ceph-mgr implementation may only do the upgrade if
            # it doesn't see new style options)
            for subsys, key, value in get_config():
                if subsys == "mgr" and key.startswith("mgr/"):
                    log.info("Removing config key %s ahead of upgrade", key)
                    self.mgr_cluster.mon_manager.raw_cluster_cmd(
                        "config", "rm", subsys, key)
                    stash.append((subsys, key, value))

            # Inject an old-style configuration setting in config-key
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/testkey", "testvalue")

            # Inject configuration settings that looks data-ish and should
            # not be migrated to a config key
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/testnewline", "foo\nbar")

            # Inject configuration setting that does not appear in the
            # module's config schema
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/kvitem", "foo\nbar")

            # Bring mgr daemons back online, the one that goes active
            # should be doing the upgrade.
            for mgr_id in self.mgr_cluster.mgr_daemons.keys():
                self.mgr_cluster.mgr_restart(mgr_id)

            # Wait for a new active
            self.wait_until_true(
                lambda: self.mgr_cluster.get_active_id() != "", timeout=30)

            # Check that the selftest module sees the upgraded value
            self.assertEqual(get_value(), "testvalue")

            # Check that the upgraded value is visible in the configuration
            seen_keys = [key for _, key, _ in get_config()]
            self.assertIn("mgr/selftest/testkey", seen_keys)

            # ...and that the non-config-looking one isn't
            self.assertNotIn("mgr/selftest/testnewline", seen_keys)

            # ...and that the not-in-schema one isn't
            self.assertNotIn("mgr/selftest/kvitem", seen_keys)
        finally:
            # Restore previous configuration, even when a check above
            # failed, so the removed options do not stay missing.
            for subsys, key, value in stash:
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "config", "set", subsys, key, value)

    def test_selftest_command_spam(self):
        """
        That the dashboard and a native command stay responsive, and the
        active/standby mgr ids stay the same, while the selftest module
        runs its ``command_spam`` background workload.
        """
        # Use the selftest module to stress the mgr daemon
        self._load_module("selftest")

        # Use the dashboard to test that the mgr is still able to do its job
        self._assign_ports("dashboard", "ssl_server_port")
        self._load_module("dashboard")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "dashboard", "create-self-signed-cert")

        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "background", "start", "command_spam")

        dashboard_uri = self._get_uri("dashboard")

        delay = 10
        periods = 10
        for _ in range(periods):
            started = time.monotonic()
            # Check that an HTTP module remains responsive
            r = requests.get(dashboard_uri, verify=False)
            self.assertEqual(r.status_code, 200)

            # Check that a native non-module command remains responsive
            self.mgr_cluster.mon_manager.raw_cluster_cmd("osd", "df")

            # Sleep out the rest of the period.  Clamp at zero because
            # time.sleep() raises ValueError for a negative argument, which
            # would happen whenever the checks above take longer than
            # `delay` seconds.
            elapsed = time.monotonic() - started
            time.sleep(max(0.0, delay - elapsed))

        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "background", "stop")

        # Check that all mgr daemons are still running
        self.assertEqual(original_active, self.mgr_cluster.get_active_id())
        self.assertEqual(original_standbys,
                         self.mgr_cluster.get_standby_ids())

    def test_module_commands(self):
        """
        That module-handled commands have appropriate behavior on
        disabled/failed/recently-enabled modules.
        """

        # Calling a command on a disabled module should return the proper
        # error code.
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "module", "disable", "selftest")
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "run")

        self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)

        # Calling a command that really doesn't exist should give me EINVAL.
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "osd", "albatross")

        self.assertEqual(exc_raised.exception.exitstatus, errno.EINVAL)

        # Enabling a module and then immediately using one of its commands
        # should work (#21683)
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "config", "get", "testkey")

        # Calling a command for a failed module should return the proper
        # error code.
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "background", "start", "throw_exception")
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "run"
            )
        self.assertEqual(exc_raised.exception.exitstatus, errno.EIO)

        # A health alert should be raised for a module that has thrown
        # an exception from its serve() method
        self.wait_for_health(
            "Module 'selftest' has failed: Synthetic exception in serve",
            timeout=30)

        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "module", "disable", "selftest")

        self.wait_for_health_clear(timeout=30)

    def test_module_remote(self):
        """
        Use the selftest module to exercise inter-module communication
        """
        self._load_module("selftest")
        # The "self-test remote" operation just happens to call into
        # influx.
        self._load_module("influx")

        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "remote")

    def test_selftest_cluster_log(self):
        """
        Use the selftest module to test the cluster/audit log interface.
        """
        # Maps the priority name passed on the command line to the tag
        # expected inside the square brackets of the resulting log line.
        priority_map = {
            "info": "INF",
            "security": "SEC",
            "warning": "WRN",
            "error": "ERR"
        }
        self._load_module("selftest")
        for priority, tag in priority_map.items():
            message = "foo bar {}".format(priority)
            log_message = "[{}] {}".format(tag, message)
            # Check for cluster/audit logs:
            # 2018-09-24 09:37:10.977858 mgr.x [INF] foo bar info
            # 2018-09-24 09:37:10.977860 mgr.x [SEC] foo bar security
            # 2018-09-24 09:37:10.977863 mgr.x [WRN] foo bar warning
            # 2018-09-24 09:37:10.977866 mgr.x [ERR] foo bar error
            with self.assert_cluster_log(log_message):
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "mgr", "self-test", "cluster-log", "cluster",
                    priority, message)
            with self.assert_cluster_log(log_message, watch_channel="audit"):
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "mgr", "self-test", "cluster-log", "audit",
                    priority, message)

    def test_selftest_cluster_log_unknown_channel(self):
        """
        That ``mgr self-test cluster-log`` with a channel other than
        cluster/audit ("xyz") fails with EOPNOTSUPP.
        """
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "cluster-log", "xyz",
                "ERR", "The channel does not exist")
        self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)