]>
Commit | Line | Data |
---|---|---|
3efd9988 FG |
1 | |
2 | import time | |
3 | import requests | |
11fdf7f2 TL |
4 | import errno |
5 | import logging | |
6d8e3169 | 6 | |
11fdf7f2 | 7 | from teuthology.exceptions import CommandFailedError |
3efd9988 FG |
8 | |
9 | from tasks.mgr.mgr_test_case import MgrTestCase | |
10 | ||
11fdf7f2 TL |
11 | log = logging.getLogger(__name__) |
12 | ||
3efd9988 FG |
13 | |
class TestModuleSelftest(MgrTestCase):
    """
    That modules with a self-test command can be loaded and execute it
    without errors.

    This is not a substitute for really testing the modules, but it
    is quick and is designed to catch regressions that could occur
    if data structures change in a way that breaks how the modules
    touch them.
    """
    MGRS_REQUIRED = 1

    def setUp(self):
        """Run the parent setUp, then call setup_mgrs()."""
        super().setUp()
        self.setup_mgrs()

    def _selftest_plugin(self, module_name):
        """
        Load the selftest module and `module_name`, then run
        `mgr self-test module <module_name>`.
        """
        self._load_module("selftest")
        self._load_module(module_name)

        # Execute the module's self_test() method
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "module", module_name)

    def test_zabbix(self):
        """Run the zabbix module's self-test."""
        # Set these mandatory config fields so that the zabbix module
        # won't trigger health/log errors on load/serve.
        self.mgr_cluster.set_module_conf("zabbix", "zabbix_host", "localhost")
        self.mgr_cluster.set_module_conf("zabbix", "identifier", "foo")
        self._selftest_plugin("zabbix")

    def test_prometheus(self):
        """Run the prometheus module's self-test."""
        self._assign_ports("prometheus", "server_port", min_port=8100)
        self._selftest_plugin("prometheus")

    def test_influx(self):
        """Run the influx module's self-test."""
        self._selftest_plugin("influx")

    def test_diskprediction_local(self):
        """
        Run the diskprediction_local module's self-test, skipping it when
        the mgr reports a python version of 3.8 or newer.
        """
        self._load_module("selftest")
        # Strip the command output so trailing whitespace does not leak
        # into the version parsing or the skip message.
        python_version = self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "python-version").strip()
        if tuple(int(v) for v in python_version.split('.')) >= (3, 8):
            # https://tracker.ceph.com/issues/45147
            self.skipTest(f'python {python_version} not compatible with '
                          'diskprediction_local')
        self._selftest_plugin("diskprediction_local")

    # Not included in qa/packages/packages.yaml
    #def test_diskprediction_cloud(self):
    #    self._selftest_plugin("diskprediction_cloud")

    def test_telegraf(self):
        """Run the telegraf module's self-test."""
        self._selftest_plugin("telegraf")

    def test_iostat(self):
        """Run the iostat module's self-test."""
        self._selftest_plugin("iostat")

    def test_devicehealth(self):
        """Run the devicehealth module's self-test, then delete its pool."""
        self._selftest_plugin("devicehealth")
        # Clean up the pool that the module creates, because otherwise
        # its low PG count causes test failures.
        pool_name = "device_health_metrics"
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "osd", "pool", "delete", pool_name, pool_name,
            "--yes-i-really-really-mean-it")

    def test_selftest_run(self):
        """Run the selftest module's own `mgr self-test run` command."""
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", "run")

    def test_telemetry(self):
        """Run the telemetry module's self-test."""
        self._selftest_plugin("telemetry")

    def test_crash(self):
        """Run the crash module's self-test."""
        self._selftest_plugin("crash")

    def test_orchestrator(self):
        """Run the orchestrator module's self-test."""
        self._selftest_plugin("orchestrator")

    def test_selftest_config_update(self):
        """
        That configuration updates are seen by running mgr modules
        """
        self._load_module("selftest")

        def get_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get", "testkey").strip()

        self.assertEqual(get_value(), "None")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "config", "set", "mgr", "mgr/selftest/testkey", "foo")
        self.wait_until_equal(get_value, "foo", timeout=10)

        def get_localized_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get_localized", "testkey").strip()

        self.assertEqual(get_localized_value(), "foo")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "config", "set", "mgr", "mgr/selftest/{}/testkey".format(
                self.mgr_cluster.get_active_id()),
            "bar")
        self.wait_until_equal(get_localized_value, "bar", timeout=10)

    def test_selftest_config_upgrade(self):
        """
        That pre-mimic config-key config settings are migrated into
        mimic-style config settings and visible from mgr modules.
        """
        self._load_module("selftest")

        def get_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get", "testkey").strip()

        def get_config():
            # Parse `config dump` into (subsys, key, value) tuples; the
            # first output line is skipped as a header.
            lines = self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config", "dump").strip().split("\n")
            result = []
            for line in lines[1:]:
                tokens = line.strip().split()
                log.info("tokens: %s", tokens)
                subsys, key, value = tokens[0], tokens[2], tokens[3]
                result.append((subsys, key, value))

            return result

        # Stop ceph-mgr while we synthetically create a pre-mimic
        # configuration scenario
        for mgr_id in self.mgr_cluster.mgr_daemons.keys():
            self.mgr_cluster.mgr_stop(mgr_id)
            self.mgr_cluster.mgr_fail(mgr_id)

        # Blow away any modern-style mgr module config options
        # (the ceph-mgr implementation may only do the upgrade if
        # it doesn't see new style options)
        stash = []
        try:
            for subsys, key, value in get_config():
                if subsys == "mgr" and key.startswith("mgr/"):
                    log.info("Removing config key %s ahead of upgrade", key)
                    self.mgr_cluster.mon_manager.raw_cluster_cmd(
                        "config", "rm", subsys, key)
                    stash.append((subsys, key, value))

            # Inject an old-style configuration setting in config-key
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/testkey", "testvalue")

            # Inject a configuration setting that looks data-ish and should
            # not be migrated into a config setting
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/testnewline", "foo\nbar")

            # Inject configuration setting that does not appear in the
            # module's config schema
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/kvitem", "foo\nbar")

            # Bring mgr daemons back online, the one that goes active
            # should be doing the upgrade.
            for mgr_id in self.mgr_cluster.mgr_daemons.keys():
                self.mgr_cluster.mgr_restart(mgr_id)

            # Wait for a new active
            self.wait_until_true(
                lambda: self.mgr_cluster.get_active_id() != "", timeout=30)

            # Check that the selftest module sees the upgraded value
            self.assertEqual(get_value(), "testvalue")

            # Check that the upgraded value is visible in the configuration
            seen_keys = [key for _, key, _ in get_config()]
            self.assertIn("mgr/selftest/testkey", seen_keys)

            # ...and that the non-config-looking one isn't
            self.assertNotIn("mgr/selftest/testnewline", seen_keys)

            # ...and that the not-in-schema one isn't
            self.assertNotIn("mgr/selftest/kvitem", seen_keys)
        finally:
            # Restore previous configuration, even when a step above
            # failed, so the removed options are put back.
            for subsys, key, value in stash:
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "config", "set", subsys, key, value)

    def test_selftest_command_spam(self):
        """
        That the mgr stays responsive and keeps the same active/standby
        daemons while the selftest module runs its command_spam workload.
        """
        # Use the selftest module to stress the mgr daemon
        self._load_module("selftest")

        # Use the dashboard to test that the mgr is still able to do its job
        self._assign_ports("dashboard", "ssl_server_port")
        self._load_module("dashboard")
        self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard",
                                                     "create-self-signed-cert")

        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
                                                     "background", "start",
                                                     "command_spam")

        dashboard_uri = self._get_uri("dashboard")

        delay = 10
        periods = 10
        for _ in range(periods):
            t1 = time.time()
            # Check that an HTTP module remains responsive
            r = requests.get(dashboard_uri, verify=False)
            self.assertEqual(r.status_code, 200)

            # Check that a native non-module command remains responsive
            self.mgr_cluster.mon_manager.raw_cluster_cmd("osd", "df")

            # Sleep out the remainder of the period.  Clamp at zero:
            # time.sleep() raises ValueError for a negative argument, which
            # would happen whenever the checks above took longer than
            # `delay` seconds.
            time.sleep(max(0, delay - (time.time() - t1)))

        self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
                                                     "background", "stop")

        # Check that all mgr daemons are still running
        self.assertEqual(original_active, self.mgr_cluster.get_active_id())
        self.assertEqual(original_standbys, self.mgr_cluster.get_standby_ids())

    def test_module_commands(self):
        """
        That module-handled commands have appropriate behavior on
        disabled/failed/recently-enabled modules.
        """

        # Calling a command on a disabled module should return the proper
        # error code.
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "module", "disable", "selftest")
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "run")

        self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)

        # Calling a command that really doesn't exist should give me EINVAL.
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "osd", "albatross")

        self.assertEqual(exc_raised.exception.exitstatus, errno.EINVAL)

        # Enabling a module and then immediately using one of its commands
        # should work (#21683)
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "config", "get", "testkey")

        # Calling a command for a failed module should return the proper
        # error code.
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "background", "start", "throw_exception")
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "run"
            )
        self.assertEqual(exc_raised.exception.exitstatus, errno.EIO)

        # A health alert should be raised for a module that has thrown
        # an exception from its serve() method
        self.wait_for_health(
            "Module 'selftest' has failed: Synthetic exception in serve",
            timeout=30)

        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "module", "disable", "selftest")

        self.wait_for_health_clear(timeout=30)

    def test_module_remote(self):
        """
        Use the selftest module to exercise inter-module communication
        """
        self._load_module("selftest")
        # The "self-test remote" operation just happens to call into
        # influx.
        self._load_module("influx")

        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "remote")

    def test_selftest_cluster_log(self):
        """
        Use the selftest module to test the cluster/audit log interface.
        """
        priority_map = {
            "info": "INF",
            "security": "SEC",
            "warning": "WRN",
            "error": "ERR"
        }
        self._load_module("selftest")
        for priority, tag in priority_map.items():
            message = "foo bar {}".format(priority)
            log_message = "[{}] {}".format(tag, message)
            # Check for cluster/audit logs:
            # 2018-09-24 09:37:10.977858 mgr.x [INF] foo bar info
            # 2018-09-24 09:37:10.977860 mgr.x [SEC] foo bar security
            # 2018-09-24 09:37:10.977863 mgr.x [WRN] foo bar warning
            # 2018-09-24 09:37:10.977866 mgr.x [ERR] foo bar error
            with self.assert_cluster_log(log_message):
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "mgr", "self-test", "cluster-log", "cluster",
                    priority, message)
            with self.assert_cluster_log(log_message, watch_channel="audit"):
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "mgr", "self-test", "cluster-log", "audit",
                    priority, message)

    def test_selftest_cluster_log_unknown_channel(self):
        """
        That `mgr self-test cluster-log` on the channel "xyz" fails with
        EOPNOTSUPP.
        """
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "cluster-log", "xyz",
                "ERR", "The channel does not exist")
        self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)