]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/mgr/test_module_selftest.py
import ceph 14.2.5
[ceph.git] / ceph / qa / tasks / mgr / test_module_selftest.py
CommitLineData
3efd9988
FG
1
2import time
3import requests
11fdf7f2
TL
4import errno
5import logging
6from teuthology.exceptions import CommandFailedError
3efd9988
FG
7
8from tasks.mgr.mgr_test_case import MgrTestCase
9
11fdf7f2
TL
10log = logging.getLogger(__name__)
11
3efd9988
FG
12
class TestModuleSelftest(MgrTestCase):
    """
    That modules with a self-test command can be loaded and execute it
    without errors.

    This is not a substitute for really testing the modules, but it
    is quick and is designed to catch regressions that could occur
    if data structures change in a way that breaks how the modules
    touch them.
    """
    MGRS_REQUIRED = 1

    def setUp(self):
        self.setup_mgrs()

    def _selftest_plugin(self, module_name):
        """
        Load the selftest module and `module_name`, then ask the mgr to
        run `mgr self-test module <module_name>`.
        """
        self._load_module("selftest")
        self._load_module(module_name)

        # Execute the module's self_test() method
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "module", module_name)

    def test_zabbix(self):
        # Set these mandatory config fields so that the zabbix module
        # won't trigger health/log errors on load/serve.
        self.mgr_cluster.set_module_conf("zabbix", "zabbix_host", "localhost")
        self.mgr_cluster.set_module_conf("zabbix", "identifier", "foo")
        self._selftest_plugin("zabbix")

    def test_prometheus(self):
        # Ports are assigned before the module is loaded.
        self._assign_ports("prometheus", "server_port", min_port=8100)
        self._selftest_plugin("prometheus")

    def test_influx(self):
        self._selftest_plugin("influx")

    def test_diskprediction_local(self):
        self._selftest_plugin("diskprediction_local")

    def test_diskprediction_cloud(self):
        self._selftest_plugin("diskprediction_cloud")

    def test_telegraf(self):
        self._selftest_plugin("telegraf")

    def test_iostat(self):
        self._selftest_plugin("iostat")

    def test_devicehealth(self):
        self._selftest_plugin("devicehealth")
        # Clean up the pool that the module creates, because otherwise
        # its low PG count causes test failures.
        pool_name = "device_health_metrics"
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "osd", "pool", "delete", pool_name, pool_name,
            "--yes-i-really-really-mean-it")

    def test_selftest_run(self):
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", "run")

    def test_telemetry(self):
        self._selftest_plugin("telemetry")

    def test_crash(self):
        self._selftest_plugin("crash")

    def test_orchestrator_cli(self):
        self._selftest_plugin("orchestrator_cli")

    def test_selftest_config_update(self):
        """
        That configuration updates are seen by running mgr modules
        """
        self._load_module("selftest")

        def get_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get", "testkey").strip()

        self.assertEqual(get_value(), "None")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "config", "set", "mgr", "mgr/selftest/testkey", "foo")
        self.wait_until_equal(get_value, "foo", timeout=10)

        def get_localized_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get_localized", "testkey").strip()

        # Before a per-daemon value is set, the localized lookup is
        # expected to return the value set above.
        self.assertEqual(get_localized_value(), "foo")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "config", "set", "mgr", "mgr/selftest/{}/testkey".format(
                self.mgr_cluster.get_active_id()),
            "bar")
        self.wait_until_equal(get_localized_value, "bar", timeout=10)

    def test_selftest_config_upgrade(self):
        """
        That pre-mimic config-key config settings are migrated into
        mimic-style config settings and visible from mgr modules.
        """
        self._load_module("selftest")

        def get_value():
            return self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "config", "get", "testkey").strip()

        def get_config():
            """
            Return the `config dump` rows as (subsys, key, value) tuples.
            """
            lines = self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config", "dump").strip().split("\n")
            result = []
            # The first line is skipped (presumably the column header).
            for line in lines[1:]:
                tokens = line.strip().split()
                log.info("tokens: %s", tokens)
                # NOTE(review): assumes the whitespace-split columns put
                # the option name at index 2 and its value at index 3 --
                # confirm against the `config dump` output format.
                subsys, key, value = tokens[0], tokens[2], tokens[3]
                result.append((subsys, key, value))

            return result

        # Stop ceph-mgr while we synthetically create a pre-mimic
        # configuration scenario
        for mgr_id in self.mgr_cluster.mgr_daemons:
            self.mgr_cluster.mgr_stop(mgr_id)
            self.mgr_cluster.mgr_fail(mgr_id)

        # Blow away any modern-style mgr module config options
        # (the ceph-mgr implementation may only do the upgrade if
        # it doesn't see new style options)
        stash = []
        try:
            for subsys, key, value in get_config():
                if subsys == "mgr" and key.startswith("mgr/"):
                    log.info("Removing config key %s ahead of upgrade", key)
                    self.mgr_cluster.mon_manager.raw_cluster_cmd(
                        "config", "rm", subsys, key)
                    stash.append((subsys, key, value))

            # Inject an old-style configuration setting in config-key
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/testkey", "testvalue")

            # Inject configuration settings that looks data-ish and should
            # not be migrated to a config key
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/testnewline", "foo\nbar")

            # Inject configuration setting that does not appear in the
            # module's config schema
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "config-key", "set", "mgr/selftest/kvitem", "foo\nbar")

            # Bring mgr daemons back online, the one that goes active
            # should be doing the upgrade.
            for mgr_id in self.mgr_cluster.mgr_daemons:
                self.mgr_cluster.mgr_restart(mgr_id)

            # Wait for a new active
            self.wait_until_true(
                lambda: self.mgr_cluster.get_active_id() != "", timeout=30)

            # Check that the selftest module sees the upgraded value
            self.assertEqual(get_value(), "testvalue")

            # Check that the upgraded value is visible in the configuration
            seen_keys = [key for _, key, _ in get_config()]
            self.assertIn("mgr/selftest/testkey", seen_keys)

            # ...and that the non-config-looking one isn't
            self.assertNotIn("mgr/selftest/testnewline", seen_keys)

            # ...and that the not-in-schema one isn't
            self.assertNotIn("mgr/selftest/kvitem", seen_keys)
        finally:
            # Restore previous configuration, even when a check above
            # failed, so the removed options do not stay missing for
            # whatever runs next.
            for subsys, key, value in stash:
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "config", "set", subsys, key, value)

    def test_selftest_command_spam(self):
        # Use the selftest module to stress the mgr daemon
        self._load_module("selftest")

        # Use the dashboard to test that the mgr is still able to do its job
        self._assign_ports("dashboard", "ssl_server_port")
        self._load_module("dashboard")
        self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard",
                                                     "create-self-signed-cert")

        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
                                                     "background", "start",
                                                     "command_spam")

        dashboard_uri = self._get_uri("dashboard")

        delay = 10
        periods = 10
        for _ in range(periods):
            t1 = time.time()
            # Check that an HTTP module remains responsive
            # (verify=False: the certificate created above is self-signed)
            r = requests.get(dashboard_uri, verify=False)
            self.assertEqual(r.status_code, 200)

            # Check that a native non-module command remains responsive
            self.mgr_cluster.mon_manager.raw_cluster_cmd("osd", "df")

            # Sleep out the remainder of the period. Clamp at zero: if the
            # checks above took longer than `delay`, the remainder is
            # negative and time.sleep() would raise ValueError.
            time.sleep(max(0, delay - (time.time() - t1)))

        self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
                                                     "background", "stop")

        # Check that all mgr daemons are still running
        self.assertEqual(original_active, self.mgr_cluster.get_active_id())
        self.assertEqual(original_standbys, self.mgr_cluster.get_standby_ids())

    def test_module_commands(self):
        """
        That module-handled commands have appropriate behavior on
        disabled/failed/recently-enabled modules.
        """

        # Calling a command on a disabled module should return the proper
        # error code.
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "module", "disable", "selftest")
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "run")

        self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)

        # Calling a command that really doesn't exist should give me EINVAL.
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "osd", "albatross")

        self.assertEqual(exc_raised.exception.exitstatus, errno.EINVAL)

        # Enabling a module and then immediately using one of its commands
        # should work (#21683)
        self._load_module("selftest")
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "config", "get", "testkey")

        # Calling a command for a failed module should return the proper
        # error code.
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "background", "start", "throw_exception")
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "run"
            )
        self.assertEqual(exc_raised.exception.exitstatus, errno.EIO)

        # A health alert should be raised for a module that has thrown
        # an exception from its serve() method
        self.wait_for_health(
            "Module 'selftest' has failed: Synthetic exception in serve",
            timeout=30)

        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "module", "disable", "selftest")

        self.wait_for_health_clear(timeout=30)

    def test_module_remote(self):
        """
        Use the selftest module to exercise inter-module communication
        """
        self._load_module("selftest")
        # The "self-test remote" operation just happens to call into
        # influx.
        self._load_module("influx")

        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "self-test", "remote")

    def test_selftest_cluster_log(self):
        """
        Use the selftest module to test the cluster/audit log interface.
        """
        priority_map = {
            "info": "INF",
            "security": "SEC",
            "warning": "WRN",
            "error": "ERR"
        }
        self._load_module("selftest")
        for priority, tag in priority_map.items():
            message = "foo bar {}".format(priority)
            log_message = "[{}] {}".format(tag, message)
            # Check for cluster/audit logs:
            # 2018-09-24 09:37:10.977858 mgr.x [INF] foo bar info
            # 2018-09-24 09:37:10.977860 mgr.x [SEC] foo bar security
            # 2018-09-24 09:37:10.977863 mgr.x [WRN] foo bar warning
            # 2018-09-24 09:37:10.977866 mgr.x [ERR] foo bar error
            with self.assert_cluster_log(log_message):
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "mgr", "self-test", "cluster-log", "cluster",
                    priority, message)
            with self.assert_cluster_log(log_message, watch_channel="audit"):
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    "mgr", "self-test", "cluster-log", "audit",
                    priority, message)

    def test_selftest_cluster_log_unknown_channel(self):
        """
        That the selftest cluster-log command fails with EOPNOTSUPP when
        asked to log to a channel other than cluster/audit.
        """
        # NOTE(review): unlike test_selftest_cluster_log, this test never
        # loads the selftest module, and test_module_commands expects the
        # same EOPNOTSUPP from a command of a disabled module -- confirm
        # this exercises the unknown-channel path rather than the
        # disabled-module path.
        with self.assertRaises(CommandFailedError) as exc_raised:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                "mgr", "self-test", "cluster-log", "xyz",
                "ERR", "The channel does not exist")
        self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)