import logging
import json
import datetime
import time

from .mgr_test_case import MgrTestCase


log = logging.getLogger(__name__)
UUID = 'd5775432-0742-44a3-a435-45095e32e6b2'
DATEFMT = '%Y-%m-%d %H:%M:%S.%f'

class TestInsights(MgrTestCase):
    def setUp(self):
        super(TestInsights, self).setUp()
        self.setup_mgrs()
        self._load_module("insights")
        self._load_module("selftest")
        self.crash_ids = []

    def tearDown(self):
        self._clear_crashes()

    def _insights(self):
        retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd("insights")
        return json.loads(retstr)

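    # Post a synthetic crash report whose timestamp lies `hours` in the
    # past. Crash ids take the form "<timestamp>_<uuid>" (spaces replaced
    # with underscores); with make_invalid=True the timestamp is replaced
    # with an unparsable string so callers can exercise the error path.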
    def _add_crash(self, hours, make_invalid = False):
        now = datetime.datetime.utcnow()
        timestamp = now - datetime.timedelta(hours = hours)
        timestamp = timestamp.strftime(DATEFMT) + 'Z'
        crash_id = '_'.join((timestamp, UUID)).replace(' ', '_')
        crash = {
            'crash_id': crash_id,
            'timestamp': timestamp,
        }
        if make_invalid:
            crash["timestamp"] = "not a timestamp"

        ret = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
            'crash', 'post', '-i', '-',
            stdin=json.dumps(crash)
        )
        self.crash_ids.append(crash_id)
        self.assertEqual(0, ret)

    def _clear_crashes(self):
        for crash_id in self.crash_ids:
            self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
                'crash', 'rm', crash_id
            )

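    # The two _wait_for_* helpers below poll the insights report every
    # 250 ms and fail the test (via assertGreater) if the expected state
    # is not reached within roughly 15 seconds.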
    def _wait_for_health_history_checks(self, *args):
        """Wait for a set of health checks to appear in the health history"""
        timeout = datetime.datetime.utcnow() + \
            datetime.timedelta(seconds = 15)
        while True:
            report = self._insights()
            if all(check in report["health"]["history"]["checks"]
                   for check in args):
                return
            self.assertGreater(timeout, datetime.datetime.utcnow())
            time.sleep(0.25)

    def _wait_for_curr_health_cleared(self, check):
        """Wait for a health check to disappear from the current health"""
        timeout = datetime.datetime.utcnow() + \
            datetime.timedelta(seconds = 15)
        while True:
            report = self._insights()
            if check not in report["health"]["current"]["checks"]:
                return
            self.assertGreater(timeout, datetime.datetime.utcnow())
            time.sleep(0.25)

    def test_health_history(self):
        # use empty health history as starting point
        self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
            "insights", "prune-health", "0")
        report = self._insights()
        self.assertFalse(report["health"]["history"]["checks"])

        # generate health check history entries. we want to avoid the edge
        # case of running these tests at _exactly_ the top of the hour so we
        # can explicitly control when hourly work occurs. for this we pin the
        # current time to half past the hour.
        now = datetime.datetime.utcnow()
        now = datetime.datetime(
            year = now.year,
            month = now.month,
            day = now.day,
            hour = now.hour,
            minute = 30)

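        # each iteration plants a uniquely named check at a different
        # simulated hour (via the insights_set_now_offset self-test hook),
        # plus a fixed-name check to verify that repeated reports of the
        # same check are deduplicated in the history.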
        check_names = set()
        for hours in [-18, -11, -5, -1, 0]:
            # change the insight module's perception of "now" ...
            self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
                "mgr", "self-test", "insights_set_now_offset", str(hours))

            # ... to simulate health check arrivals in the past
            unique_check_name = "insights_health_check_{}".format(hours)
            health_check = {
                unique_check_name: {
                    "severity": "warning",
                    "summary": "summary",
                    "detail": ["detail"]
                }
            }
            self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
                "mgr", "self-test", "health", "set",
                json.dumps(health_check))

            check_names.add(unique_check_name)

            # and also set the same health check to test deduplication
            dupe_check_name = "insights_health_check"
            health_check = {
                dupe_check_name: {
                    "severity": "warning",
                    "summary": "summary",
                    "detail": ["detail"]
                }
            }
            self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
                "mgr", "self-test", "health", "set",
                json.dumps(health_check))

            check_names.add(dupe_check_name)

            # wait for the health check to show up in the history report
            self._wait_for_health_history_checks(unique_check_name,
                                                 dupe_check_name)

            # clear out the current health checks before moving on
            self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
                "mgr", "self-test", "health", "clear")
            self._wait_for_curr_health_cleared(unique_check_name)

        report = self._insights()
        for check in check_names:
            self.assertIn(check, report["health"]["history"]["checks"])

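        # restarting the active mgr exercises persistence: history entries
        # should be reloaded from storage rather than living only in the
        # restarted daemon's memory.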
        # restart the manager
        active_id = self.mgr_cluster.get_active_id()
        self.mgr_cluster.mgr_restart(active_id)

        # ensure that at least one of the checks is present after the
        # restart. we don't check for them all to be present because
        # "earlier" checks may not have sat in memory long enough to be
        # flushed.
        report = self._insights()
        self.assertTrue(any(
            check in report["health"]["history"]["checks"]
            for check in check_names))

        # pruning really removes history
        self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
            "insights", "prune-health", "0")
        report = self._insights()
        self.assertFalse(report["health"]["history"]["checks"])

    def test_schema(self):
        """TODO: assert conformance to a full schema specification?"""
        report = self._insights()
        for key in ["osd_metadata",
                    "pg_summary",
                    "mon_status",
                    "manager_map",
                    "service_map",
                    "mon_map",
                    "crush_map",
                    "fs_map",
                    "osd_tree",
                    "df",
                    "osd_dump",
                    "config",
                    "health",
                    "crashes",
                    "version",
                    "errors"]:
            self.assertIn(key, report)

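    # a posted crash report should be reflected in the report's "crashes"
    # summary without producing entries in "errors"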
    def test_crash_history(self):
        self._clear_crashes()
        report = self._insights()
        self.assertFalse(report["crashes"]["summary"])
        self.assertFalse(report["errors"])

        # crashes show up in the report
        self._add_crash(1)
        report = self._insights()
        self.assertTrue(report["crashes"]["summary"])
        self.assertFalse(report["errors"])
        log.warning("{}".format(json.dumps(report["crashes"], indent=2)))

        self._clear_crashes()