import logging
import json
import datetime
import time

from .mgr_test_case import MgrTestCase


log = logging.getLogger(__name__)
UUID = 'd5775432-0742-44a3-a435-45095e32e6b2'
DATEFMT = '%Y-%m-%d %H:%M:%S.%f'

class TestInsights(MgrTestCase):
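    """Exercise the mgr insights module: health check history, report
    schema, and crash history reporting."""
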
    def setUp(self):
        super(TestInsights, self).setUp()
        self.setup_mgrs()
        self._load_module("insights")
        self._load_module("selftest")
        self.crash_ids = []

    def tearDown(self):
        self._clear_crashes()

    def _insights(self):
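        """Run the "ceph insights" command and return the parsed JSON report."""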
        retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd("insights")
        return json.loads(retstr)

    def _add_crash(self, hours, make_invalid = False):
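        """Post a synthetic crash report dated `hours` hours in the past.

        If `make_invalid` is set, the timestamp is replaced with an
        unparsable string.
        """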
        now = datetime.datetime.utcnow()
        timestamp = now - datetime.timedelta(hours = hours)
        timestamp = timestamp.strftime(DATEFMT) + 'Z'
        crash_id = '_'.join((timestamp, UUID)).replace(' ', '_')
        crash = {
            'crash_id': crash_id,
            'timestamp': timestamp,
        }
        if make_invalid:
            crash["timestamp"] = "not a timestamp"

        ret = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
            'crash', 'post', '-i', '-',
            stdin=json.dumps(crash)
        )
        self.crash_ids.append(crash_id)
        self.assertEqual(0, ret)

    def _clear_crashes(self):
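        """Remove every crash report posted by this test."""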
        for crash_id in self.crash_ids:
            self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
                'crash', 'rm', crash_id
            )

    def _wait_for_health_history_checks(self, *args):
        """Wait for a set of health checks to appear in the health history"""
        timeout = datetime.datetime.utcnow() + \
            datetime.timedelta(seconds = 15)
        while True:
            report = self._insights()
            missing = False
            for check in args:
                if check not in report["health"]["history"]["checks"]:
                    missing = True
                    break
            if not missing:
                return
            self.assertGreater(timeout,
                datetime.datetime.utcnow())
            time.sleep(0.25)

    def _wait_for_curr_health_cleared(self, check):
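        """Wait until `check` disappears from the current health checks."""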
        timeout = datetime.datetime.utcnow() + \
            datetime.timedelta(seconds = 15)
        while True:
            report = self._insights()
            if check not in report["health"]["current"]["checks"]:
                return
            self.assertGreater(timeout,
                datetime.datetime.utcnow())
            time.sleep(0.25)

    def test_health_history(self):
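        """Injected health checks should appear in the insights health
        history, survive a mgr restart, and be removed by
        "insights prune-health"."""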
        # use empty health history as starting point
        self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
            "insights", "prune-health", "0")
        report = self._insights()
        self.assertFalse(report["health"]["history"]["checks"])

        # generate health check history entries. we want to avoid the edge
        # case of running these tests at _exactly_ the top of the hour so we
        # can explicitly control when hourly work occurs. for this we pin the
        # current time to half past the hour.
        now = datetime.datetime.utcnow()
        now = datetime.datetime(
            year = now.year,
            month = now.month,
            day = now.day,
            hour = now.hour,
            minute = 30)

        check_names = set()
        for hours in [-18, -11, -5, -1, 0]:
            # change the insight module's perception of "now" ...
            self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
                "mgr", "self-test", "insights_set_now_offset", str(hours))

            # ... to simulate health check arrivals in the past
            unique_check_name = "insights_health_check_{}".format(hours)
            health_check = {
                unique_check_name: {
                    "severity": "warning",
                    "summary": "summary",
                    "detail": ["detail"]
                }
            }
            self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
                "mgr", "self-test", "health", "set",
                json.dumps(health_check))

            check_names.add(unique_check_name)

            # and also set the same health check to test deduplication
            dupe_check_name = "insights_health_check"
            health_check = {
                dupe_check_name: {
                    "severity": "warning",
                    "summary": "summary",
                    "detail": ["detail"]
                }
            }
            self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
                "mgr", "self-test", "health", "set",
                json.dumps(health_check))

            check_names.add(dupe_check_name)

            # wait for the health check to show up in the history report
            self._wait_for_health_history_checks(unique_check_name, dupe_check_name)

            # clear out the current health checks before moving on
            self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
                "mgr", "self-test", "health", "clear")
            self._wait_for_curr_health_cleared(unique_check_name)

        report = self._insights()
        for check in check_names:
            self.assertIn(check, report["health"]["history"]["checks"])

        # restart the manager
        active_id = self.mgr_cluster.get_active_id()
        self.mgr_cluster.mgr_restart(active_id)

        # ensure that at least one of the checks is present after the restart.
        # we don't require them all to be present because "earlier" checks may
        # not have sat in memory long enough to be flushed.
        all_missing = True
        report = self._insights()
        for check in check_names:
            if check in report["health"]["history"]["checks"]:
                all_missing = False
                break
        self.assertFalse(all_missing)

        # pruning really removes history
        self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
            "insights", "prune-health", "0")
        report = self._insights()
        self.assertFalse(report["health"]["history"]["checks"])

    def test_schema(self):
        """TODO: assert conformance to a full schema specification?"""
        report = self._insights()
        for key in ["osd_metadata",
                    "pg_summary",
                    "mon_status",
                    "manager_map",
                    "service_map",
                    "mon_map",
                    "crush_map",
                    "fs_map",
                    "osd_tree",
                    "df",
                    "osd_dump",
                    "config",
                    "health",
                    "crashes",
                    "version",
                    "errors"]:
            self.assertIn(key, report)

    def test_crash_history(self):
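        """Posted crash reports should appear in the insights crash summary."""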
        self._clear_crashes()
        report = self._insights()
        self.assertFalse(report["crashes"]["summary"])
        self.assertFalse(report["errors"])

        # crashes show up in the report
        self._add_crash(1)
        report = self._insights()
        self.assertTrue(report["crashes"]["summary"])
        self.assertFalse(report["errors"])
        log.warning("{}".format(json.dumps(report["crashes"], indent=2)))

        self._clear_crashes()