]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | """ |
2 | Device health monitoring | |
3 | """ | |
4 | ||
5 | import errno | |
6 | import json | |
20effc67 | 7 | from mgr_module import MgrModule, CommandResult, CLIRequiresDB, CLICommand, CLIReadCommand, Option |
11fdf7f2 TL |
8 | import operator |
9 | import rados | |
20effc67 | 10 | import re |
11fdf7f2 | 11 | from threading import Event |
20effc67 | 12 | from datetime import datetime, timedelta, timezone |
f67539c2 | 13 | from typing import cast, Any, Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union |
11fdf7f2 TL |
14 | |
# strftime/strptime pattern used for the persisted 'last_scrape' value and for
# the sample names under which stored metrics are returned.
TIME_FORMAT = '%Y%m%d-%H%M%S'

# Health check codes raised by this module.
DEVICE_HEALTH = 'DEVICE_HEALTH'
DEVICE_HEALTH_IN_USE = 'DEVICE_HEALTH_IN_USE'
DEVICE_HEALTH_TOOMANY = 'DEVICE_HEALTH_TOOMANY'
# Summary text per health check code; the entries containing %d are formatted
# with the number of detail lines when the check is raised.
HEALTH_MESSAGES = {
    DEVICE_HEALTH: '%d device(s) expected to fail soon',
    DEVICE_HEALTH_IN_USE: '%d daemon(s) expected to fail soon and still contain data',
    DEVICE_HEALTH_TOOMANY: 'Too many daemons are expected to fail soon',
}
25 | ||
f67539c2 | 26 | |
def get_ata_wear_level(data: Dict[Any, Any]) -> Optional[float]:
    """
    Return the SATA SSD wear level found in smartctl -x --json output.

    Only entries of ``ata_device_statistics`` page number 7 are inspected;
    the value of the first table entry at offset 8 is returned divided by
    100.  ``None`` is returned when no such entry exists.
    """
    statistics = data.get("ata_device_statistics", {})
    for page in statistics.get("pages", []):
        # pages may contain null entries; skip those and unrelated pages
        if page is not None and page.get("number") == 7:
            for entry in page.get("table", []):
                if entry["offset"] == 8:
                    return entry["value"] / 100.0
    return None
38 | ||
39 | ||
def get_nvme_wear_level(data: Dict[Any, Any]) -> Optional[float]:
    """
    Return the NVMe SSD wear level found in smartctl -x --json output.

    The value is ``percentage_used`` from the
    ``nvme_smart_health_information_log`` section divided by 100, or
    ``None`` when that field is absent.
    """
    health_log = data.get("nvme_smart_health_information_log", {})
    used = health_log.get("percentage_used")
    return None if used is None else used / 100.0
eafe8130 | 48 | |
11fdf7f2 TL |
49 | |
class Module(MgrModule):
    # Device health monitoring manager module: scrapes SMART data from
    # daemons, stores it in the module database and raises health checks.

    # latest (if db does not exist)
    # NOTE(review): presumably consumed by the MgrModule base class when it
    # creates the module database -- confirm in mgr_module.
    SCHEMA = """
CREATE TABLE Device (
  devid TEXT PRIMARY KEY
) WITHOUT ROWID;
CREATE TABLE DeviceHealthMetrics (
  time DATETIME DEFAULT (strftime('%s', 'now')),
  devid TEXT NOT NULL REFERENCES Device (devid),
  raw_smart TEXT NOT NULL,
  PRIMARY KEY (time, devid)
);
"""

    # One entry per schema version, oldest first.
    SCHEMA_VERSIONED = [
        # v1
        """
CREATE TABLE Device (
  devid TEXT PRIMARY KEY
) WITHOUT ROWID;
CREATE TABLE DeviceHealthMetrics (
  time DATETIME DEFAULT (strftime('%s', 'now')),
  devid TEXT NOT NULL REFERENCES Device (devid),
  raw_smart TEXT NOT NULL,
  PRIMARY KEY (time, devid)
);
"""
    ]

    # Runtime-tunable options.  Each one is mirrored onto an instance
    # attribute of the same name by __init__() and config_notify().
    MODULE_OPTIONS = [
        Option(
            name='enable_monitoring',
            default=True,
            type='bool',
            desc='monitor device health metrics',
            runtime=True,
        ),
        Option(
            name='scrape_frequency',
            default=86400,
            type='secs',
            desc='how frequently to scrape device health metrics',
            runtime=True,
        ),
        Option(
            name='pool_name',
            default='device_health_metrics',
            type='str',
            desc='name of pool in which to store device health metrics',
            runtime=True,
        ),
        Option(
            name='retention_period',
            default=(86400 * 180),
            type='secs',
            desc='how long to retain device health metrics',
            runtime=True,
        ),
        Option(
            name='mark_out_threshold',
            default=(86400 * 14 * 2),
            type='secs',
            desc='automatically mark OSD if it may fail before this long',
            runtime=True,
        ),
        Option(
            name='warn_threshold',
            default=(86400 * 14 * 6),
            type='secs',
            desc='raise health warning if OSD may fail before this long',
            runtime=True,
        ),
        Option(
            name='self_heal',
            default=True,
            type='bool',
            desc='preemptively heal cluster around devices that may fail',
            runtime=True,
        ),
        Option(
            name='sleep_interval',
            default=600,
            type='secs',
            desc='how frequently to wake up and check device health',
            runtime=True,
        ),
    ]
138 | ||
f67539c2 | 139 | def __init__(self, *args: Any, **kwargs: Any) -> None: |
11fdf7f2 TL |
140 | super(Module, self).__init__(*args, **kwargs) |
141 | ||
142 | # populate options (just until serve() runs) | |
143 | for opt in self.MODULE_OPTIONS: | |
144 | setattr(self, opt['name'], opt['default']) | |
145 | ||
146 | # other | |
147 | self.run = True | |
148 | self.event = Event() | |
149 | ||
f67539c2 TL |
150 | # for mypy which does not run the code |
151 | if TYPE_CHECKING: | |
152 | self.enable_monitoring = True | |
153 | self.scrape_frequency = 0.0 | |
154 | self.pool_name = '' | |
155 | self.device_health_metrics = '' | |
156 | self.retention_period = 0.0 | |
157 | self.mark_out_threshold = 0.0 | |
158 | self.warn_threshold = 0.0 | |
159 | self.self_heal = True | |
160 | self.sleep_interval = 0.0 | |
161 | ||
162 | def is_valid_daemon_name(self, who: str) -> bool: | |
33c7a0ef | 163 | parts = who.split('.', 1) |
f67539c2 | 164 | if len(parts) != 2: |
11fdf7f2 | 165 | return False |
f67539c2 TL |
166 | return parts[0] in ('osd', 'mon') |
167 | ||
20effc67 | 168 | @CLIReadCommand('device query-daemon-health-metrics') |
f67539c2 TL |
169 | def do_query_daemon_health_metrics(self, who: str) -> Tuple[int, str, str]: |
170 | ''' | |
171 | Get device health metrics for a given daemon | |
172 | ''' | |
173 | if not self.is_valid_daemon_name(who): | |
174 | return -errno.EINVAL, '', 'not a valid mon or osd daemon name' | |
175 | (daemon_type, daemon_id) = who.split('.') | |
176 | result = CommandResult('') | |
177 | self.send_command(result, daemon_type, daemon_id, json.dumps({ | |
178 | 'prefix': 'smart', | |
179 | 'format': 'json', | |
180 | }), '') | |
181 | return result.wait() | |
182 | ||
20effc67 TL |
183 | @CLIRequiresDB |
184 | @CLIReadCommand('device scrape-daemon-health-metrics') | |
f67539c2 TL |
185 | def do_scrape_daemon_health_metrics(self, who: str) -> Tuple[int, str, str]: |
186 | ''' | |
187 | Scrape and store device health metrics for a given daemon | |
188 | ''' | |
189 | if not self.is_valid_daemon_name(who): | |
190 | return -errno.EINVAL, '', 'not a valid mon or osd daemon name' | |
191 | (daemon_type, daemon_id) = who.split('.') | |
192 | return self.scrape_daemon(daemon_type, daemon_id) | |
193 | ||
20effc67 TL |
194 | @CLIRequiresDB |
195 | @CLIReadCommand('device scrape-health-metrics') | |
f67539c2 TL |
196 | def do_scrape_health_metrics(self, devid: Optional[str] = None) -> Tuple[int, str, str]: |
197 | ''' | |
198 | Scrape and store device health metrics | |
199 | ''' | |
200 | if devid is None: | |
11fdf7f2 | 201 | return self.scrape_all() |
11fdf7f2 | 202 | else: |
f67539c2 TL |
203 | return self.scrape_device(devid) |
204 | ||
20effc67 TL |
205 | @CLIRequiresDB |
206 | @CLIReadCommand('device get-health-metrics') | |
f67539c2 TL |
207 | def do_get_health_metrics(self, devid: str, sample: Optional[str] = None) -> Tuple[int, str, str]: |
208 | ''' | |
209 | Show stored device metrics for the device | |
210 | ''' | |
211 | return self.show_device_metrics(devid, sample) | |
212 | ||
20effc67 TL |
213 | @CLIRequiresDB |
214 | @CLICommand('device check-health') | |
f67539c2 TL |
215 | def do_check_health(self) -> Tuple[int, str, str]: |
216 | ''' | |
217 | Check life expectancy of devices | |
218 | ''' | |
219 | return self.check_health() | |
220 | ||
20effc67 | 221 | @CLICommand('device monitoring on') |
f67539c2 TL |
222 | def do_monitoring_on(self) -> Tuple[int, str, str]: |
223 | ''' | |
224 | Enable device health monitoring | |
225 | ''' | |
226 | self.set_module_option('enable_monitoring', True) | |
227 | self.event.set() | |
228 | return 0, '', '' | |
229 | ||
20effc67 | 230 | @CLICommand('device monitoring off') |
f67539c2 TL |
231 | def do_monitoring_off(self) -> Tuple[int, str, str]: |
232 | ''' | |
233 | Disable device health monitoring | |
234 | ''' | |
235 | self.set_module_option('enable_monitoring', False) | |
236 | self.set_health_checks({}) # avoid stuck health alerts | |
237 | return 0, '', '' | |
238 | ||
20effc67 TL |
239 | @CLIRequiresDB |
240 | @CLIReadCommand('device predict-life-expectancy') | |
f67539c2 TL |
241 | def do_predict_life_expectancy(self, devid: str) -> Tuple[int, str, str]: |
242 | ''' | |
243 | Predict life expectancy with local predictor | |
244 | ''' | |
245 | return self.predict_lift_expectancy(devid) | |
246 | ||
247 | def self_test(self) -> None: | |
20effc67 | 248 | assert self.db_ready() |
11fdf7f2 TL |
249 | self.config_notify() |
250 | osdmap = self.get('osd_map') | |
251 | osd_id = osdmap['osds'][0]['osd'] | |
252 | osdmeta = self.get('osd_metadata') | |
253 | devs = osdmeta.get(str(osd_id), {}).get('device_ids') | |
254 | if devs: | |
255 | devid = devs.split()[0].split('=')[1] | |
20effc67 TL |
256 | self.log.debug(f"getting devid {devid}") |
257 | (r, before, err) = self.show_device_metrics(devid, None) | |
11fdf7f2 | 258 | assert r == 0 |
20effc67 | 259 | self.log.debug(f"before: {before}") |
11fdf7f2 TL |
260 | (r, out, err) = self.scrape_device(devid) |
261 | assert r == 0 | |
20effc67 | 262 | (r, after, err) = self.show_device_metrics(devid, None) |
11fdf7f2 | 263 | assert r == 0 |
20effc67 | 264 | self.log.debug(f"after: {after}") |
11fdf7f2 TL |
265 | assert before != after |
266 | ||
f67539c2 | 267 | def config_notify(self) -> None: |
11fdf7f2 TL |
268 | for opt in self.MODULE_OPTIONS: |
269 | setattr(self, | |
270 | opt['name'], | |
271 | self.get_module_option(opt['name'])) | |
272 | self.log.debug(' %s = %s', opt['name'], getattr(self, opt['name'])) | |
273 | ||
20effc67 TL |
274 | def _legacy_put_device_metrics(self, t: str, devid: str, data: str) -> None: |
275 | SQL = """ | |
276 | INSERT OR IGNORE INTO DeviceHealthMetrics (time, devid, raw_smart) | |
277 | VALUES (?, ?, ?); | |
278 | """ | |
279 | ||
280 | self._create_device(devid) | |
281 | epoch = self._t2epoch(t) | |
282 | json.loads(data) # valid? | |
283 | self.db.execute(SQL, (epoch, devid, data)) | |
284 | ||
    # Pattern an object name must contain to be treated as a legacy device
    # object: three alphanumeric/hyphen groups joined by '_' or '-'.
    devre = r"[a-zA-Z0-9-]+[_-][a-zA-Z0-9-]+[_-][a-zA-Z0-9-]+"
286 | ||
    def _load_legacy_object(self, ioctx: rados.Ioctx, oid: str) -> bool:
        """
        Import the omap entries of one legacy object, then delete the object.

        :param ioctx: open I/O context of the pool holding the object
        :param oid: object name, also used as the device id of the samples
        :return: False when ``oid`` does not match ``devre`` (the object is
            left alone), True once the object has been removed
        """
        MAX_OMAP = 10000
        self.log.debug(f"loading object {oid}")
        if re.search(self.devre, oid) is None:
            return False
        with rados.ReadOpCtx() as op:
            # ask for up to MAX_OMAP key/value pairs of the object's omap
            it, rc = ioctx.get_omap_vals(op, None, None, MAX_OMAP)
            if rc == 0:
                ioctx.operate_read_op(op, oid)
                count = 0
                # each omap key is a sample time, each value the raw SMART JSON
                for t, raw_smart in it:
                    self.log.debug(f"putting {oid} {t}")
                    self._legacy_put_device_metrics(t, oid, raw_smart)
                    count += 1
                # reaching the limit would mean entries may have been cut off
                assert count < MAX_OMAP
        self.log.debug(f"removing object {oid}")
        ioctx.remove_object(oid)
        return True
305 | ||
20effc67 TL |
    def check_legacy_pool(self) -> bool:
        """
        Import one batch of legacy objects from the manager pool.

        At most 10 objects are imported per call.

        :return: True when nothing is left to import (pool missing, or fewer
            than 10 objects were imported in this call), False when the batch
            limit was hit and another call is needed
        """
        try:
            # 'device_health_metrics' is automatically renamed '.mgr' in
            # create_mgr_pool
            ioctx = self.rados.open_ioctx(self.MGR_POOL_NAME)
        except rados.ObjectNotFound:
            return True
        if not ioctx:
            return True

        done = False
        with ioctx, self._db_lock, self.db:
            count = 0
            for obj in ioctx.list_objects():
                try:
                    if self._load_legacy_object(ioctx, obj.key):
                        count += 1
                except json.decoder.JSONDecodeError:
                    # skip an object whose stored data is not valid JSON
                    pass
                if count >= 10:
                    break
            done = count < 10
        self.log.debug(f"finished reading legacy pool, complete = {done}")
        return done
f91f0fd5 | 330 | |
    def serve(self) -> None:
        """
        Main loop: import legacy data, check health and scrape on schedule.

        Runs until ``self.run`` is cleared.  Each pass does its work only
        when the database is ready and monitoring is enabled, then waits on
        ``self.event`` for ``sleep_interval`` seconds.
        """
        self.log.info("Starting")
        self.config_notify()

        last_scrape = None
        finished_loading_legacy = False
        while self.run:
            if self.db_ready() and self.enable_monitoring:
                self.log.debug('Running')

                # legacy data is imported in batches, one batch per pass
                if not finished_loading_legacy:
                    finished_loading_legacy = self.check_legacy_pool()

                # restore the persisted time of the last scrape, if any
                if last_scrape is None:
                    ls = self.get_kv('last_scrape')
                    if ls:
                        try:
                            last_scrape = datetime.strptime(ls, TIME_FORMAT)
                        except ValueError:
                            # unparsable value: behave as if never scraped
                            pass
                    self.log.debug('Last scrape %s', last_scrape)

                self.check_health()

                now = datetime.utcnow()
                if not last_scrape:
                    next_scrape = now
                else:
                    # align to scrape interval
                    scrape_frequency = self.scrape_frequency or 86400
                    seconds = (last_scrape - datetime.utcfromtimestamp(0)).total_seconds()
                    # round down to an interval boundary, then step one ahead
                    seconds -= seconds % scrape_frequency
                    seconds += scrape_frequency
                    next_scrape = datetime.utcfromtimestamp(seconds)
                if last_scrape:
                    self.log.debug('Last scrape %s, next scrape due %s',
                                   last_scrape.strftime(TIME_FORMAT),
                                   next_scrape.strftime(TIME_FORMAT))
                else:
                    self.log.debug('Last scrape never, next scrape due %s',
                                   next_scrape.strftime(TIME_FORMAT))
                if now >= next_scrape:
                    self.scrape_all()
                    self.predict_all_devices()
                    last_scrape = now
                    self.set_kv('last_scrape', last_scrape.strftime(TIME_FORMAT))

            # sleep
            sleep_interval = self.sleep_interval or 60
            # come back quickly while legacy batches remain to be imported
            if not finished_loading_legacy:
                sleep_interval = 2
            self.log.debug('Sleeping for %d seconds', sleep_interval)
            self.event.wait(sleep_interval)
            self.event.clear()
385 | ||
f67539c2 | 386 | def shutdown(self) -> None: |
11fdf7f2 TL |
387 | self.log.info('Stopping') |
388 | self.run = False | |
389 | self.event.set() | |
390 | ||
f67539c2 | 391 | def scrape_daemon(self, daemon_type: str, daemon_id: str) -> Tuple[int, str, str]: |
20effc67 TL |
392 | if not self.db_ready(): |
393 | return -errno.EAGAIN, "", "mgr db not yet available" | |
11fdf7f2 TL |
394 | raw_smart_data = self.do_scrape_daemon(daemon_type, daemon_id) |
395 | if raw_smart_data: | |
396 | for device, raw_data in raw_smart_data.items(): | |
397 | data = self.extract_smart_features(raw_data) | |
92f5a8d4 | 398 | if device and data: |
20effc67 | 399 | self.put_device_metrics(device, data) |
11fdf7f2 TL |
400 | return 0, "", "" |
401 | ||
f67539c2 | 402 | def scrape_all(self) -> Tuple[int, str, str]: |
20effc67 TL |
403 | if not self.db_ready(): |
404 | return -errno.EAGAIN, "", "mgr db not yet available" | |
11fdf7f2 TL |
405 | osdmap = self.get("osd_map") |
406 | assert osdmap is not None | |
11fdf7f2 TL |
407 | did_device = {} |
408 | ids = [] | |
409 | for osd in osdmap['osds']: | |
410 | ids.append(('osd', str(osd['osd']))) | |
9f95a23c TL |
411 | monmap = self.get("mon_map") |
412 | for mon in monmap['mons']: | |
413 | ids.append(('mon', mon['name'])) | |
11fdf7f2 TL |
414 | for daemon_type, daemon_id in ids: |
415 | raw_smart_data = self.do_scrape_daemon(daemon_type, daemon_id) | |
416 | if not raw_smart_data: | |
417 | continue | |
418 | for device, raw_data in raw_smart_data.items(): | |
419 | if device in did_device: | |
420 | self.log.debug('skipping duplicate %s' % device) | |
421 | continue | |
422 | did_device[device] = 1 | |
423 | data = self.extract_smart_features(raw_data) | |
92f5a8d4 | 424 | if device and data: |
20effc67 | 425 | self.put_device_metrics(device, data) |
11fdf7f2 TL |
426 | return 0, "", "" |
427 | ||
f67539c2 | 428 | def scrape_device(self, devid: str) -> Tuple[int, str, str]: |
20effc67 TL |
429 | if not self.db_ready(): |
430 | return -errno.EAGAIN, "", "mgr db not yet available" | |
11fdf7f2 TL |
431 | r = self.get("device " + devid) |
432 | if not r or 'device' not in r.keys(): | |
433 | return -errno.ENOENT, '', 'device ' + devid + ' not found' | |
9f95a23c | 434 | daemons = r['device'].get('daemons', []) |
11fdf7f2 TL |
435 | if not daemons: |
436 | return (-errno.EAGAIN, '', | |
9f95a23c | 437 | 'device ' + devid + ' not claimed by any active daemons') |
11fdf7f2 | 438 | (daemon_type, daemon_id) = daemons[0].split('.') |
11fdf7f2 TL |
439 | raw_smart_data = self.do_scrape_daemon(daemon_type, daemon_id, |
440 | devid=devid) | |
441 | if raw_smart_data: | |
442 | for device, raw_data in raw_smart_data.items(): | |
443 | data = self.extract_smart_features(raw_data) | |
92f5a8d4 | 444 | if device and data: |
20effc67 | 445 | self.put_device_metrics(device, data) |
11fdf7f2 TL |
446 | return 0, "", "" |
447 | ||
f67539c2 TL |
448 | def do_scrape_daemon(self, |
449 | daemon_type: str, | |
450 | daemon_id: str, | |
451 | devid: str = '') -> Optional[Dict[str, Any]]: | |
11fdf7f2 TL |
452 | """ |
453 | :return: a dict, or None if the scrape failed. | |
454 | """ | |
455 | self.log.debug('do_scrape_daemon %s.%s' % (daemon_type, daemon_id)) | |
456 | result = CommandResult('') | |
457 | self.send_command(result, daemon_type, daemon_id, json.dumps({ | |
458 | 'prefix': 'smart', | |
459 | 'format': 'json', | |
460 | 'devid': devid, | |
461 | }), '') | |
462 | r, outb, outs = result.wait() | |
463 | ||
464 | try: | |
465 | return json.loads(outb) | |
466 | except (IndexError, ValueError): | |
467 | self.log.error( | |
468 | "Fail to parse JSON result from daemon {0}.{1} ({2})".format( | |
469 | daemon_type, daemon_id, outb)) | |
f67539c2 | 470 | return None |
11fdf7f2 | 471 | |
20effc67 TL |
472 | def _prune_device_metrics(self) -> None: |
473 | SQL = """ | |
474 | DELETE FROM DeviceHealthMetrics | |
475 | WHERE time < (strftime('%s', 'now') - ?); | |
476 | """ | |
477 | ||
478 | cursor = self.db.execute(SQL, (self.retention_period,)) | |
479 | if cursor.rowcount >= 1: | |
480 | self.log.info(f"pruned {cursor.rowcount} metrics") | |
481 | ||
482 | def _create_device(self, devid: str) -> None: | |
483 | SQL = """ | |
484 | INSERT OR IGNORE INTO Device VALUES (?); | |
485 | """ | |
486 | ||
487 | cursor = self.db.execute(SQL, (devid,)) | |
488 | if cursor.rowcount >= 1: | |
489 | self.log.info(f"created device {devid}") | |
490 | else: | |
491 | self.log.debug(f"device {devid} already exists") | |
492 | ||
493 | def put_device_metrics(self, devid: str, data: Any) -> None: | |
494 | SQL = """ | |
495 | INSERT INTO DeviceHealthMetrics (devid, raw_smart) | |
496 | VALUES (?, ?); | |
497 | """ | |
498 | ||
499 | with self._db_lock, self.db: | |
500 | self._create_device(devid) | |
501 | self.db.execute(SQL, (devid, json.dumps(data))) | |
502 | self._prune_device_metrics() | |
11fdf7f2 | 503 | |
f67539c2 TL |
504 | # extract wear level? |
505 | wear_level = get_ata_wear_level(data) | |
506 | if wear_level is None: | |
507 | wear_level = get_nvme_wear_level(data) | |
508 | dev_data = self.get(f"device {devid}") or {} | |
509 | if wear_level is not None: | |
510 | if dev_data.get(wear_level) != str(wear_level): | |
511 | dev_data["wear_level"] = str(wear_level) | |
512 | self.log.debug(f"updating {devid} wear level to {wear_level}") | |
513 | self.set_device_wear_level(devid, wear_level) | |
514 | else: | |
515 | if "wear_level" in dev_data: | |
516 | del dev_data["wear_level"] | |
517 | self.log.debug(f"removing {devid} wear level") | |
518 | self.set_device_wear_level(devid, -1.0) | |
519 | ||
20effc67 TL |
520 | def _t2epoch(self, t: Optional[str]) -> int: |
521 | if not t: | |
522 | return 0 | |
523 | else: | |
524 | return int(datetime.strptime(t, TIME_FORMAT).strftime("%s")) | |
525 | ||
f67539c2 TL |
    def _get_device_metrics(self, devid: str,
                            sample: Optional[str] = None,
                            min_sample: Optional[str] = None) -> Dict[str, Dict[str, Any]]:
        """
        Load stored SMART samples for a device.

        :param devid: device whose samples are wanted
        :param sample: TIME_FORMAT timestamp of one exact sample; when given,
            ``min_sample`` is not used
        :param min_sample: TIME_FORMAT timestamp; only samples at or after it
            are returned (all samples when omitted)
        :return: decoded samples keyed by their time rendered with
            TIME_FORMAT; rows whose JSON cannot be parsed are left out
        """
        res = {}

        SQL_EXACT = """
        SELECT time, raw_smart
            FROM DeviceHealthMetrics
            WHERE devid = ? AND time = ?
            ORDER BY time DESC;
        """
        SQL_MIN = """
        SELECT time, raw_smart
            FROM DeviceHealthMetrics
            WHERE devid = ? AND ? <= time
            ORDER BY time DESC;
        """

        isample = None
        imin_sample = None
        if sample:
            isample = self._t2epoch(sample)
        else:
            # _t2epoch() maps a missing min_sample to 0
            imin_sample = self._t2epoch(min_sample)

        self.log.debug(f"_get_device_metrics: {devid} {sample} {min_sample}")

        with self._db_lock, self.db:
            if isample:
                cursor = self.db.execute(SQL_EXACT, (devid, isample))
            else:
                cursor = self.db.execute(SQL_MIN, (devid, imin_sample))
            for row in cursor:
                t = row['time']
                # stored epoch seconds are rendered as a UTC TIME_FORMAT key
                dt = datetime.utcfromtimestamp(t).strftime(TIME_FORMAT)
                try:
                    res[dt] = json.loads(row['raw_smart'])
                except (ValueError, IndexError):
                    self.log.debug(f"unable to parse value for {devid}:{t}")
                    pass
        return res
11fdf7f2 | 567 | |
f67539c2 | 568 | def show_device_metrics(self, devid: str, sample: Optional[str]) -> Tuple[int, str, str]: |
92f5a8d4 TL |
569 | # verify device exists |
570 | r = self.get("device " + devid) | |
571 | if not r or 'device' not in r.keys(): | |
572 | return -errno.ENOENT, '', 'device ' + devid + ' not found' | |
573 | # fetch metrics | |
574 | res = self._get_device_metrics(devid, sample=sample) | |
575 | return 0, json.dumps(res, indent=4, sort_keys=True), '' | |
11fdf7f2 | 576 | |
f67539c2 | 577 | def check_health(self) -> Tuple[int, str, str]: |
11fdf7f2 TL |
578 | self.log.info('Check health') |
579 | config = self.get('config') | |
580 | min_in_ratio = float(config.get('mon_osd_min_in_ratio')) | |
f67539c2 TL |
581 | mark_out_threshold_td = timedelta(seconds=self.mark_out_threshold) |
582 | warn_threshold_td = timedelta(seconds=self.warn_threshold) | |
583 | checks: Dict[str, Dict[str, Union[int, str, Sequence[str]]]] = {} | |
584 | health_warnings: Dict[str, List[str]] = { | |
11fdf7f2 TL |
585 | DEVICE_HEALTH: [], |
586 | DEVICE_HEALTH_IN_USE: [], | |
20effc67 | 587 | } |
11fdf7f2 TL |
588 | devs = self.get("devices") |
589 | osds_in = {} | |
590 | osds_out = {} | |
20effc67 | 591 | now = datetime.now(timezone.utc) # e.g. '2021-09-22 13:18:45.021712+00:00' |
11fdf7f2 TL |
592 | osdmap = self.get("osd_map") |
593 | assert osdmap is not None | |
594 | for dev in devs['devices']: | |
11fdf7f2 TL |
595 | if 'life_expectancy_max' not in dev: |
596 | continue | |
597 | # ignore devices that are not consumed by any daemons | |
598 | if not dev['daemons']: | |
599 | continue | |
600 | if not dev['life_expectancy_max'] or \ | |
601 | dev['life_expectancy_max'] == '0.000000': | |
602 | continue | |
603 | # life_expectancy_(min/max) is in the format of: | |
9f95a23c | 604 | # '%Y-%m-%dT%H:%M:%S.%f%z', e.g.: |
20effc67 | 605 | # '2019-01-20 21:12:12.000000+00:00' |
11fdf7f2 TL |
606 | life_expectancy_max = datetime.strptime( |
607 | dev['life_expectancy_max'], | |
9f95a23c | 608 | '%Y-%m-%dT%H:%M:%S.%f%z') |
11fdf7f2 TL |
609 | self.log.debug('device %s expectancy max %s', dev, |
610 | life_expectancy_max) | |
611 | ||
612 | if life_expectancy_max - now <= mark_out_threshold_td: | |
613 | if self.self_heal: | |
614 | # dev['daemons'] == ["osd.0","osd.1","osd.2"] | |
615 | if dev['daemons']: | |
616 | osds = [x for x in dev['daemons'] | |
617 | if x.startswith('osd.')] | |
618 | osd_ids = map(lambda x: x[4:], osds) | |
619 | for _id in osd_ids: | |
620 | if self.is_osd_in(osdmap, _id): | |
621 | osds_in[_id] = life_expectancy_max | |
622 | else: | |
623 | osds_out[_id] = 1 | |
624 | ||
625 | if life_expectancy_max - now <= warn_threshold_td: | |
626 | # device can appear in more than one location in case | |
627 | # of SCSI multipath | |
628 | device_locations = map(lambda x: x['host'] + ':' + x['dev'], | |
629 | dev['location']) | |
630 | health_warnings[DEVICE_HEALTH].append( | |
631 | '%s (%s); daemons %s; life expectancy between %s and %s' | |
632 | % (dev['devid'], | |
633 | ','.join(device_locations), | |
634 | ','.join(dev.get('daemons', ['none'])), | |
635 | dev['life_expectancy_max'], | |
636 | dev.get('life_expectancy_max', 'unknown'))) | |
637 | ||
638 | # OSD might be marked 'out' (which means it has no | |
639 | # data), however PGs are still attached to it. | |
640 | for _id in osds_out: | |
641 | num_pgs = self.get_osd_num_pgs(_id) | |
642 | if num_pgs > 0: | |
643 | health_warnings[DEVICE_HEALTH_IN_USE].append( | |
644 | 'osd.%s is marked out ' | |
645 | 'but still has %s PG(s)' % | |
646 | (_id, num_pgs)) | |
647 | if osds_in: | |
648 | self.log.debug('osds_in %s' % osds_in) | |
649 | # calculate target in ratio | |
650 | num_osds = len(osdmap['osds']) | |
651 | num_in = len([x for x in osdmap['osds'] if x['in']]) | |
652 | num_bad = len(osds_in) | |
653 | # sort with next-to-fail first | |
654 | bad_osds = sorted(osds_in.items(), key=operator.itemgetter(1)) | |
655 | did = 0 | |
656 | to_mark_out = [] | |
657 | for osd_id, when in bad_osds: | |
658 | ratio = float(num_in - did - 1) / float(num_osds) | |
659 | if ratio < min_in_ratio: | |
660 | final_ratio = float(num_in - num_bad) / float(num_osds) | |
661 | checks[DEVICE_HEALTH_TOOMANY] = { | |
662 | 'severity': 'warning', | |
663 | 'summary': HEALTH_MESSAGES[DEVICE_HEALTH_TOOMANY], | |
664 | 'detail': [ | |
20effc67 TL |
665 | '%d OSDs with failing device(s) would bring "in" ratio to %f < mon_osd_min_in_ratio %f' % ( |
666 | num_bad - did, final_ratio, min_in_ratio) | |
11fdf7f2 TL |
667 | ] |
668 | } | |
669 | break | |
670 | to_mark_out.append(osd_id) | |
671 | did += 1 | |
672 | if to_mark_out: | |
673 | self.mark_out_etc(to_mark_out) | |
f67539c2 | 674 | for warning, ls in health_warnings.items(): |
11fdf7f2 TL |
675 | n = len(ls) |
676 | if n: | |
677 | checks[warning] = { | |
678 | 'severity': 'warning', | |
679 | 'summary': HEALTH_MESSAGES[warning] % n, | |
9f95a23c | 680 | 'count': len(ls), |
11fdf7f2 TL |
681 | 'detail': ls, |
682 | } | |
683 | self.set_health_checks(checks) | |
684 | return 0, "", "" | |
685 | ||
f67539c2 | 686 | def is_osd_in(self, osdmap: Dict[str, Any], osd_id: str) -> bool: |
11fdf7f2 | 687 | for osd in osdmap['osds']: |
f67539c2 | 688 | if osd_id == str(osd['osd']): |
11fdf7f2 TL |
689 | return bool(osd['in']) |
690 | return False | |
691 | ||
f67539c2 | 692 | def get_osd_num_pgs(self, osd_id: str) -> int: |
11fdf7f2 TL |
693 | stats = self.get('osd_stats') |
694 | assert stats is not None | |
695 | for stat in stats['osd_stats']: | |
f67539c2 | 696 | if osd_id == str(stat['osd']): |
11fdf7f2 TL |
697 | return stat['num_pgs'] |
698 | return -1 | |
699 | ||
f67539c2 | 700 | def mark_out_etc(self, osd_ids: List[str]) -> None: |
11fdf7f2 TL |
701 | self.log.info('Marking out OSDs: %s' % osd_ids) |
702 | result = CommandResult('') | |
703 | self.send_command(result, 'mon', '', json.dumps({ | |
704 | 'prefix': 'osd out', | |
705 | 'format': 'json', | |
706 | 'ids': osd_ids, | |
707 | }), '') | |
708 | r, outb, outs = result.wait() | |
709 | if r != 0: | |
f67539c2 TL |
710 | self.log.warning('Could not mark OSD %s out. r: [%s], outb: [%s], outs: [%s]', |
711 | osd_ids, r, outb, outs) | |
11fdf7f2 TL |
712 | for osd_id in osd_ids: |
713 | result = CommandResult('') | |
714 | self.send_command(result, 'mon', '', json.dumps({ | |
715 | 'prefix': 'osd primary-affinity', | |
716 | 'format': 'json', | |
717 | 'id': int(osd_id), | |
718 | 'weight': 0.0, | |
719 | }), '') | |
720 | r, outb, outs = result.wait() | |
721 | if r != 0: | |
f67539c2 TL |
722 | self.log.warning('Could not set osd.%s primary-affinity, ' |
723 | 'r: [%s], outb: [%s], outs: [%s]', | |
724 | osd_id, r, outb, outs) | |
11fdf7f2 | 725 | |
f67539c2 | 726 | def extract_smart_features(self, raw: Any) -> Any: |
11fdf7f2 TL |
727 | # FIXME: extract and normalize raw smartctl --json output and |
728 | # generate a dict of the fields we care about. | |
729 | return raw | |
730 | ||
f67539c2 | 731 | def predict_lift_expectancy(self, devid: str) -> Tuple[int, str, str]: |
11fdf7f2 TL |
732 | plugin_name = '' |
733 | model = self.get_ceph_option('device_failure_prediction_mode') | |
f67539c2 | 734 | if cast(str, model).lower() == 'local': |
11fdf7f2 TL |
735 | plugin_name = 'diskprediction_local' |
736 | else: | |
737 | return -1, '', 'unable to enable any disk prediction model[local/cloud]' | |
738 | try: | |
739 | can_run, _ = self.remote(plugin_name, 'can_run') | |
740 | if can_run: | |
741 | return self.remote(plugin_name, 'predict_life_expectancy', devid=devid) | |
f67539c2 TL |
742 | else: |
743 | return -1, '', f'{plugin_name} is not available' | |
20effc67 | 744 | except Exception: |
11fdf7f2 TL |
745 | return -1, '', 'unable to invoke diskprediction local or remote plugin' |
746 | ||
f67539c2 | 747 | def predict_all_devices(self) -> Tuple[int, str, str]: |
11fdf7f2 TL |
748 | plugin_name = '' |
749 | model = self.get_ceph_option('device_failure_prediction_mode') | |
f67539c2 | 750 | if cast(str, model).lower() == 'local': |
11fdf7f2 TL |
751 | plugin_name = 'diskprediction_local' |
752 | else: | |
753 | return -1, '', 'unable to enable any disk prediction model[local/cloud]' | |
754 | try: | |
755 | can_run, _ = self.remote(plugin_name, 'can_run') | |
756 | if can_run: | |
757 | return self.remote(plugin_name, 'predict_all_devices') | |
f67539c2 TL |
758 | else: |
759 | return -1, '', f'{plugin_name} is not available' | |
20effc67 | 760 | except Exception: |
11fdf7f2 | 761 | return -1, '', 'unable to invoke diskprediction local or remote plugin' |
eafe8130 | 762 | |
f67539c2 | 763 | def get_recent_device_metrics(self, devid: str, min_sample: str) -> Dict[str, Dict[str, Any]]: |
eafe8130 TL |
764 | return self._get_device_metrics(devid, min_sample=min_sample) |
765 | ||
f67539c2 | 766 | def get_time_format(self) -> str: |
eafe8130 | 767 | return TIME_FORMAT |