import errno
import json
-from mgr_module import MgrModule, CommandResult, CLIRequiresDB, CLICommand, CLIReadCommand, Option
+from mgr_module import MgrModule, CommandResult, MgrModuleRecoverDB, CLIRequiresDB, CLICommand, CLIReadCommand, Option, MgrDBNotReady
import operator
import rados
import re
@CLIRequiresDB
@CLIReadCommand('device scrape-daemon-health-metrics')
+ @MgrModuleRecoverDB
def do_scrape_daemon_health_metrics(self, who: str) -> Tuple[int, str, str]:
'''
Scrape and store device health metrics for a given daemon
@CLIRequiresDB
@CLIReadCommand('device scrape-health-metrics')
+ @MgrModuleRecoverDB
def do_scrape_health_metrics(self, devid: Optional[str] = None) -> Tuple[int, str, str]:
'''
Scrape and store device health metrics
@CLIRequiresDB
@CLIReadCommand('device get-health-metrics')
+ @MgrModuleRecoverDB
def do_get_health_metrics(self, devid: str, sample: Optional[str] = None) -> Tuple[int, str, str]:
'''
Show stored device metrics for the device
@CLIRequiresDB
@CLICommand('device check-health')
+ @MgrModuleRecoverDB
def do_check_health(self) -> Tuple[int, str, str]:
'''
Check life expectancy of devices
@CLIRequiresDB
@CLIReadCommand('device predict-life-expectancy')
+ @MgrModuleRecoverDB
def do_predict_life_expectancy(self, devid: str) -> Tuple[int, str, str]:
'''
Predict life expectancy with local predictor
self.log.debug(f"finished reading legacy pool, complete = {done}")
return done
- def serve(self) -> None:
- self.log.info("Starting")
- self.config_notify()
-
+ @MgrModuleRecoverDB
+ def _do_serve(self) -> None:
+ '''
+ Main loop of the module; runs for as long as self.run is true.
+
+ Every iteration first waits on self.event and then clears it. The wait
+ is self.sleep_interval seconds (60 if that is unset/zero), or 2 seconds
+ while finished_loading_legacy is still False. Work is only done when
+ db_ready() and enable_monitoring are both true.
+
+ NOTE(review): presumably MgrModuleRecoverDB re-invokes this method after
+ a database error, which is why the wait comes before the work -- confirm
+ against mgr_module.
+ '''
last_scrape = None
finished_loading_legacy = False
+
while self.run:
+ # sleep first, in case of exceptions causing retry:
+ sleep_interval = self.sleep_interval or 60
+ if not finished_loading_legacy:
+ sleep_interval = 2
+ self.log.debug('Sleeping for %d seconds', sleep_interval)
+ self.event.wait(sleep_interval)
+ self.event.clear()
+
if self.db_ready() and self.enable_monitoring:
self.log.debug('Running')
last_scrape = now
self.set_kv('last_scrape', last_scrape.strftime(TIME_FORMAT))
- # sleep
- sleep_interval = self.sleep_interval or 60
- if not finished_loading_legacy:
- sleep_interval = 2
- self.log.debug('Sleeping for %d seconds', sleep_interval)
- self.event.wait(sleep_interval)
- self.event.clear()
+ def serve(self) -> None:
+ '''
+ Log startup, call config_notify() once, then run the _do_serve() loop.
+
+ NOTE(review): the loop lives in _do_serve() and only that method carries
+ the MgrModuleRecoverDB decorator; presumably a recovery retry therefore
+ skips this one-time setup -- confirm against mgr_module.
+ '''
+ self.log.info("Starting")
+ self.config_notify()
+
+ self._do_serve()
def shutdown(self) -> None:
self.log.info('Stopping')
return -1, '', 'unable to invoke diskprediction local or remote plugin'
def get_recent_device_metrics(self, devid: str, min_sample: str) -> Dict[str, Dict[str, Any]]:
- return self._get_device_metrics(devid, min_sample=min_sample)
+ '''
+ Return the result of _get_device_metrics() for devid, forwarding
+ min_sample unchanged.
+
+ If the lookup raises MgrDBNotReady, an empty dict is returned instead of
+ propagating the exception.
+ '''
+ try:
+ return self._get_device_metrics(devid, min_sample=min_sample)
+ except MgrDBNotReady:
+ # Callers get "no metrics" rather than an error while the DB is not ready.
+ return dict()
def get_time_format(self) -> str:
+ '''
+ Return TIME_FORMAT, the strftime format this module uses when it stores
+ the 'last_scrape' timestamp.
+ '''
return TIME_FORMAT