# Source: ceph.git — ceph/src/pybind/mgr/dashboard/services/ceph_service.py
1 # -*- coding: utf-8 -*-
2 from __future__
import absolute_import
8 from mgr_module
import CommandResult
9 from mgr_util
import get_time_series_rates
, get_most_recent_rate
12 from ..exceptions
import DashboardException
15 from typing
import Dict
, Any
, Union
# pylint: disable=unused-import
17 pass # For typing only
# Module-level logger for the dashboard's Ceph service helpers.
logger = logging.getLogger('ceph_service')
class SendCommandError(rados.Error):
    """Raised when a mon/mgr command dispatched through CephService fails.

    Besides the rados error message and errno, it carries the command
    ``prefix`` and the argument dict that were sent, so callers can report
    exactly which command failed.

    :param err: error message returned by the cluster
    :param prefix: the command name, e.g. ``'osd tree'``
    :param argdict: the argument dictionary sent with the command
    :param errno: the numeric return code
    """

    def __init__(self, err, prefix, argdict, errno):
        # Fix: the visible code accepted `prefix` but never stored it,
        # leaving exception handlers unable to tell which command failed.
        self.prefix = prefix
        self.argdict = argdict
        super(SendCommandError, self).__init__(err, errno)
class CephService(object):
    """Class-level helpers for querying Ceph cluster state via the mgr interface."""

    # OSD map flags that suppress (deep) scrubbing cluster-wide.
    OSD_FLAG_NO_SCRUB = 'noscrub'
    OSD_FLAG_NO_DEEP_SCRUB = 'nodeep-scrub'

    # Substrings of a PG status string (e.g. 'active+clean+scrubbing+deep')
    # that indicate a scrub is currently running.
    PG_STATUS_SCRUBBING = 'scrubbing'
    PG_STATUS_DEEP_SCRUBBING = 'deep'

    # Human-readable cluster-wide scrub states returned by get_scrub_status().
    SCRUB_STATUS_DISABLED = 'Disabled'
    SCRUB_STATUS_ACTIVE = 'Active'
    SCRUB_STATUS_INACTIVE = 'Inactive'
42 def get_service_map(cls
, service_name
):
43 service_map
= {} # type: Dict[str, dict]
44 for server
in mgr
.list_servers():
45 for service
in server
['services']:
46 if service
['type'] == service_name
:
47 if server
['hostname'] not in service_map
:
48 service_map
[server
['hostname']] = {
52 inst_id
= service
['id']
53 metadata
= mgr
.get_metadata(service_name
, inst_id
)
54 status
= mgr
.get_daemon_status(service_name
, inst_id
)
55 service_map
[server
['hostname']]['services'].append({
58 'hostname': server
['hostname'],
65 def get_service_list(cls
, service_name
):
66 service_map
= cls
.get_service_map(service_name
)
67 return [svc
for _
, svcs
in service_map
.items() for svc
in svcs
['services']]
70 def get_service(cls
, service_name
, service_id
):
71 for server
in mgr
.list_servers():
72 for service
in server
['services']:
73 if service
['type'] == service_name
:
74 inst_id
= service
['id']
75 if inst_id
== service_id
:
76 metadata
= mgr
.get_metadata(service_name
, inst_id
)
77 status
= mgr
.get_daemon_status(service_name
, inst_id
)
81 'hostname': server
['hostname'],
88 def get_pool_list(cls
, application
=None):
89 osd_map
= mgr
.get('osd_map')
91 return osd_map
['pools']
92 return [pool
for pool
in osd_map
['pools']
93 if application
in pool
.get('application_metadata', {})]
96 def get_pool_list_with_stats(cls
, application
=None):
97 # pylint: disable=too-many-locals
98 pools
= cls
.get_pool_list(application
)
102 pg_summary
= mgr
.get("pg_summary")
103 pool_stats
= mgr
.get_updated_pool_stats()
106 pool
['pg_status'] = pg_summary
['by_pool'][pool
['pool'].__str
__()]
107 stats
= pool_stats
[pool
['pool']]
110 for stat_name
, stat_series
in stats
.items():
111 rates
= get_time_series_rates(stat_series
)
113 'latest': stat_series
[0][1],
114 'rate': get_most_recent_rate(rates
),
118 pools_w_stats
.append(pool
)
122 def get_erasure_code_profiles(cls
):
123 def _serialize_ecp(name
, ecp
):
124 def serialize_numbers(key
):
126 if value
is not None:
127 ecp
[key
] = int(value
)
130 serialize_numbers('k')
131 serialize_numbers('m')
135 for name
, ecp
in mgr
.get('osd_map').get('erasure_code_profiles', {}).items():
136 ret
.append(_serialize_ecp(name
, ecp
))
140 def get_pool_name_from_id(cls
, pool_id
):
141 # type: (int) -> Union[str, None]
142 pool
= cls
.get_pool_by_attribute('pool', pool_id
)
143 return pool
['pool_name'] if pool
is not None else None
146 def get_pool_by_attribute(cls
, attribute
, value
):
147 # type: (str, Any) -> Union[dict, None]
148 pool_list
= cls
.get_pool_list()
149 for pool
in pool_list
:
150 if attribute
in pool
and pool
[attribute
] == value
:
155 def get_pool_pg_status(cls
, pool_name
):
156 # type: (str) -> dict
157 pool
= cls
.get_pool_by_attribute('pool_name', pool_name
)
160 return mgr
.get("pg_summary")['by_pool'][pool
['pool'].__str
__()]
163 def send_command(cls
, srv_type
, prefix
, srv_spec
='', **kwargs
):
166 :param srv_type: mon |
167 :param kwargs: will be added to argdict
168 :param srv_spec: typically empty. or something like "<fs_id>:0"
170 :raises PermissionError: See rados.make_ex
171 :raises ObjectNotFound: See rados.make_ex
172 :raises IOError: See rados.make_ex
173 :raises NoSpace: See rados.make_ex
174 :raises ObjectExists: See rados.make_ex
175 :raises ObjectBusy: See rados.make_ex
176 :raises NoData: See rados.make_ex
177 :raises InterruptedOrTimeoutError: See rados.make_ex
178 :raises TimedOut: See rados.make_ex
179 :raises ValueError: return code != 0
185 argdict
.update({k
: v
for k
, v
in kwargs
.items() if v
is not None})
186 result
= CommandResult("")
187 mgr
.send_command(result
, srv_type
, srv_spec
, json
.dumps(argdict
), "")
188 r
, outb
, outs
= result
.wait()
190 logger
.error("send_command '%s' failed. (r=%s, outs=\"%s\", kwargs=%s)", prefix
, r
,
193 raise SendCommandError(outs
, prefix
, argdict
, r
)
196 return json
.loads(outb
or outs
)
197 except Exception: # pylint: disable=broad-except
201 def _get_smart_data_by_device(device
):
202 # type: (dict) -> Dict[str, dict]
203 # Check whether the device is associated with daemons.
204 if 'daemons' in device
and device
['daemons']:
205 dev_smart_data
= None
207 # The daemons associated with the device. Note, the list may
208 # contain daemons that are 'down' or 'destroyed'.
209 daemons
= device
.get('daemons')
211 # Get a list of all OSD daemons on all hosts that are 'up'
212 # because SMART data can not be retrieved from daemons that
213 # are 'down' or 'destroyed'.
214 osd_tree
= CephService
.send_command('mon', 'osd tree')
216 node
['name'] for node
in osd_tree
.get('nodes', {})
217 if node
.get('status') == 'up'
220 # Finally get the daemons on the host of the given device
221 # that are 'up'. All daemons on the same host can deliver
222 # SMART data, thus it is not relevant for us which daemon
224 daemons
= list(set(daemons
) & set(osd_daemons_up
)) # type: ignore
226 for daemon
in daemons
:
227 svc_type
, svc_id
= daemon
.split('.')
229 dev_smart_data
= CephService
.send_command(
230 svc_type
, 'smart', svc_id
, devid
=device
['devid'])
231 except SendCommandError
:
232 # Try to retrieve SMART data from another daemon.
234 for dev_id
, dev_data
in dev_smart_data
.items():
235 if 'error' in dev_data
:
237 '[SMART] Error retrieving smartctl data for device ID "%s": %s',
240 if dev_smart_data
is None:
241 raise DashboardException(
242 'Failed to retrieve SMART data for device ID "{}"'.format(
244 return dev_smart_data
245 logger
.warning('[SMART] No daemons associated with device ID "%s"',
250 def get_devices_by_host(hostname
):
252 return CephService
.send_command('mon',
257 def get_devices_by_daemon(daemon_type
, daemon_id
):
259 return CephService
.send_command('mon',
260 'device ls-by-daemon',
262 daemon_type
, daemon_id
))
265 def get_smart_data_by_host(hostname
):
266 # type: (str) -> dict
268 Get the SMART data of all devices on the given host, regardless
269 of the daemon (osd, mon, ...).
270 :param hostname: The name of the host.
271 :return: A dictionary containing the SMART data of every device
272 on the given host. The device name is used as the key in the
275 devices
= CephService
.get_devices_by_host(hostname
)
276 smart_data
= {} # type: dict
278 for device
in devices
:
279 if device
['devid'] not in smart_data
:
281 CephService
._get
_smart
_data
_by
_device
(device
))
285 def get_smart_data_by_daemon(daemon_type
, daemon_id
):
286 # type: (str, str) -> Dict[str, dict]
288 Get the SMART data of the devices associated with the given daemon.
289 :param daemon_type: The daemon type, e.g. 'osd' or 'mon'.
290 :param daemon_id: The daemon identifier.
291 :return: A dictionary containing the SMART data of every device
292 associated with the given daemon. The device name is used as the
293 key in the dictionary.
295 devices
= CephService
.get_devices_by_daemon(daemon_type
, daemon_id
)
296 smart_data
= {} # type: Dict[str, dict]
298 for device
in devices
:
299 if device
['devid'] not in smart_data
:
301 CephService
._get
_smart
_data
_by
_device
(device
))
305 def get_rates(cls
, svc_type
, svc_name
, path
):
307 :return: the derivative of mgr.get_counter()
308 :rtype: list[tuple[int, float]]"""
309 data
= mgr
.get_counter(svc_type
, svc_name
, path
)[path
]
310 return get_time_series_rates(data
)
313 def get_rate(cls
, svc_type
, svc_name
, path
):
314 """returns most recent rate"""
315 return get_most_recent_rate(cls
.get_rates(svc_type
, svc_name
, path
))
318 def get_client_perf(cls
):
319 pools_stats
= mgr
.get('osd_pool_stats')['pool_stats']
323 'read_op_per_sec': 0,
324 'write_bytes_sec': 0,
325 'write_op_per_sec': 0,
327 recovery_stats
= {'recovering_bytes_per_sec': 0}
329 for pool_stats
in pools_stats
:
330 client_io
= pool_stats
['client_io_rate']
331 for stat
in list(io_stats
.keys()):
332 if stat
in client_io
:
333 io_stats
[stat
] += client_io
[stat
]
335 client_recovery
= pool_stats
['recovery_rate']
336 for stat
in list(recovery_stats
.keys()):
337 if stat
in client_recovery
:
338 recovery_stats
[stat
] += client_recovery
[stat
]
340 client_perf
= io_stats
.copy()
341 client_perf
.update(recovery_stats
)
346 def get_scrub_status(cls
):
347 enabled_flags
= mgr
.get('osd_map')['flags_set']
348 if cls
.OSD_FLAG_NO_SCRUB
in enabled_flags
or cls
.OSD_FLAG_NO_DEEP_SCRUB
in enabled_flags
:
349 return cls
.SCRUB_STATUS_DISABLED
351 grouped_pg_statuses
= mgr
.get('pg_summary')['all']
352 for grouped_pg_status
in grouped_pg_statuses
.keys():
353 if len(grouped_pg_status
.split(cls
.PG_STATUS_SCRUBBING
)) > 1 \
354 or len(grouped_pg_status
.split(cls
.PG_STATUS_DEEP_SCRUBBING
)) > 1:
355 return cls
.SCRUB_STATUS_ACTIVE
357 return cls
.SCRUB_STATUS_INACTIVE
360 def get_pg_info(cls
):
361 pg_summary
= mgr
.get('pg_summary')
362 object_stats
= {stat
: pg_summary
['pg_stats_sum']['stat_sum'][stat
] for stat
in [
363 'num_objects', 'num_object_copies', 'num_objects_degraded',
364 'num_objects_misplaced', 'num_objects_unfound']}
367 total_osds
= len(pg_summary
['by_osd'])
370 for _
, osd_pg_statuses
in pg_summary
['by_osd'].items():
371 for _
, pg_amount
in osd_pg_statuses
.items():
372 total_pgs
+= pg_amount
374 pgs_per_osd
= total_pgs
/ total_osds
377 'object_stats': object_stats
,
378 'statuses': pg_summary
['all'],
379 'pgs_per_osd': pgs_per_osd
,