# ceph.git (ceph 16.2.7): ceph/src/pybind/mgr/dashboard/services/ceph_service.py
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import json
import logging

import rados
from mgr_module import CommandResult
from mgr_util import get_most_recent_rate, get_time_series_rates

from .. import mgr
from ..exceptions import DashboardException

try:
    from typing import Any, Dict, Optional, Union
except ImportError:
    pass  # For typing only

logger = logging.getLogger('ceph_service')


class SendCommandError(rados.Error):
    def __init__(self, err, prefix, argdict, errno):
        self.prefix = prefix
        self.argdict = argdict
        super(SendCommandError, self).__init__(err, errno)


# pylint: disable=too-many-public-methods
class CephService(object):

    OSD_FLAG_NO_SCRUB = 'noscrub'
    OSD_FLAG_NO_DEEP_SCRUB = 'nodeep-scrub'

    PG_STATUS_SCRUBBING = 'scrubbing'
    PG_STATUS_DEEP_SCRUBBING = 'deep'

    SCRUB_STATUS_DISABLED = 'Disabled'
    SCRUB_STATUS_ACTIVE = 'Active'
    SCRUB_STATUS_INACTIVE = 'Inactive'

    @classmethod
    def get_service_map(cls, service_name):
        service_map = {}  # type: Dict[str, dict]
        for server in mgr.list_servers():
            for service in server['services']:
                if service['type'] == service_name:
                    if server['hostname'] not in service_map:
                        service_map[server['hostname']] = {
                            'server': server,
                            'services': []
                        }
                    inst_id = service['id']
                    metadata = mgr.get_metadata(service_name, inst_id)
                    status = mgr.get_daemon_status(service_name, inst_id)
                    service_map[server['hostname']]['services'].append({
                        'id': inst_id,
                        'type': service_name,
                        'hostname': server['hostname'],
                        'metadata': metadata,
                        'status': status
                    })
        return service_map

    @classmethod
    def get_service_list(cls, service_name):
        service_map = cls.get_service_map(service_name)
        return [svc for _, svcs in service_map.items() for svc in svcs['services']]

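    # Illustrative usage of get_service_map()/get_service_list() above (not
    # part of the upstream module), e.g. for OSD daemons:
    #
    #   osd_map = CephService.get_service_map('osd')   # dict keyed by hostname
    #   osds = CephService.get_service_list('osd')     # flat list of daemon dicts
    #   hostnames = {svc['hostname'] for svc in osds}
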
    @classmethod
    def get_service_data_by_metadata_id(cls,
                                        service_type: str,
                                        metadata_id: str) -> Optional[Dict[str, Any]]:
        for server in mgr.list_servers():
            for service in server['services']:
                if service['type'] == service_type:
                    metadata = mgr.get_metadata(service_type, service['id'])
                    if metadata_id == metadata['id']:
                        return {
                            'id': metadata['id'],
                            'service_map_id': str(service['id']),
                            'type': service_type,
                            'hostname': server['hostname'],
                            'metadata': metadata
                        }
        return None

    @classmethod
    def get_service(cls, service_type: str, metadata_id: str) -> Optional[Dict[str, Any]]:
        svc_data = cls.get_service_data_by_metadata_id(service_type, metadata_id)
        if svc_data:
            svc_data['status'] = mgr.get_daemon_status(svc_data['type'], svc_data['service_map_id'])
        return svc_data

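    # Illustrative sketch (not part of the upstream module): looking up a single
    # daemon by the ID reported in its metadata and reading its status:
    #
    #   svc = CephService.get_service('osd', '0')  # hypothetical metadata ID
    #   if svc:
    #       print(svc['hostname'], svc['status'])
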
    @classmethod
    def get_service_perf_counters(cls, service_type: str, service_id: str) -> Dict[str, Any]:
        schema_dict = mgr.get_perf_schema(service_type, service_id)
        schema = schema_dict["{}.{}".format(service_type, service_id)]
        counters = []
        for key, value in sorted(schema.items()):
            counter = {'name': str(key), 'description': value['description']}
            # pylint: disable=W0212
            if mgr._stattype_to_str(value['type']) == 'counter':
                counter['value'] = cls.get_rate(
                    service_type, service_id, key)
                counter['unit'] = mgr._unit_to_str(value['units'])
            else:
                counter['value'] = mgr.get_latest(
                    service_type, service_id, key)
                counter['unit'] = ''
            counters.append(counter)

        return {
            'service': {
                'type': service_type,
                'id': str(service_id)
            },
            'counters': counters
        }

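    # Illustrative sketch (not part of the upstream module): fetching the perf
    # counters of one OSD; counters of type 'counter' carry a rate, all other
    # stat types carry the latest raw value:
    #
    #   perf = CephService.get_service_perf_counters('osd', '0')
    #   for c in perf['counters']:
    #       print(c['name'], c['value'], c['unit'])
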
    @classmethod
    def get_pool_list(cls, application=None):
        osd_map = mgr.get('osd_map')
        if not application:
            return osd_map['pools']
        return [pool for pool in osd_map['pools']
                if application in pool.get('application_metadata', {})]

    @classmethod
    def get_pool_list_with_stats(cls, application=None):
        # pylint: disable=too-many-locals
        pools = cls.get_pool_list(application)

        pools_w_stats = []

        pg_summary = mgr.get("pg_summary")
        pool_stats = mgr.get_updated_pool_stats()

        for pool in pools:
            pool['pg_status'] = pg_summary['by_pool'][pool['pool'].__str__()]
            stats = pool_stats[pool['pool']]
            s = {}

            for stat_name, stat_series in stats.items():
                rates = get_time_series_rates(stat_series)
                s[stat_name] = {
                    'latest': stat_series[0][1],
                    'rate': get_most_recent_rate(rates),
                    'rates': rates
                }
            pool['stats'] = s
            pools_w_stats.append(pool)
        return pools_w_stats

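    # Illustrative sketch (not part of the upstream module): each pool returned
    # above carries a 'stats' dict per counter with 'latest', 'rate' and 'rates':
    #
    #   for pool in CephService.get_pool_list_with_stats('rbd'):
    #       rd = pool['stats'].get('rd_bytes', {})  # counter name is an assumption
    #       print(pool['pool_name'], rd.get('latest'), rd.get('rate'))
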
    @classmethod
    def get_erasure_code_profiles(cls):
        def _serialize_ecp(name, ecp):
            def serialize_numbers(key):
                value = ecp.get(key)
                if value is not None:
                    ecp[key] = int(value)

            ecp['name'] = name
            serialize_numbers('k')
            serialize_numbers('m')
            return ecp

        ret = []
        for name, ecp in mgr.get('osd_map').get('erasure_code_profiles', {}).items():
            ret.append(_serialize_ecp(name, ecp))
        return ret

    @classmethod
    def get_pool_name_from_id(cls, pool_id):
        # type: (int) -> Union[str, None]
        return mgr.rados.pool_reverse_lookup(pool_id)

    @classmethod
    def get_pool_by_attribute(cls, attribute, value):
        # type: (str, Any) -> Union[dict, None]
        pool_list = cls.get_pool_list()
        for pool in pool_list:
            if attribute in pool and pool[attribute] == value:
                return pool
        return None

    @classmethod
    def get_pool_pg_status(cls, pool_name):
        # type: (str) -> dict
        pool = cls.get_pool_by_attribute('pool_name', pool_name)
        if pool is None:
            return {}
        return mgr.get("pg_summary")['by_pool'][pool['pool'].__str__()]

    @staticmethod
    def send_command(srv_type, prefix, srv_spec='', **kwargs):
        # type: (str, str, Optional[str], Any) -> Any
        """
        :type prefix: str
        :param srv_type: service type, e.g. 'mon' or 'osd'
        :param kwargs: will be added to argdict
        :param srv_spec: typically empty, or something like "<fs_id>:0"

        :raises PermissionError: See rados.make_ex
        :raises ObjectNotFound: See rados.make_ex
        :raises IOError: See rados.make_ex
        :raises NoSpace: See rados.make_ex
        :raises ObjectExists: See rados.make_ex
        :raises ObjectBusy: See rados.make_ex
        :raises NoData: See rados.make_ex
        :raises InterruptedOrTimeoutError: See rados.make_ex
        :raises TimedOut: See rados.make_ex
        :raises SendCommandError: if the return code of the command is not zero
        """
        argdict = {
            "prefix": prefix,
            "format": "json",
        }
        argdict.update({k: v for k, v in kwargs.items() if v is not None})
        result = CommandResult("")
        mgr.send_command(result, srv_type, srv_spec, json.dumps(argdict), "")
        r, outb, outs = result.wait()
        if r != 0:
            logger.error("send_command '%s' failed. (r=%s, outs=\"%s\", kwargs=%s)", prefix, r,
                         outs, kwargs)

            raise SendCommandError(outs, prefix, argdict, r)

        try:
            return json.loads(outb or outs)
        except Exception:  # pylint: disable=broad-except
            return outb

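    # Illustrative usage of send_command() above (not part of the upstream
    # module); on success the parsed JSON output is returned, on failure a
    # SendCommandError is raised:
    #
    #   osd_tree = CephService.send_command('mon', 'osd tree')
    #   up_osds = [n['name'] for n in osd_tree.get('nodes', [])
    #              if n.get('status') == 'up']
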
    @staticmethod
    def _get_smart_data_by_device(device):
        # type: (dict) -> Dict[str, dict]
        # Check whether the device is associated with daemons.
        if 'daemons' in device and device['daemons']:
            dev_smart_data = None

            # The daemons associated with the device. Note, the list may
            # contain daemons that are 'down' or 'destroyed'.
            daemons = device.get('daemons')

            # Get a list of all OSD daemons on all hosts that are 'up'
            # because SMART data can not be retrieved from daemons that
            # are 'down' or 'destroyed'.
            osd_tree = CephService.send_command('mon', 'osd tree')
            osd_daemons_up = [
                node['name'] for node in osd_tree.get('nodes', {})
                if node.get('status') == 'up'
            ]

            # Finally get the daemons on the host of the given device
            # that are 'up'. All daemons on the same host can deliver
            # SMART data, thus it is not relevant for us which daemon
            # we are using.
            daemons = list(set(daemons) & set(osd_daemons_up))  # type: ignore

            for daemon in daemons:
                svc_type, svc_id = daemon.split('.')
                if 'osd' in svc_type:
                    try:
                        dev_smart_data = CephService.send_command(
                            svc_type, 'smart', svc_id, devid=device['devid'])
                    except SendCommandError:
                        # Try to retrieve SMART data from another daemon.
                        continue
                elif 'mon' in svc_type:
                    try:
                        dev_smart_data = CephService.send_command(
                            svc_type, 'device query-daemon-health-metrics', who=daemon)
                    except SendCommandError:
                        # Try to retrieve SMART data from another daemon.
                        continue
                else:
                    dev_smart_data = {}
                for dev_id, dev_data in dev_smart_data.items():
                    if 'error' in dev_data:
                        logger.warning(
                            '[SMART] Error retrieving smartctl data for device ID "%s": %s',
                            dev_id, dev_data)
                break
            if dev_smart_data is None:
                raise DashboardException(
                    'Failed to retrieve SMART data for device ID "{}"'.format(
                        device['devid']))
            return dev_smart_data
        logger.warning('[SMART] No daemons associated with device ID "%s"',
                       device['devid'])
        return {}

    @staticmethod
    def get_devices_by_host(hostname):
        # type: (str) -> dict
        return CephService.send_command('mon',
                                        'device ls-by-host',
                                        host=hostname)

    @staticmethod
    def get_devices_by_daemon(daemon_type, daemon_id):
        # type: (str, str) -> dict
        return CephService.send_command('mon',
                                        'device ls-by-daemon',
                                        who='{}.{}'.format(
                                            daemon_type, daemon_id))

    @staticmethod
    def get_smart_data_by_host(hostname):
        # type: (str) -> dict
        """
        Get the SMART data of all devices on the given host, regardless
        of the daemon (osd, mon, ...).
        :param hostname: The name of the host.
        :return: A dictionary containing the SMART data of every device
          on the given host. The device name is used as the key in the
          dictionary.
        """
        devices = CephService.get_devices_by_host(hostname)
        smart_data = {}  # type: dict
        if devices:
            for device in devices:
                if device['devid'] not in smart_data:
                    smart_data.update(
                        CephService._get_smart_data_by_device(device))
        else:
            logger.debug('[SMART] could not retrieve device list from host %s', hostname)
        return smart_data

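    # Illustrative usage of get_smart_data_by_host() above (not part of the
    # upstream module); the result is keyed by device, values are the smartctl
    # output returned by the daemons:
    #
    #   smart = CephService.get_smart_data_by_host('ceph-node-01')  # hypothetical hostname
    #   for devid, data in smart.items():
    #       print(devid, data.get('error', 'ok'))
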
    @staticmethod
    def get_smart_data_by_daemon(daemon_type, daemon_id):
        # type: (str, str) -> Dict[str, dict]
        """
        Get the SMART data of the devices associated with the given daemon.
        :param daemon_type: The daemon type, e.g. 'osd' or 'mon'.
        :param daemon_id: The daemon identifier.
        :return: A dictionary containing the SMART data of every device
          associated with the given daemon. The device name is used as the
          key in the dictionary.
        """
        devices = CephService.get_devices_by_daemon(daemon_type, daemon_id)
        smart_data = {}  # type: Dict[str, dict]
        if devices:
            for device in devices:
                if device['devid'] not in smart_data:
                    smart_data.update(
                        CephService._get_smart_data_by_device(device))
        else:
            msg = '[SMART] could not retrieve device list from daemon with type %s and ' +\
                  'with ID %s'
            logger.debug(msg, daemon_type, daemon_id)
        return smart_data

    @classmethod
    def get_rates(cls, svc_type, svc_name, path):
        """
        :return: the derivative of mgr.get_counter()
        :rtype: list[tuple[int, float]]"""
        data = mgr.get_counter(svc_type, svc_name, path)[path]
        return get_time_series_rates(data)

    @classmethod
    def get_rate(cls, svc_type, svc_name, path):
        """returns most recent rate"""
        return get_most_recent_rate(cls.get_rates(svc_type, svc_name, path))

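    # Illustrative usage of get_rates()/get_rate() above (not part of the
    # upstream module): rates are derived from the mgr time series of a perf
    # counter, e.g. for OSD 0 (the counter path is an assumption):
    #
    #   series = CephService.get_rates('osd', '0', 'osd.op_w')  # [(timestamp, rate), ...]
    #   latest = CephService.get_rate('osd', '0', 'osd.op_w')   # most recent rate only
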
    @classmethod
    def get_client_perf(cls):
        pools_stats = mgr.get('osd_pool_stats')['pool_stats']

        io_stats = {
            'read_bytes_sec': 0,
            'read_op_per_sec': 0,
            'write_bytes_sec': 0,
            'write_op_per_sec': 0,
        }
        recovery_stats = {'recovering_bytes_per_sec': 0}

        for pool_stats in pools_stats:
            client_io = pool_stats['client_io_rate']
            for stat in list(io_stats.keys()):
                if stat in client_io:
                    io_stats[stat] += client_io[stat]

            client_recovery = pool_stats['recovery_rate']
            for stat in list(recovery_stats.keys()):
                if stat in client_recovery:
                    recovery_stats[stat] += client_recovery[stat]

        client_perf = io_stats.copy()
        client_perf.update(recovery_stats)

        return client_perf

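    # Illustrative sketch (not part of the upstream module): the dict returned
    # above aggregates client I/O and recovery rates across all pools:
    #
    #   perf = CephService.get_client_perf()
    #   # -> {'read_bytes_sec': ..., 'read_op_per_sec': ..., 'write_bytes_sec': ...,
    #   #     'write_op_per_sec': ..., 'recovering_bytes_per_sec': ...}
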
    @classmethod
    def get_scrub_status(cls):
        enabled_flags = mgr.get('osd_map')['flags_set']
        if cls.OSD_FLAG_NO_SCRUB in enabled_flags or cls.OSD_FLAG_NO_DEEP_SCRUB in enabled_flags:
            return cls.SCRUB_STATUS_DISABLED

        grouped_pg_statuses = mgr.get('pg_summary')['all']
        for grouped_pg_status in grouped_pg_statuses.keys():
            if len(grouped_pg_status.split(cls.PG_STATUS_SCRUBBING)) > 1 \
                    or len(grouped_pg_status.split(cls.PG_STATUS_DEEP_SCRUBBING)) > 1:
                return cls.SCRUB_STATUS_ACTIVE

        return cls.SCRUB_STATUS_INACTIVE

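    # Illustrative sketch (not part of the upstream module): the scrub status is
    # 'Disabled' when noscrub/nodeep-scrub is set, 'Active' while any PG state
    # contains 'scrubbing' or 'deep', and 'Inactive' otherwise:
    #
    #   if CephService.get_scrub_status() == CephService.SCRUB_STATUS_ACTIVE:
    #       logger.info('cluster is currently scrubbing')
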
    @classmethod
    def get_pg_info(cls):
        pg_summary = mgr.get('pg_summary')
        object_stats = {stat: pg_summary['pg_stats_sum']['stat_sum'][stat] for stat in [
            'num_objects', 'num_object_copies', 'num_objects_degraded',
            'num_objects_misplaced', 'num_objects_unfound']}

        pgs_per_osd = 0.0
        total_osds = len(pg_summary['by_osd'])
        if total_osds > 0:
            total_pgs = 0.0
            for _, osd_pg_statuses in pg_summary['by_osd'].items():
                for _, pg_amount in osd_pg_statuses.items():
                    total_pgs += pg_amount

            pgs_per_osd = total_pgs / total_osds

        return {
            'object_stats': object_stats,
            'statuses': pg_summary['all'],
            'pgs_per_osd': pgs_per_osd,
        }
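
    # Illustrative sketch (not part of the upstream module): shape of the dict
    # returned by get_pg_info() above:
    #
    #   info = CephService.get_pg_info()
    #   # info['object_stats'] -> num_objects, num_object_copies, ...
    #   # info['statuses']     -> PG states as reported in 'pg_summary'
    #   # info['pgs_per_osd']  -> float, total PG states counted per OSD divided
    #   #                         by the number of OSDs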