# -*- coding: utf-8 -*-
from __future__ import absolute_import
import json
import logging

import rados

from mgr_module import CommandResult
from mgr_util import get_time_series_rates, get_most_recent_rate

from .. import mgr
from ..exceptions import DashboardException

try:
    from typing import Dict, Any, Union  # pylint: disable=unused-import
except ImportError:
    pass  # For typing only

logger = logging.getLogger('ceph_service')


class SendCommandError(rados.Error):
    def __init__(self, err, prefix, argdict, errno):
        self.prefix = prefix
        self.argdict = argdict
        super(SendCommandError, self).__init__(err, errno)


class CephService(object):

    OSD_FLAG_NO_SCRUB = 'noscrub'
    OSD_FLAG_NO_DEEP_SCRUB = 'nodeep-scrub'

    PG_STATUS_SCRUBBING = 'scrubbing'
    PG_STATUS_DEEP_SCRUBBING = 'deep'

    SCRUB_STATUS_DISABLED = 'Disabled'
    SCRUB_STATUS_ACTIVE = 'Active'
    SCRUB_STATUS_INACTIVE = 'Inactive'

    @classmethod
    def get_service_map(cls, service_name):
        service_map = {}  # type: Dict[str, dict]
        for server in mgr.list_servers():
            for service in server['services']:
                if service['type'] == service_name:
                    if server['hostname'] not in service_map:
                        service_map[server['hostname']] = {
                            'server': server,
                            'services': []
                        }
                    inst_id = service['id']
                    metadata = mgr.get_metadata(service_name, inst_id)
                    status = mgr.get_daemon_status(service_name, inst_id)
                    service_map[server['hostname']]['services'].append({
                        'id': inst_id,
                        'type': service_name,
                        'hostname': server['hostname'],
                        'metadata': metadata,
                        'status': status
                    })
        return service_map
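
    # Sketch of the structure assembled above (hostnames and field values are
    # assumed for illustration, not taken from a real cluster):
    #
    #   CephService.get_service_map('osd') ->
    #   {
    #       'ceph-node-1': {
    #           'server': {...},  # the raw entry from mgr.list_servers()
    #           'services': [
    #               {'id': '0', 'type': 'osd', 'hostname': 'ceph-node-1',
    #                'metadata': {...}, 'status': {...}},
    #           ],
    #       },
    #   }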

    @classmethod
    def get_service_list(cls, service_name):
        service_map = cls.get_service_map(service_name)
        return [svc for _, svcs in service_map.items() for svc in svcs['services']]

    @classmethod
    def get_service(cls, service_name, service_id):
        for server in mgr.list_servers():
            for service in server['services']:
                if service['type'] == service_name:
                    inst_id = service['id']
                    if inst_id == service_id:
                        metadata = mgr.get_metadata(service_name, inst_id)
                        status = mgr.get_daemon_status(service_name, inst_id)
                        return {
                            'id': inst_id,
                            'type': service_name,
                            'hostname': server['hostname'],
                            'metadata': metadata,
                            'status': status
                        }
        return None

    @classmethod
    def get_pool_list(cls, application=None):
        osd_map = mgr.get('osd_map')
        if not application:
            return osd_map['pools']
        return [pool for pool in osd_map['pools']
                if application in pool.get('application_metadata', {})]

    @classmethod
    def get_pool_list_with_stats(cls, application=None):
        # pylint: disable=too-many-locals
        pools = cls.get_pool_list(application)

        pools_w_stats = []

        pg_summary = mgr.get("pg_summary")
        pool_stats = mgr.get_updated_pool_stats()

        for pool in pools:
            pool['pg_status'] = pg_summary['by_pool'][pool['pool'].__str__()]
            stats = pool_stats[pool['pool']]
            s = {}

            for stat_name, stat_series in stats.items():
                rates = get_time_series_rates(stat_series)
                s[stat_name] = {
                    'latest': stat_series[0][1],
                    'rate': get_most_recent_rate(rates),
                    'rates': rates
                }
            pool['stats'] = s
            pools_w_stats.append(pool)
        return pools_w_stats
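
    # Sketch of the per-pool stats attached above (the counter name 'rd_bytes'
    # and all numbers are assumed for illustration):
    #
    #   pool['stats']['rd_bytes'] == {
    #       'latest': 1024,                      # newest raw sample
    #       'rate': 12.5,                        # most recent derivative
    #       'rates': [(1591000000, 12.5), ...],  # full rate time series
    #   }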

    @classmethod
    def get_erasure_code_profiles(cls):
        def _serialize_ecp(name, ecp):
            def serialize_numbers(key):
                value = ecp.get(key)
                if value is not None:
                    ecp[key] = int(value)

            ecp['name'] = name
            serialize_numbers('k')
            serialize_numbers('m')
            return ecp

        ret = []
        for name, ecp in mgr.get('osd_map').get('erasure_code_profiles', {}).items():
            ret.append(_serialize_ecp(name, ecp))
        return ret
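
    # Sketch of the serialized output (profile name and values assumed for
    # illustration); 'k' and 'm' arrive as strings in the OSD map and are
    # cast to int above:
    #
    #   [{'name': 'default', 'k': 2, 'm': 1, 'plugin': 'jerasure',
    #     'technique': 'reed_sol_van'}]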

    @classmethod
    def get_pool_name_from_id(cls, pool_id):
        # type: (int) -> Union[str, None]
        pool = cls.get_pool_by_attribute('pool', pool_id)
        return pool['pool_name'] if pool is not None else None

    @classmethod
    def get_pool_by_attribute(cls, attribute, value):
        # type: (str, Any) -> Union[dict, None]
        pool_list = cls.get_pool_list()
        for pool in pool_list:
            if attribute in pool and pool[attribute] == value:
                return pool
        return None

    @classmethod
    def get_pool_pg_status(cls, pool_name):
        # type: (str) -> dict
        pool = cls.get_pool_by_attribute('pool_name', pool_name)
        if pool is None:
            return {}
        return mgr.get("pg_summary")['by_pool'][pool['pool'].__str__()]

    @classmethod
    def send_command(cls, srv_type, prefix, srv_spec='', **kwargs):
        """
        :type prefix: str
        :param srv_type: mon |
        :param kwargs: will be added to argdict
        :param srv_spec: typically empty, or something like "<fs_id>:0"

        :raises PermissionError: See rados.make_ex
        :raises ObjectNotFound: See rados.make_ex
        :raises IOError: See rados.make_ex
        :raises NoSpace: See rados.make_ex
        :raises ObjectExists: See rados.make_ex
        :raises ObjectBusy: See rados.make_ex
        :raises NoData: See rados.make_ex
        :raises InterruptedOrTimeoutError: See rados.make_ex
        :raises TimedOut: See rados.make_ex
        :raises SendCommandError: if the return code is not zero
        """
        argdict = {
            "prefix": prefix,
            "format": "json",
        }
        argdict.update({k: v for k, v in kwargs.items() if v is not None})
        result = CommandResult("")
        mgr.send_command(result, srv_type, srv_spec, json.dumps(argdict), "")
        r, outb, outs = result.wait()
        if r != 0:
            logger.error("send_command '%s' failed. (r=%s, outs=\"%s\", kwargs=%s)", prefix, r,
                         outs, kwargs)

            raise SendCommandError(outs, prefix, argdict, r)

        try:
            return json.loads(outb or outs)
        except Exception:  # pylint: disable=broad-except
            return outb
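
    # Minimal usage sketch (the call sites below are illustrative, not taken
    # from this file; 'osd tree' and 'osd set' are standard mon commands):
    #
    #   osd_tree = CephService.send_command('mon', 'osd tree')
    #   CephService.send_command('mon', 'osd set', key='noout')
    #
    # kwargs with a value of None are dropped; everything else is merged into
    # the JSON argdict next to 'prefix' and 'format'.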

    @staticmethod
    def _get_smart_data_by_device(device):
        # type: (dict) -> Dict[str, dict]
        # Check whether the device is associated with daemons.
        if 'daemons' in device and device['daemons']:
            dev_smart_data = None

            # The daemons associated with the device. Note, the list may
            # contain daemons that are 'down' or 'destroyed'.
            daemons = device.get('daemons')

            # Get a list of all OSD daemons on all hosts that are 'up'
            # because SMART data can not be retrieved from daemons that
            # are 'down' or 'destroyed'.
            osd_tree = CephService.send_command('mon', 'osd tree')
            osd_daemons_up = [
                node['name'] for node in osd_tree.get('nodes', {})
                if node.get('status') == 'up'
            ]

            # Finally get the daemons on the host of the given device
            # that are 'up'. All daemons on the same host can deliver
            # SMART data, thus it is not relevant for us which daemon
            # we are using.
            daemons = list(set(daemons) & set(osd_daemons_up))  # type: ignore

            for daemon in daemons:
                svc_type, svc_id = daemon.split('.')
                try:
                    dev_smart_data = CephService.send_command(
                        svc_type, 'smart', svc_id, devid=device['devid'])
                except SendCommandError:
                    # Try to retrieve SMART data from another daemon.
                    continue
                for dev_id, dev_data in dev_smart_data.items():
                    if 'error' in dev_data:
                        logger.warning(
                            '[SMART] Error retrieving smartctl data for device ID "%s": %s',
                            dev_id, dev_data)
                break
            if dev_smart_data is None:
                raise DashboardException(
                    'Failed to retrieve SMART data for device ID "{}"'.format(
                        device['devid']))
            return dev_smart_data
        logger.warning('[SMART] No daemons associated with device ID "%s"',
                       device['devid'])
        return {}
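
    # The 'device' argument is one entry of the output of 'device ls-by-host'
    # or 'device ls-by-daemon'; a sketch of the fields used above (values
    # assumed for illustration):
    #
    #   {'devid': 'VENDOR_MODEL_SERIAL', 'daemons': ['osd.0', 'osd.3'], ...}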

    @staticmethod
    def get_devices_by_host(hostname):
        # (str) -> dict
        return CephService.send_command('mon',
                                        'device ls-by-host',
                                        host=hostname)

    @staticmethod
    def get_devices_by_daemon(daemon_type, daemon_id):
        # (str, str) -> dict
        return CephService.send_command('mon',
                                        'device ls-by-daemon',
                                        who='{}.{}'.format(
                                            daemon_type, daemon_id))

    @staticmethod
    def get_smart_data_by_host(hostname):
        # type: (str) -> dict
        """
        Get the SMART data of all devices on the given host, regardless
        of the daemon (osd, mon, ...).
        :param hostname: The name of the host.
        :return: A dictionary containing the SMART data of every device
            on the given host. The device name is used as the key in the
            dictionary.
        """
        devices = CephService.get_devices_by_host(hostname)
        smart_data = {}  # type: dict
        if devices:
            for device in devices:
                if device['devid'] not in smart_data:
                    smart_data.update(
                        CephService._get_smart_data_by_device(device))
        return smart_data
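
    # Usage sketch (hostname assumed): the result is keyed by device ID, one
    # entry per physical device on the host:
    #
    #   smart = CephService.get_smart_data_by_host('ceph-node-1')
    #   # -> {'VENDOR_MODEL_SERIAL': {... smartctl output ...}, ...}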

    @staticmethod
    def get_smart_data_by_daemon(daemon_type, daemon_id):
        # type: (str, str) -> Dict[str, dict]
        """
        Get the SMART data of the devices associated with the given daemon.
        :param daemon_type: The daemon type, e.g. 'osd' or 'mon'.
        :param daemon_id: The daemon identifier.
        :return: A dictionary containing the SMART data of every device
            associated with the given daemon. The device name is used as the
            key in the dictionary.
        """
        devices = CephService.get_devices_by_daemon(daemon_type, daemon_id)
        smart_data = {}  # type: Dict[str, dict]
        if devices:
            for device in devices:
                if device['devid'] not in smart_data:
                    smart_data.update(
                        CephService._get_smart_data_by_device(device))
        return smart_data

    @classmethod
    def get_rates(cls, svc_type, svc_name, path):
        """
        :return: the derivative of mgr.get_counter()
        :rtype: list[tuple[int, float]]"""
        data = mgr.get_counter(svc_type, svc_name, path)[path]
        return get_time_series_rates(data)

    @classmethod
    def get_rate(cls, svc_type, svc_name, path):
        """returns most recent rate"""
        return get_most_recent_rate(cls.get_rates(svc_type, svc_name, path))
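
    # Usage sketch (OSD id and counter path assumed for illustration):
    #
    #   rates = CephService.get_rates('osd', '0', 'osd.op_w')
    #   # -> [(1591000000, 3.2), (1591000005, 2.8), ...]
    #   latest = CephService.get_rate('osd', '0', 'osd.op_w')
    #   # -> 2.8 (the rate of the newest pair in the series)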

    @classmethod
    def get_client_perf(cls):
        pools_stats = mgr.get('osd_pool_stats')['pool_stats']

        io_stats = {
            'read_bytes_sec': 0,
            'read_op_per_sec': 0,
            'write_bytes_sec': 0,
            'write_op_per_sec': 0,
        }
        recovery_stats = {'recovering_bytes_per_sec': 0}

        for pool_stats in pools_stats:
            client_io = pool_stats['client_io_rate']
            for stat in list(io_stats.keys()):
                if stat in client_io:
                    io_stats[stat] += client_io[stat]

            client_recovery = pool_stats['recovery_rate']
            for stat in list(recovery_stats.keys()):
                if stat in client_recovery:
                    recovery_stats[stat] += client_recovery[stat]

        client_perf = io_stats.copy()
        client_perf.update(recovery_stats)

        return client_perf
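
    # The aggregated result always contains this fixed set of keys (the
    # numbers are assumed for illustration):
    #
    #   {'read_bytes_sec': 0, 'read_op_per_sec': 0, 'write_bytes_sec': 1048576,
    #    'write_op_per_sec': 25, 'recovering_bytes_per_sec': 0}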

    @classmethod
    def get_scrub_status(cls):
        enabled_flags = mgr.get('osd_map')['flags_set']
        if cls.OSD_FLAG_NO_SCRUB in enabled_flags or cls.OSD_FLAG_NO_DEEP_SCRUB in enabled_flags:
            return cls.SCRUB_STATUS_DISABLED

        grouped_pg_statuses = mgr.get('pg_summary')['all']
        for grouped_pg_status in grouped_pg_statuses.keys():
            if len(grouped_pg_status.split(cls.PG_STATUS_SCRUBBING)) > 1 \
                    or len(grouped_pg_status.split(cls.PG_STATUS_DEEP_SCRUBBING)) > 1:
                return cls.SCRUB_STATUS_ACTIVE

        return cls.SCRUB_STATUS_INACTIVE
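
    # The keys of pg_summary['all'] are combined PG state strings such as
    # 'active+clean' or 'active+clean+scrubbing+deep' (examples, not an
    # exhaustive list); the split() checks above are effectively substring
    # tests for 'scrubbing' and 'deep'.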

    @classmethod
    def get_pg_info(cls):
        pg_summary = mgr.get('pg_summary')
        object_stats = {stat: pg_summary['pg_stats_sum']['stat_sum'][stat] for stat in [
            'num_objects', 'num_object_copies', 'num_objects_degraded',
            'num_objects_misplaced', 'num_objects_unfound']}

        pgs_per_osd = 0.0
        total_osds = len(pg_summary['by_osd'])
        if total_osds > 0:
            total_pgs = 0.0
            for _, osd_pg_statuses in pg_summary['by_osd'].items():
                for _, pg_amount in osd_pg_statuses.items():
                    total_pgs += pg_amount

            pgs_per_osd = total_pgs / total_osds

        return {
            'object_stats': object_stats,
            'statuses': pg_summary['all'],
            'pgs_per_osd': pgs_per_osd,
        }
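
    # Sketch of the returned structure (all numbers assumed for illustration):
    #
    #   {
    #       'object_stats': {'num_objects': 1000, 'num_object_copies': 3000,
    #                        'num_objects_degraded': 0,
    #                        'num_objects_misplaced': 0,
    #                        'num_objects_unfound': 0},
    #       'statuses': {'active+clean': 128},
    #       'pgs_per_osd': 42.7,
    #   }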