# -*- coding: utf-8 -*-
from __future__ import absolute_import
import json
import logging

import rados

from mgr_module import CommandResult
from mgr_util import get_time_series_rates, get_most_recent_rate

from .. import mgr
from ..exceptions import DashboardException

try:
    from typing import Dict  # pylint: disable=unused-import
except ImportError:
    pass  # For typing only

logger = logging.getLogger('ceph_service')


class SendCommandError(rados.Error):
    def __init__(self, err, prefix, argdict, errno):
        self.prefix = prefix
        self.argdict = argdict
        super(SendCommandError, self).__init__(err, errno)


class CephService(object):

    OSD_FLAG_NO_SCRUB = 'noscrub'
    OSD_FLAG_NO_DEEP_SCRUB = 'nodeep-scrub'

    PG_STATUS_SCRUBBING = 'scrubbing'
    PG_STATUS_DEEP_SCRUBBING = 'deep'

    SCRUB_STATUS_DISABLED = 'Disabled'
    SCRUB_STATUS_ACTIVE = 'Active'
    SCRUB_STATUS_INACTIVE = 'Inactive'

    @classmethod
    def get_service_map(cls, service_name):
        service_map = {}  # type: Dict[str, dict]
        for server in mgr.list_servers():
            for service in server['services']:
                if service['type'] == service_name:
                    if server['hostname'] not in service_map:
                        service_map[server['hostname']] = {
                            'server': server,
                            'services': []
                        }
                    inst_id = service['id']
                    metadata = mgr.get_metadata(service_name, inst_id)
                    status = mgr.get_daemon_status(service_name, inst_id)
                    service_map[server['hostname']]['services'].append({
                        'id': inst_id,
                        'type': service_name,
                        'hostname': server['hostname'],
                        'metadata': metadata,
                        'status': status
                    })
        return service_map

    @classmethod
    def get_service_list(cls, service_name):
        service_map = cls.get_service_map(service_name)
        return [svc for _, svcs in service_map.items() for svc in svcs['services']]

    @classmethod
    def get_service(cls, service_name, service_id):
        for server in mgr.list_servers():
            for service in server['services']:
                if service['type'] == service_name:
                    inst_id = service['id']
                    if inst_id == service_id:
                        metadata = mgr.get_metadata(service_name, inst_id)
                        status = mgr.get_daemon_status(service_name, inst_id)
                        return {
                            'id': inst_id,
                            'type': service_name,
                            'hostname': server['hostname'],
                            'metadata': metadata,
                            'status': status
                        }
        return None

    @classmethod
    def get_pool_list(cls, application=None):
        osd_map = mgr.get('osd_map')
        if not application:
            return osd_map['pools']
        return [pool for pool in osd_map['pools']
                if application in pool.get('application_metadata', {})]

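    # Usage sketch for get_pool_list(): with no argument every pool in the OSD
    # map is returned; with an application name, only pools tagged with that
    # application. 'rbd' below is just an example application tag.
    #
    #   rbd_pools = CephService.get_pool_list('rbd')
    #   pool_names = [p['pool_name'] for p in rbd_pools]
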
    @classmethod
    def get_pool_list_with_stats(cls, application=None):
        # pylint: disable=too-many-locals
        pools = cls.get_pool_list(application)

        pools_w_stats = []

        pg_summary = mgr.get("pg_summary")
        pool_stats = mgr.get_updated_pool_stats()

        for pool in pools:
            pool['pg_status'] = pg_summary['by_pool'][pool['pool'].__str__()]
            stats = pool_stats[pool['pool']]
            s = {}

            for stat_name, stat_series in stats.items():
                rates = get_time_series_rates(stat_series)
                s[stat_name] = {
                    'latest': stat_series[0][1],
                    'rate': get_most_recent_rate(rates),
                    'rates': rates
                }
            pool['stats'] = s
            pools_w_stats.append(pool)
        return pools_w_stats

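    # Shape sketch of the per-pool 'stats' entry built above. The stat names
    # come from the mgr's pool stats and vary by cluster; the values shown are
    # made up:
    #
    #   pool['stats'] = {
    #       '<stat_name>': {'latest': 4096,
    #                       'rate': 0.0,
    #                       'rates': [(timestamp, rate), ...]},
    #       ...
    #   }
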
    @classmethod
    def get_erasure_code_profiles(cls):
        def _serialize_ecp(name, ecp):
            def serialize_numbers(key):
                value = ecp.get(key)
                if value is not None:
                    ecp[key] = int(value)

            ecp['name'] = name
            serialize_numbers('k')
            serialize_numbers('m')
            return ecp

        ret = []
        for name, ecp in mgr.get('osd_map').get('erasure_code_profiles', {}).items():
            ret.append(_serialize_ecp(name, ecp))
        return ret

    @classmethod
    def get_pool_name_from_id(cls, pool_id):
        pool_list = cls.get_pool_list()
        for pool in pool_list:
            if pool['pool'] == pool_id:
                return pool['pool_name']
        return None

    @classmethod
    def send_command(cls, srv_type, prefix, srv_spec='', **kwargs):
        """
        :type prefix: str
        :param srv_type: The service type, e.g. 'mon' or 'osd'.
        :param kwargs: Any additional command arguments; added to the argument
            dict. Arguments whose value is ``None`` are dropped.
        :param srv_spec: Typically empty, or something like "<fs_id>:0".

        :raises PermissionError: See rados.make_ex
        :raises ObjectNotFound: See rados.make_ex
        :raises IOError: See rados.make_ex
        :raises NoSpace: See rados.make_ex
        :raises ObjectExists: See rados.make_ex
        :raises ObjectBusy: See rados.make_ex
        :raises NoData: See rados.make_ex
        :raises InterruptedOrTimeoutError: See rados.make_ex
        :raises TimedOut: See rados.make_ex
        :raises SendCommandError: If the return code of the command is not zero.
        """
        argdict = {
            "prefix": prefix,
            "format": "json",
        }
        argdict.update({k: v for k, v in kwargs.items() if v is not None})
        result = CommandResult("")
        mgr.send_command(result, srv_type, srv_spec, json.dumps(argdict), "")
        r, outb, outs = result.wait()
        if r != 0:
            logger.error("send_command '%s' failed. (r=%s, outs=\"%s\", kwargs=%s)", prefix, r,
                         outs, kwargs)

            raise SendCommandError(outs, prefix, argdict, r)

        try:
            return json.loads(outb or outs)
        except Exception:  # pylint: disable=broad-except
            return outb

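    # Usage sketch, mirroring the calls made elsewhere in this module. The
    # result is the parsed JSON reply, or the raw output buffer if the reply
    # is not valid JSON; 'myhost' is a placeholder hostname.
    #
    #   osd_tree = CephService.send_command('mon', 'osd tree')
    #   devices = CephService.send_command('mon', 'device ls-by-host', host='myhost')
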
    @staticmethod
    def _get_smart_data_by_device(device):
        # type: (dict) -> Dict[str, dict]
        # Check whether the device is associated with daemons.
        if 'daemons' in device and device['daemons']:
            dev_smart_data = None

            # The daemons associated with the device. Note, the list may
            # contain daemons that are 'down' or 'destroyed'.
            daemons = device.get('daemons')

            # Get a list of all OSD daemons on all hosts that are 'up',
            # because SMART data cannot be retrieved from daemons that
            # are 'down' or 'destroyed'.
            osd_tree = CephService.send_command('mon', 'osd tree')
            osd_daemons_up = [
                node['name'] for node in osd_tree.get('nodes', {})
                if node.get('status') == 'up'
            ]

            # Finally, get the daemons on the host of the given device
            # that are 'up'. All daemons on the same host can deliver
            # SMART data, thus it is not relevant for us which daemon
            # we are using.
            daemons = list(set(daemons) & set(osd_daemons_up))  # type: ignore

            for daemon in daemons:
                svc_type, svc_id = daemon.split('.')
                try:
                    dev_smart_data = CephService.send_command(
                        svc_type, 'smart', svc_id, devid=device['devid'])
                except SendCommandError:
                    # Try to retrieve SMART data from another daemon.
                    continue
                for dev_id, dev_data in dev_smart_data.items():
                    if 'error' in dev_data:
                        logger.warning(
                            '[SMART] Error retrieving smartctl data for device ID "%s": %s',
                            dev_id, dev_data)
                break
            if dev_smart_data is None:
                raise DashboardException(
                    'Failed to retrieve SMART data for device ID "{}"'.format(
                        device['devid']))
            return dev_smart_data
        logger.warning('[SMART] No daemons associated with device ID "%s"',
                       device['devid'])
        return {}

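    # Return-value sketch for _get_smart_data_by_device(): a dict keyed by
    # device ID, each value holding the smartctl output reported by the 'smart'
    # command (fields vary by drive; a value may also carry an 'error' entry,
    # which is only logged above):
    #
    #   {'<devid>': {...smartctl JSON...}, ...}
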
    @staticmethod
    def get_devices_by_host(hostname):
        # type: (str) -> dict
        return CephService.send_command('mon',
                                        'device ls-by-host',
                                        host=hostname)

    @staticmethod
    def get_devices_by_daemon(daemon_type, daemon_id):
        # type: (str, str) -> dict
        return CephService.send_command('mon',
                                        'device ls-by-daemon',
                                        who='{}.{}'.format(
                                            daemon_type, daemon_id))

    @staticmethod
    def get_smart_data_by_host(hostname):
        # type: (str) -> dict
        """
        Get the SMART data of all devices on the given host, regardless
        of the daemon (osd, mon, ...).
        :param hostname: The name of the host.
        :return: A dictionary containing the SMART data of every device
            on the given host. The device name is used as the key in the
            dictionary.
        """
        devices = CephService.get_devices_by_host(hostname)
        smart_data = {}  # type: dict
        if devices:
            for device in devices:
                if device['devid'] not in smart_data:
                    smart_data.update(
                        CephService._get_smart_data_by_device(device))
        return smart_data

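    # Usage sketch ('ceph-node-01' is a placeholder hostname; the result
    # aggregates the per-device dicts produced by _get_smart_data_by_device()):
    #
    #   host_smart = CephService.get_smart_data_by_host('ceph-node-01')
    #   for devid, data in host_smart.items():
    #       ...
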
    @staticmethod
    def get_smart_data_by_daemon(daemon_type, daemon_id):
        # type: (str, str) -> Dict[str, dict]
        """
        Get the SMART data of the devices associated with the given daemon.
        :param daemon_type: The daemon type, e.g. 'osd' or 'mon'.
        :param daemon_id: The daemon identifier.
        :return: A dictionary containing the SMART data of every device
            associated with the given daemon. The device name is used as the
            key in the dictionary.
        """
        devices = CephService.get_devices_by_daemon(daemon_type, daemon_id)
        smart_data = {}  # type: Dict[str, dict]
        if devices:
            for device in devices:
                if device['devid'] not in smart_data:
                    smart_data.update(
                        CephService._get_smart_data_by_device(device))
        return smart_data

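    # Usage sketch (the daemon ID '0' is a placeholder for an existing OSD):
    #
    #   osd_smart = CephService.get_smart_data_by_daemon('osd', '0')
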
    @classmethod
    def get_rates(cls, svc_type, svc_name, path):
        """
        :return: the derivative of mgr.get_counter()
        :rtype: list[tuple[int, float]]
        """
        data = mgr.get_counter(svc_type, svc_name, path)[path]
        return get_time_series_rates(data)

    @classmethod
    def get_rate(cls, svc_type, svc_name, path):
        """Returns the most recent rate."""
        return get_most_recent_rate(cls.get_rates(svc_type, svc_name, path))

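    # Usage sketch (the counter path 'osd.op_w' is only a placeholder; valid
    # paths are the perf counter names accepted by mgr.get_counter()):
    #
    #   rates = CephService.get_rates('osd', '0', 'osd.op_w')   # [(timestamp, rate), ...]
    #   latest = CephService.get_rate('osd', '0', 'osd.op_w')   # most recent rate only
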
    @classmethod
    def get_client_perf(cls):
        pools_stats = mgr.get('osd_pool_stats')['pool_stats']

        io_stats = {
            'read_bytes_sec': 0,
            'read_op_per_sec': 0,
            'write_bytes_sec': 0,
            'write_op_per_sec': 0,
        }
        recovery_stats = {'recovering_bytes_per_sec': 0}

        for pool_stats in pools_stats:
            client_io = pool_stats['client_io_rate']
            for stat in list(io_stats.keys()):
                if stat in client_io:
                    io_stats[stat] += client_io[stat]

            client_recovery = pool_stats['recovery_rate']
            for stat in list(recovery_stats.keys()):
                if stat in client_recovery:
                    recovery_stats[stat] += client_recovery[stat]

        client_perf = io_stats.copy()
        client_perf.update(recovery_stats)

        return client_perf

    @classmethod
    def get_scrub_status(cls):
        enabled_flags = mgr.get('osd_map')['flags_set']
        if cls.OSD_FLAG_NO_SCRUB in enabled_flags or cls.OSD_FLAG_NO_DEEP_SCRUB in enabled_flags:
            return cls.SCRUB_STATUS_DISABLED

        grouped_pg_statuses = mgr.get('pg_summary')['all']
        for grouped_pg_status in grouped_pg_statuses.keys():
            if len(grouped_pg_status.split(cls.PG_STATUS_SCRUBBING)) > 1 \
                    or len(grouped_pg_status.split(cls.PG_STATUS_DEEP_SCRUBBING)) > 1:
                return cls.SCRUB_STATUS_ACTIVE

        return cls.SCRUB_STATUS_INACTIVE

    @classmethod
    def get_pg_info(cls):
        pg_summary = mgr.get('pg_summary')
        object_stats = {stat: pg_summary['pg_stats_sum']['stat_sum'][stat] for stat in [
            'num_objects', 'num_object_copies', 'num_objects_degraded',
            'num_objects_misplaced', 'num_objects_unfound']}

        pgs_per_osd = 0.0
        total_osds = len(pg_summary['by_osd'])
        if total_osds > 0:
            total_pgs = 0.0
            for _, osd_pg_statuses in pg_summary['by_osd'].items():
                for _, pg_amount in osd_pg_statuses.items():
                    total_pgs += pg_amount

            pgs_per_osd = total_pgs / total_osds

        return {
            'object_stats': object_stats,
            'statuses': pg_summary['all'],
            'pgs_per_osd': pgs_per_osd,
        }
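
    # Return-value sketch for get_pg_info() (numbers are made up; 'statuses'
    # mirrors pg_summary['all'], keyed by the combined PG state string):
    #
    #   {'object_stats': {'num_objects': 1000, 'num_object_copies': 3000, ...},
    #    'statuses': {'active+clean': 128, ...},
    #    'pgs_per_osd': 42.67}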