]>
Commit | Line | Data |
---|---|---|
11fdf7f2 | 1 | # -*- coding: utf-8 -*- |
f67539c2 | 2 | |
11fdf7f2 | 3 | import json |
9f95a23c | 4 | import logging |
11fdf7f2 TL |
5 | |
6 | import rados | |
11fdf7f2 | 7 | from mgr_module import CommandResult |
f67539c2 | 8 | from mgr_util import get_most_recent_rate, get_time_series_rates |
11fdf7f2 | 9 | |
9f95a23c | 10 | from .. import mgr |
11fdf7f2 | 11 | |
81eedcae | 12 | try: |
f67539c2 | 13 | from typing import Any, Dict, Optional, Union |
81eedcae TL |
14 | except ImportError: |
15 | pass # For typing only | |
16 | ||
9f95a23c TL |
17 | logger = logging.getLogger('ceph_service') |
18 | ||
11fdf7f2 TL |
19 | |
class SendCommandError(rados.Error):
    """Error carrying the details of a failed daemon command.

    In addition to the message and errno handed to ``rados.Error``, the
    command prefix and the complete argument dictionary that was sent are
    stored on the instance as ``prefix`` and ``argdict``.
    """

    def __init__(self, err, prefix, argdict, errno):
        # Remember what was sent so handlers can inspect the failed command.
        self.prefix = prefix
        self.argdict = argdict
        super().__init__(err, errno)
25 | ||
26 | ||
522d829b | 27 | # pylint: disable=too-many-public-methods |
11fdf7f2 TL |
class CephService:
    """Helpers for querying cluster state through the ``mgr`` module."""

    # OSD map flags; if either is present in the OSD map's 'flags_set',
    # get_scrub_status() reports scrubbing as disabled.
    OSD_FLAG_NO_SCRUB = 'noscrub'
    OSD_FLAG_NO_DEEP_SCRUB = 'nodeep-scrub'

    # Substrings searched for in the grouped PG state names.
    PG_STATUS_SCRUBBING = 'scrubbing'
    PG_STATUS_DEEP_SCRUBBING = 'deep'

    # Possible return values of get_scrub_status().
    SCRUB_STATUS_DISABLED = 'Disabled'
    SCRUB_STATUS_ACTIVE = 'Active'
    SCRUB_STATUS_INACTIVE = 'Inactive'
39 | ||
@classmethod
def get_service_map(cls, service_name):
    """Group the daemons of type ``service_name`` by the host they run on.

    :param service_name: service type; compared against the ``type`` entry
        of every service reported by ``mgr.list_servers()``.
    :return: dict keyed by hostname. Each value holds the ``server`` entry
        and a ``services`` list with the id, type, hostname, metadata and
        status of every matching daemon on that host.
    """
    by_host = {}  # type: Dict[str, dict]
    for server in mgr.list_servers():
        for service in server['services']:
            if service['type'] != service_name:
                continue
            # Host entries are created lazily, so a host without a matching
            # daemon never appears in the result.
            host_entry = by_host.setdefault(server['hostname'], {
                'server': server,
                'services': []
            })
            daemon_id = service['id']
            daemon_metadata = mgr.get_metadata(service_name, daemon_id)
            daemon_status = mgr.get_daemon_status(service_name, daemon_id)
            host_entry['services'].append({
                'id': daemon_id,
                'type': service_name,
                'hostname': server['hostname'],
                'metadata': daemon_metadata,
                'status': daemon_status
            })
    return by_host
62 | ||
@classmethod
def get_service_list(cls, service_name):
    """Return all daemons of type ``service_name`` as one flat list.

    The per-host grouping produced by ``get_service_map`` is dropped; the
    daemon entries themselves are returned unchanged, host after host.
    """
    flattened = []
    for host_entry in cls.get_service_map(service_name).values():
        flattened.extend(host_entry['services'])
    return flattened
67 | ||
@classmethod
def get_service_data_by_metadata_id(cls,
                                    service_type: str,
                                    metadata_id: str) -> Optional[Dict[str, Any]]:
    """Find the daemon of ``service_type`` whose metadata id is ``metadata_id``.

    :param service_type: service type to search among the services of all
        servers returned by ``mgr.list_servers()``.
    :param metadata_id: value compared against the ``id`` entry of each
        candidate's metadata.
    :return: dict with ``id`` (metadata id), ``service_map_id`` (the service
        id as a string), ``type``, ``hostname`` and ``metadata`` of the first
        match, or ``None`` when no daemon matches.
    """
    for server in mgr.list_servers():
        candidates = (svc for svc in server['services'] if svc['type'] == service_type)
        for service in candidates:
            metadata = mgr.get_metadata(service_type, service['id'])
            if metadata_id != metadata['id']:
                continue
            return {
                'id': metadata['id'],
                'service_map_id': str(service['id']),
                'type': service_type,
                'hostname': server['hostname'],
                'metadata': metadata
            }
    return None
85 | ||
f67539c2 TL |
@classmethod
def get_service(cls, service_type: str, metadata_id: str) -> Optional[Dict[str, Any]]:
    """Look up a daemon by metadata id and attach its current status.

    :return: the dict produced by ``get_service_data_by_metadata_id`` with an
        extra ``status`` entry, or that lookup's falsy result unchanged when
        nothing was found.
    """
    details = cls.get_service_data_by_metadata_id(service_type, metadata_id)
    if not details:
        return details
    # The status is queried with the service-map id, not the metadata id.
    details['status'] = mgr.get_daemon_status(details['type'], details['service_map_id'])
    return details
92 | ||
522d829b TL |
@classmethod
def get_service_perf_counters(cls, service_type: str, service_id: str) -> Dict[str, Any]:
    """Collect the performance counters of a single daemon.

    Every entry of the daemon's perf schema becomes a dict with ``name``,
    ``description``, ``value`` and ``unit``. Entries whose stat type is
    'counter' report their most recent rate and the schema's unit; all
    other entries report the latest raw value with an empty unit.

    :return: dict with ``service`` (type and stringified id) and
        ``counters`` (entries sorted by schema key).
    """
    schema_by_daemon = mgr.get_perf_schema(service_type, service_id)
    schema = schema_by_daemon["{}.{}".format(service_type, service_id)]

    def describe(path, spec):
        entry = {'name': str(path), 'description': spec['description']}
        # pylint: disable=W0212
        if mgr._stattype_to_str(spec['type']) == 'counter':
            entry['value'] = cls.get_rate(service_type, service_id, path)
            entry['unit'] = mgr._unit_to_str(spec['units'])
        else:
            entry['value'] = mgr.get_latest(service_type, service_id, path)
            entry['unit'] = ''
        return entry

    counters = [describe(path, spec) for path, spec in sorted(schema.items())]

    return {
        'service': {
            'type': service_type,
            'id': str(service_id)
        },
        'counters': counters
    }
118 | ||
11fdf7f2 TL |
@classmethod
def get_pool_list(cls, application=None):
    """Return the pools of the OSD map, optionally filtered by application.

    :param application: when truthy, only pools whose
        ``application_metadata`` contains this key are returned.
    :return: list of pool dicts taken from the OSD map.
    """
    pools = mgr.get('osd_map')['pools']
    if application:
        return [pool for pool in pools
                if application in pool.get('application_metadata', {})]
    return pools
126 | ||
@classmethod
def get_pool_list_with_stats(cls, application=None):
    """Return the pool list enriched with PG status and statistics.

    Each pool dict obtained from ``get_pool_list`` is extended in place with
    ``pg_status`` (the ``by_pool`` entry of the PG summary) and ``stats``,
    which maps every stat name to its ``latest`` value, most recent ``rate``
    and the full list of ``rates``.

    :param application: passed through to ``get_pool_list``.
    :return: list of the enriched pool dicts.
    """
    pools = cls.get_pool_list(application)
    pg_summary = mgr.get("pg_summary")
    pool_stats = mgr.get_updated_pool_stats()

    def summarize(series):
        rates = get_time_series_rates(series)
        return {
            # The first sample of the series is reported as the latest value.
            'latest': series[0][1],
            'rate': get_most_recent_rate(rates),
            'rates': rates
        }

    enriched = []
    for pool in pools:
        pool_id = pool['pool']
        # The PG summary is keyed by the stringified pool id.
        pool['pg_status'] = pg_summary['by_pool'][str(pool_id)]
        pool['stats'] = {
            stat_name: summarize(series)
            for stat_name, series in pool_stats[pool_id].items()
        }
        enriched.append(pool)
    return enriched
152 | ||
9f95a23c TL |
@classmethod
def get_erasure_code_profiles(cls):
    """Return the erasure code profiles of the OSD map as a list.

    Each profile dict is updated in place: its name is stored under
    ``name`` and the ``k`` and ``m`` values, when present, are converted to
    integers.
    """
    profiles = mgr.get('osd_map').get('erasure_code_profiles', {})
    serialized = []
    for name, profile in profiles.items():
        profile['name'] = name
        for key in ('k', 'm'):
            raw = profile.get(key)
            if raw is not None:
                profile[key] = int(raw)
        serialized.append(profile)
    return serialized
170 | ||
11fdf7f2 TL |
@classmethod
def get_pool_name_from_id(cls, pool_id):
    # type: (int) -> Union[str, None]
    """Return the result of the rados reverse lookup for ``pool_id``."""
    pool_name = mgr.rados.pool_reverse_lookup(pool_id)
    return pool_name
e306af50 TL |
175 | |
@classmethod
def get_pool_by_attribute(cls, attribute, value):
    # type: (str, Any) -> Union[dict, None]
    """Return the first pool whose ``attribute`` equals ``value``.

    Pools that do not have the attribute at all are skipped.

    :return: the matching pool dict, or ``None`` when there is none.
    """
    matching = (pool for pool in cls.get_pool_list()
                if attribute in pool and pool[attribute] == value)
    return next(matching, None)
184 | ||
e306af50 TL |
@classmethod
def get_pool_pg_status(cls, pool_name):
    # type: (str) -> dict
    """Return the PG summary entry of the pool called ``pool_name``.

    :return: the ``by_pool`` entry of the PG summary for the pool, or an
        empty dict when no pool with that name exists.
    """
    pool = cls.get_pool_by_attribute('pool_name', pool_name)
    if pool is not None:
        # The PG summary is keyed by the stringified pool id.
        return mgr.get("pg_summary")['by_pool'][str(pool['pool'])]
    return {}
192 | ||
f67539c2 TL |
@staticmethod
def send_command(srv_type, prefix, srv_spec='', **kwargs):
    # type: (str, str, Optional[str], Any) -> Any
    """
    Send a JSON-formatted command through the mgr and return its output.

    :param srv_type: type of the target service, e.g. 'mon'
    :type prefix: str
    :param prefix: command to execute; sent as the "prefix" argument
    :param srv_spec: typically empty. or something like "<fs_id>:0"
    :param kwargs: will be added to argdict; entries whose value is None
        are left out
    :return: the command output decoded from JSON; if decoding fails, the
        raw output buffer is returned instead

    :raises SendCommandError: return code != 0
    """
    # NOTE(review): the previous docstring listed the rados.make_ex
    # exceptions (PermissionError, ObjectNotFound, ...). Nothing in this
    # function raises them directly - confirm whether mgr.send_command can.
    argdict = {
        "prefix": prefix,
        "format": "json",
    }
    for key, val in kwargs.items():
        if val is not None:
            argdict[key] = val

    result = CommandResult("")
    mgr.send_command(result, srv_type, srv_spec, json.dumps(argdict), "")
    ret_code, out_buf, out_str = result.wait()
    if ret_code != 0:
        logger.error("send_command '%s' failed. (r=%s, outs=\"%s\", kwargs=%s)", prefix,
                     ret_code, out_str, kwargs)
        raise SendCommandError(out_str, prefix, argdict, ret_code)

    # Fall back to the status string when the output buffer is empty.
    payload = out_buf or out_str
    try:
        return json.loads(payload)
    except Exception:  # pylint: disable=broad-except
        return out_buf
231 | ||
@staticmethod
def _get_smart_data_by_device(device):
    # type: (dict) -> Dict[str, dict]
    """
    Retrieve the SMART data of ``device`` from one of its daemons.

    OSD daemons are only queried (daemon command 'smart') when 'osd tree'
    reports them as 'up'. Mon daemons are asked through
    'device query-daemon-health-metrics'. For any other daemon type an
    empty result is used. A daemon whose command fails is logged and the
    next daemon is tried.

    :param device: device dict; reads 'devid' and, when present, 'daemons'
        (daemon names of the form "<type>.<id>").
    :return: the SMART data returned by the first daemon that was handled
        without a command error, or an empty dict when the device has no
        daemons or none of them delivered data.
    """
    # Check whether the device is associated with daemons.
    if 'daemons' in device and device['daemons']:
        dev_smart_data: Dict[str, Any] = {}

        # Get a list of all OSD daemons on all hosts that are 'up'
        # because SMART data can not be retrieved from daemons that
        # are 'down' or 'destroyed'.
        osd_tree = CephService.send_command('mon', 'osd tree')
        osd_daemons_up = [
            node['name'] for node in osd_tree.get('nodes', {})
            if node.get('status') == 'up'
        ]

        # All daemons on the same host can deliver SMART data,
        # thus it is not relevant for us which daemon we are using.
        # NOTE: the list may contain daemons that are 'down' or 'destroyed'.
        for daemon in device['daemons']:
            # Split on the first dot only: the daemon id itself may contain
            # dots (e.g. a mon named after a fully qualified host name), in
            # which case an unbounded split breaks the two-value unpacking.
            svc_type, svc_id = daemon.split('.', 1)
            if 'osd' in svc_type:
                if daemon not in osd_daemons_up:
                    continue
                try:
                    dev_smart_data = CephService.send_command(
                        svc_type, 'smart', svc_id, devid=device['devid'])
                except SendCommandError as error:
                    logger.warning(str(error))
                    # Try to retrieve SMART data from another daemon.
                    continue
            elif 'mon' in svc_type:
                try:
                    dev_smart_data = CephService.send_command(
                        svc_type, 'device query-daemon-health-metrics', who=daemon)
                except SendCommandError as error:
                    logger.warning(str(error))
                    # Try to retrieve SMART data from another daemon.
                    continue
            else:
                dev_smart_data = {}
            for dev_id, dev_data in dev_smart_data.items():
                if 'error' in dev_data:
                    logger.warning(
                        '[SMART] Error retrieving smartctl data for device ID "%s": %s',
                        dev_id, dev_data)
            # NOTE(review): assumed to end the daemon loop after the first
            # daemon that was handled, in line with the "not relevant which
            # daemon" remark above - confirm.
            break

        return dev_smart_data
    logger.warning('[SMART] No daemons associated with device ID "%s"',
                   device['devid'])
    return {}
284 | ||
@staticmethod
def get_devices_by_host(hostname):
    # type: (str) -> dict
    """Return the output of the mon command 'device ls-by-host' for ``hostname``."""
    devices = CephService.send_command('mon', 'device ls-by-host', host=hostname)
    return devices
291 | ||
@staticmethod
def get_devices_by_daemon(daemon_type, daemon_id):
    # type: (str, str) -> dict
    """Return the output of the mon command 'device ls-by-daemon'.

    The daemon is addressed as "<daemon_type>.<daemon_id>".
    """
    daemon_name = '{}.{}'.format(daemon_type, daemon_id)
    return CephService.send_command('mon', 'device ls-by-daemon', who=daemon_name)
299 | ||
@staticmethod
def get_smart_data_by_host(hostname):
    # type: (str) -> dict
    """
    Collect the SMART data of every device on the given host, whatever
    daemon (osd, mon, ...) the device belongs to.
    :param hostname: The name of the host.
    :return: A dictionary with the SMART data of all devices on the host;
        empty when the device list could not be retrieved.
    """
    smart_data = {}  # type: dict
    devices = CephService.get_devices_by_host(hostname)
    if not devices:
        logger.debug('[SMART] could not retrieve device list from host %s', hostname)
        return smart_data
    for device in devices:
        # Skip devices whose ID already has an entry in the result.
        if device['devid'] in smart_data:
            continue
        smart_data.update(CephService._get_smart_data_by_device(device))
    return smart_data
321 | ||
@staticmethod
def get_smart_data_by_daemon(daemon_type, daemon_id):
    # type: (str, str) -> Dict[str, dict]
    """
    Collect the SMART data of the devices that belong to one daemon.
    :param daemon_type: The daemon type, e.g. 'osd' or 'mon'.
    :param daemon_id: The daemon identifier.
    :return: A dictionary with the SMART data of all devices associated
        with the daemon; empty when the device list could not be retrieved.
    """
    smart_data = {}  # type: Dict[str, dict]
    devices = CephService.get_devices_by_daemon(daemon_type, daemon_id)
    if not devices:
        msg = ('[SMART] could not retrieve device list from daemon with type %s and '
               'with ID %s')
        logger.debug(msg, daemon_type, daemon_id)
        return smart_data
    for device in devices:
        # Skip devices whose ID already has an entry in the result.
        if device['devid'] in smart_data:
            continue
        smart_data.update(CephService._get_smart_data_by_device(device))
    return smart_data
11fdf7f2 TL |
345 | |
@classmethod
def get_rates(cls, svc_type, svc_name, path):
    """
    Compute the rates of change of one performance counter.

    :return: the derivative of mgr.get_counter()
    :rtype: list[tuple[int, float]]"""
    counters = mgr.get_counter(svc_type, svc_name, path)
    return get_time_series_rates(counters[path])
11fdf7f2 TL |
353 | |
@classmethod
def get_rate(cls, svc_type, svc_name, path):
    """Return the most recent rate of the given performance counter."""
    rates = cls.get_rates(svc_type, svc_name, path)
    return get_most_recent_rate(rates)
11fdf7f2 TL |
358 | |
@classmethod
def get_client_perf(cls):
    """Sum the client I/O and recovery rates over all pools.

    :return: dict with the totals of ``read_bytes_sec``,
        ``read_op_per_sec``, ``write_bytes_sec``, ``write_op_per_sec`` and
        ``recovering_bytes_per_sec``. Rates a pool does not report count as
        zero.
    """
    io_keys = (
        'read_bytes_sec',
        'read_op_per_sec',
        'write_bytes_sec',
        'write_op_per_sec',
    )
    recovery_keys = ('recovering_bytes_per_sec',)
    totals = dict.fromkeys(io_keys + recovery_keys, 0)

    for pool_stats in mgr.get('osd_pool_stats')['pool_stats']:
        client_io = pool_stats['client_io_rate']
        for key in io_keys:
            if key in client_io:
                totals[key] += client_io[key]

        recovery = pool_stats['recovery_rate']
        for key in recovery_keys:
            if key in recovery:
                totals[key] += recovery[key]

    return totals
386 | ||
@classmethod
def get_scrub_status(cls):
    """Report whether scrubbing is disabled, active or inactive.

    :return: ``SCRUB_STATUS_DISABLED`` when one of the no-scrub flags is set
        in the OSD map, ``SCRUB_STATUS_ACTIVE`` when any grouped PG state
        name contains a scrubbing marker, ``SCRUB_STATUS_INACTIVE``
        otherwise.
    """
    flags = mgr.get('osd_map')['flags_set']
    if cls.OSD_FLAG_NO_SCRUB in flags or cls.OSD_FLAG_NO_DEEP_SCRUB in flags:
        return cls.SCRUB_STATUS_DISABLED

    markers = (cls.PG_STATUS_SCRUBBING, cls.PG_STATUS_DEEP_SCRUBBING)
    pg_states = mgr.get('pg_summary')['all']
    # A state name counts as scrubbing when it contains one of the markers.
    if any(marker in state for state in pg_states for marker in markers):
        return cls.SCRUB_STATUS_ACTIVE

    return cls.SCRUB_STATUS_INACTIVE
400 | ||
@classmethod
def get_pg_info(cls):
    """Summarize object statistics and PG distribution from the PG summary.

    :return: dict with ``object_stats`` (selected object counters of the
        summed PG stats), ``statuses`` (the ``all`` entry of the PG summary)
        and ``pgs_per_osd`` (total PG count over all OSDs divided by the
        number of OSDs, 0.0 when there are no OSDs).
    """
    pg_summary = mgr.get('pg_summary')
    stat_sum = pg_summary['pg_stats_sum']['stat_sum']
    wanted = (
        'num_objects', 'num_object_copies', 'num_objects_degraded',
        'num_objects_misplaced', 'num_objects_unfound',
    )
    object_stats = {name: stat_sum[name] for name in wanted}

    by_osd = pg_summary['by_osd']
    osd_count = len(by_osd)
    pgs_per_osd = 0.0
    if osd_count > 0:
        total_pgs = sum(
            (amount for states in by_osd.values() for amount in states.values()),
            0.0)
        pgs_per_osd = total_pgs / osd_count

    return {
        'object_stats': object_stats,
        'statuses': pg_summary['all'],
        'pgs_per_osd': pgs_per_osd,
    }