]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | # -*- coding: utf-8 -*- |
2 | from __future__ import absolute_import | |
11fdf7f2 | 3 | import json |
9f95a23c | 4 | import logging |
11fdf7f2 TL |
5 | |
6 | import rados | |
7 | ||
8 | from mgr_module import CommandResult | |
9f95a23c | 9 | from mgr_util import get_time_series_rates, get_most_recent_rate |
11fdf7f2 | 10 | |
9f95a23c TL |
11 | from .. import mgr |
12 | from ..exceptions import DashboardException | |
11fdf7f2 | 13 | |
81eedcae | 14 | try: |
9f95a23c | 15 | from typing import Dict # pylint: disable=unused-import |
81eedcae TL |
16 | except ImportError: |
17 | pass # For typing only | |
18 | ||
9f95a23c TL |
19 | logger = logging.getLogger('ceph_service') |
20 | ||
11fdf7f2 TL |
21 | |
22 | class SendCommandError(rados.Error): | |
23 | def __init__(self, err, prefix, argdict, errno): | |
24 | self.prefix = prefix | |
25 | self.argdict = argdict | |
26 | super(SendCommandError, self).__init__(err, errno) | |
27 | ||
28 | ||
29 | class CephService(object): | |
30 | ||
31 | OSD_FLAG_NO_SCRUB = 'noscrub' | |
32 | OSD_FLAG_NO_DEEP_SCRUB = 'nodeep-scrub' | |
33 | ||
34 | PG_STATUS_SCRUBBING = 'scrubbing' | |
35 | PG_STATUS_DEEP_SCRUBBING = 'deep' | |
36 | ||
37 | SCRUB_STATUS_DISABLED = 'Disabled' | |
38 | SCRUB_STATUS_ACTIVE = 'Active' | |
39 | SCRUB_STATUS_INACTIVE = 'Inactive' | |
40 | ||
41 | @classmethod | |
42 | def get_service_map(cls, service_name): | |
9f95a23c | 43 | service_map = {} # type: Dict[str, dict] |
11fdf7f2 TL |
44 | for server in mgr.list_servers(): |
45 | for service in server['services']: | |
46 | if service['type'] == service_name: | |
47 | if server['hostname'] not in service_map: | |
48 | service_map[server['hostname']] = { | |
49 | 'server': server, | |
50 | 'services': [] | |
51 | } | |
52 | inst_id = service['id'] | |
53 | metadata = mgr.get_metadata(service_name, inst_id) | |
54 | status = mgr.get_daemon_status(service_name, inst_id) | |
55 | service_map[server['hostname']]['services'].append({ | |
56 | 'id': inst_id, | |
57 | 'type': service_name, | |
58 | 'hostname': server['hostname'], | |
59 | 'metadata': metadata, | |
60 | 'status': status | |
61 | }) | |
62 | return service_map | |
63 | ||
64 | @classmethod | |
65 | def get_service_list(cls, service_name): | |
66 | service_map = cls.get_service_map(service_name) | |
67 | return [svc for _, svcs in service_map.items() for svc in svcs['services']] | |
68 | ||
69 | @classmethod | |
70 | def get_service(cls, service_name, service_id): | |
71 | for server in mgr.list_servers(): | |
72 | for service in server['services']: | |
73 | if service['type'] == service_name: | |
74 | inst_id = service['id'] | |
75 | if inst_id == service_id: | |
76 | metadata = mgr.get_metadata(service_name, inst_id) | |
77 | status = mgr.get_daemon_status(service_name, inst_id) | |
78 | return { | |
79 | 'id': inst_id, | |
80 | 'type': service_name, | |
81 | 'hostname': server['hostname'], | |
82 | 'metadata': metadata, | |
83 | 'status': status | |
84 | } | |
85 | return None | |
86 | ||
87 | @classmethod | |
88 | def get_pool_list(cls, application=None): | |
89 | osd_map = mgr.get('osd_map') | |
90 | if not application: | |
91 | return osd_map['pools'] | |
92 | return [pool for pool in osd_map['pools'] | |
93 | if application in pool.get('application_metadata', {})] | |
94 | ||
95 | @classmethod | |
96 | def get_pool_list_with_stats(cls, application=None): | |
97 | # pylint: disable=too-many-locals | |
98 | pools = cls.get_pool_list(application) | |
99 | ||
100 | pools_w_stats = [] | |
101 | ||
102 | pg_summary = mgr.get("pg_summary") | |
103 | pool_stats = mgr.get_updated_pool_stats() | |
104 | ||
105 | for pool in pools: | |
106 | pool['pg_status'] = pg_summary['by_pool'][pool['pool'].__str__()] | |
107 | stats = pool_stats[pool['pool']] | |
108 | s = {} | |
109 | ||
11fdf7f2 | 110 | for stat_name, stat_series in stats.items(): |
9f95a23c | 111 | rates = get_time_series_rates(stat_series) |
11fdf7f2 TL |
112 | s[stat_name] = { |
113 | 'latest': stat_series[0][1], | |
9f95a23c TL |
114 | 'rate': get_most_recent_rate(rates), |
115 | 'rates': rates | |
11fdf7f2 TL |
116 | } |
117 | pool['stats'] = s | |
118 | pools_w_stats.append(pool) | |
119 | return pools_w_stats | |
120 | ||
9f95a23c TL |
121 | @classmethod |
122 | def get_erasure_code_profiles(cls): | |
123 | def _serialize_ecp(name, ecp): | |
124 | def serialize_numbers(key): | |
125 | value = ecp.get(key) | |
126 | if value is not None: | |
127 | ecp[key] = int(value) | |
128 | ||
129 | ecp['name'] = name | |
130 | serialize_numbers('k') | |
131 | serialize_numbers('m') | |
132 | return ecp | |
133 | ||
134 | ret = [] | |
135 | for name, ecp in mgr.get('osd_map').get('erasure_code_profiles', {}).items(): | |
136 | ret.append(_serialize_ecp(name, ecp)) | |
137 | return ret | |
138 | ||
11fdf7f2 TL |
139 | @classmethod |
140 | def get_pool_name_from_id(cls, pool_id): | |
141 | pool_list = cls.get_pool_list() | |
142 | for pool in pool_list: | |
143 | if pool['pool'] == pool_id: | |
144 | return pool['pool_name'] | |
145 | return None | |
146 | ||
147 | @classmethod | |
148 | def send_command(cls, srv_type, prefix, srv_spec='', **kwargs): | |
149 | """ | |
150 | :type prefix: str | |
151 | :param srv_type: mon | | |
152 | :param kwargs: will be added to argdict | |
153 | :param srv_spec: typically empty. or something like "<fs_id>:0" | |
154 | ||
155 | :raises PermissionError: See rados.make_ex | |
156 | :raises ObjectNotFound: See rados.make_ex | |
157 | :raises IOError: See rados.make_ex | |
158 | :raises NoSpace: See rados.make_ex | |
159 | :raises ObjectExists: See rados.make_ex | |
160 | :raises ObjectBusy: See rados.make_ex | |
161 | :raises NoData: See rados.make_ex | |
162 | :raises InterruptedOrTimeoutError: See rados.make_ex | |
163 | :raises TimedOut: See rados.make_ex | |
164 | :raises ValueError: return code != 0 | |
165 | """ | |
166 | argdict = { | |
167 | "prefix": prefix, | |
168 | "format": "json", | |
169 | } | |
170 | argdict.update({k: v for k, v in kwargs.items() if v is not None}) | |
171 | result = CommandResult("") | |
172 | mgr.send_command(result, srv_type, srv_spec, json.dumps(argdict), "") | |
173 | r, outb, outs = result.wait() | |
174 | if r != 0: | |
9f95a23c TL |
175 | logger.error("send_command '%s' failed. (r=%s, outs=\"%s\", kwargs=%s)", prefix, r, |
176 | outs, kwargs) | |
177 | ||
11fdf7f2 | 178 | raise SendCommandError(outs, prefix, argdict, r) |
9f95a23c TL |
179 | |
180 | try: | |
181 | return json.loads(outb or outs) | |
182 | except Exception: # pylint: disable=broad-except | |
183 | return outb | |
184 | ||
185 | @staticmethod | |
186 | def _get_smart_data_by_device(device): | |
187 | # type: (dict) -> Dict[str, dict] | |
188 | # Check whether the device is associated with daemons. | |
189 | if 'daemons' in device and device['daemons']: | |
190 | dev_smart_data = None | |
191 | ||
192 | # The daemons associated with the device. Note, the list may | |
193 | # contain daemons that are 'down' or 'destroyed'. | |
194 | daemons = device.get('daemons') | |
195 | ||
196 | # Get a list of all OSD daemons on all hosts that are 'up' | |
197 | # because SMART data can not be retrieved from daemons that | |
198 | # are 'down' or 'destroyed'. | |
199 | osd_tree = CephService.send_command('mon', 'osd tree') | |
200 | osd_daemons_up = [ | |
201 | node['name'] for node in osd_tree.get('nodes', {}) | |
202 | if node.get('status') == 'up' | |
203 | ] | |
204 | ||
205 | # Finally get the daemons on the host of the given device | |
206 | # that are 'up'. All daemons on the same host can deliver | |
207 | # SMART data, thus it is not relevant for us which daemon | |
208 | # we are using. | |
209 | daemons = list(set(daemons) & set(osd_daemons_up)) # type: ignore | |
210 | ||
211 | for daemon in daemons: | |
212 | svc_type, svc_id = daemon.split('.') | |
213 | try: | |
214 | dev_smart_data = CephService.send_command( | |
215 | svc_type, 'smart', svc_id, devid=device['devid']) | |
216 | except SendCommandError: | |
217 | # Try to retrieve SMART data from another daemon. | |
218 | continue | |
219 | for dev_id, dev_data in dev_smart_data.items(): | |
220 | if 'error' in dev_data: | |
221 | logger.warning( | |
222 | '[SMART] Error retrieving smartctl data for device ID "%s": %s', | |
223 | dev_id, dev_data) | |
224 | break | |
225 | if dev_smart_data is None: | |
226 | raise DashboardException( | |
227 | 'Failed to retrieve SMART data for device ID "{}"'.format( | |
228 | device['devid'])) | |
229 | return dev_smart_data | |
230 | logger.warning('[SMART] No daemons associated with device ID "%s"', | |
231 | device['devid']) | |
232 | return {} | |
233 | ||
234 | @staticmethod | |
235 | def get_devices_by_host(hostname): | |
236 | # (str) -> dict | |
237 | return CephService.send_command('mon', | |
238 | 'device ls-by-host', | |
239 | host=hostname) | |
240 | ||
241 | @staticmethod | |
242 | def get_devices_by_daemon(daemon_type, daemon_id): | |
243 | # (str, str) -> dict | |
244 | return CephService.send_command('mon', | |
245 | 'device ls-by-daemon', | |
246 | who='{}.{}'.format( | |
247 | daemon_type, daemon_id)) | |
248 | ||
249 | @staticmethod | |
250 | def get_smart_data_by_host(hostname): | |
251 | # type: (str) -> dict | |
252 | """ | |
253 | Get the SMART data of all devices on the given host, regardless | |
254 | of the daemon (osd, mon, ...). | |
255 | :param hostname: The name of the host. | |
256 | :return: A dictionary containing the SMART data of every device | |
257 | on the given host. The device name is used as the key in the | |
258 | dictionary. | |
259 | """ | |
260 | devices = CephService.get_devices_by_host(hostname) | |
261 | smart_data = {} # type: dict | |
262 | if devices: | |
263 | for device in devices: | |
264 | if device['devid'] not in smart_data: | |
265 | smart_data.update( | |
266 | CephService._get_smart_data_by_device(device)) | |
267 | return smart_data | |
268 | ||
269 | @staticmethod | |
270 | def get_smart_data_by_daemon(daemon_type, daemon_id): | |
271 | # type: (str, str) -> Dict[str, dict] | |
272 | """ | |
273 | Get the SMART data of the devices associated with the given daemon. | |
274 | :param daemon_type: The daemon type, e.g. 'osd' or 'mon'. | |
275 | :param daemon_id: The daemon identifier. | |
276 | :return: A dictionary containing the SMART data of every device | |
277 | associated with the given daemon. The device name is used as the | |
278 | key in the dictionary. | |
279 | """ | |
280 | devices = CephService.get_devices_by_daemon(daemon_type, daemon_id) | |
281 | smart_data = {} # type: Dict[str, dict] | |
282 | if devices: | |
283 | for device in devices: | |
284 | if device['devid'] not in smart_data: | |
285 | smart_data.update( | |
286 | CephService._get_smart_data_by_device(device)) | |
287 | return smart_data | |
11fdf7f2 TL |
288 | |
289 | @classmethod | |
290 | def get_rates(cls, svc_type, svc_name, path): | |
291 | """ | |
292 | :return: the derivative of mgr.get_counter() | |
293 | :rtype: list[tuple[int, float]]""" | |
294 | data = mgr.get_counter(svc_type, svc_name, path)[path] | |
9f95a23c | 295 | return get_time_series_rates(data) |
11fdf7f2 TL |
296 | |
297 | @classmethod | |
298 | def get_rate(cls, svc_type, svc_name, path): | |
299 | """returns most recent rate""" | |
9f95a23c | 300 | return get_most_recent_rate(cls.get_rates(svc_type, svc_name, path)) |
11fdf7f2 TL |
301 | |
302 | @classmethod | |
303 | def get_client_perf(cls): | |
304 | pools_stats = mgr.get('osd_pool_stats')['pool_stats'] | |
305 | ||
306 | io_stats = { | |
307 | 'read_bytes_sec': 0, | |
308 | 'read_op_per_sec': 0, | |
309 | 'write_bytes_sec': 0, | |
310 | 'write_op_per_sec': 0, | |
311 | } | |
312 | recovery_stats = {'recovering_bytes_per_sec': 0} | |
313 | ||
314 | for pool_stats in pools_stats: | |
315 | client_io = pool_stats['client_io_rate'] | |
316 | for stat in list(io_stats.keys()): | |
317 | if stat in client_io: | |
318 | io_stats[stat] += client_io[stat] | |
319 | ||
320 | client_recovery = pool_stats['recovery_rate'] | |
321 | for stat in list(recovery_stats.keys()): | |
322 | if stat in client_recovery: | |
323 | recovery_stats[stat] += client_recovery[stat] | |
324 | ||
325 | client_perf = io_stats.copy() | |
326 | client_perf.update(recovery_stats) | |
327 | ||
328 | return client_perf | |
329 | ||
330 | @classmethod | |
331 | def get_scrub_status(cls): | |
332 | enabled_flags = mgr.get('osd_map')['flags_set'] | |
333 | if cls.OSD_FLAG_NO_SCRUB in enabled_flags or cls.OSD_FLAG_NO_DEEP_SCRUB in enabled_flags: | |
334 | return cls.SCRUB_STATUS_DISABLED | |
335 | ||
336 | grouped_pg_statuses = mgr.get('pg_summary')['all'] | |
337 | for grouped_pg_status in grouped_pg_statuses.keys(): | |
338 | if len(grouped_pg_status.split(cls.PG_STATUS_SCRUBBING)) > 1 \ | |
339 | or len(grouped_pg_status.split(cls.PG_STATUS_DEEP_SCRUBBING)) > 1: | |
340 | return cls.SCRUB_STATUS_ACTIVE | |
341 | ||
342 | return cls.SCRUB_STATUS_INACTIVE | |
343 | ||
344 | @classmethod | |
345 | def get_pg_info(cls): | |
346 | pg_summary = mgr.get('pg_summary') | |
81eedcae TL |
347 | object_stats = {stat: pg_summary['pg_stats_sum']['stat_sum'][stat] for stat in [ |
348 | 'num_objects', 'num_object_copies', 'num_objects_degraded', | |
349 | 'num_objects_misplaced', 'num_objects_unfound']} | |
11fdf7f2 TL |
350 | |
351 | pgs_per_osd = 0.0 | |
352 | total_osds = len(pg_summary['by_osd']) | |
353 | if total_osds > 0: | |
354 | total_pgs = 0.0 | |
355 | for _, osd_pg_statuses in pg_summary['by_osd'].items(): | |
356 | for _, pg_amount in osd_pg_statuses.items(): | |
357 | total_pgs += pg_amount | |
358 | ||
359 | pgs_per_osd = total_pgs / total_osds | |
360 | ||
361 | return { | |
81eedcae | 362 | 'object_stats': object_stats, |
11fdf7f2 TL |
363 | 'statuses': pg_summary['all'], |
364 | 'pgs_per_osd': pgs_per_osd, | |
365 | } |