# Source: ceph.git — ceph/src/pybind/mgr/dashboard/services/ceph_service.py
1 # -*- coding: utf-8 -*-
2 from __future__
import absolute_import
8 from mgr_module
import CommandResult
9 from mgr_util
import get_time_series_rates
, get_most_recent_rate
12 from ..exceptions
import DashboardException
15 from typing
import Dict
, Any
, Union
# pylint: disable=unused-import
17 pass # For typing only
# Module-level logger for the dashboard's Ceph service helpers.
logger = logging.getLogger('ceph_service')
class SendCommandError(rados.Error):
    """Raised when a mon/mgr command dispatched through CephService fails.

    Besides the rados error message and errno, it carries the command
    ``prefix`` and the argument dict that were sent, so callers can report
    exactly which command failed.

    :param err: error message returned by the cluster
    :param prefix: the command name, e.g. ``'osd tree'``
    :param argdict: the argument dictionary sent with the command
    :param errno: the numeric return code
    """

    def __init__(self, err, prefix, argdict, errno):
        # Fix: the visible code accepted `prefix` but never stored it,
        # leaving exception handlers unable to tell which command failed.
        self.prefix = prefix
        self.argdict = argdict
        super(SendCommandError, self).__init__(err, errno)
class CephService(object):
    """Class-level helpers for querying Ceph cluster state via the mgr interface."""

    # OSD map flags that suppress (deep) scrubbing cluster-wide.
    OSD_FLAG_NO_SCRUB = 'noscrub'
    OSD_FLAG_NO_DEEP_SCRUB = 'nodeep-scrub'

    # Substrings of a PG status string (e.g. 'active+clean+scrubbing+deep')
    # that indicate a scrub is currently running.
    PG_STATUS_SCRUBBING = 'scrubbing'
    PG_STATUS_DEEP_SCRUBBING = 'deep'

    # Human-readable cluster-wide scrub states returned by get_scrub_status().
    SCRUB_STATUS_DISABLED = 'Disabled'
    SCRUB_STATUS_ACTIVE = 'Active'
    SCRUB_STATUS_INACTIVE = 'Inactive'
42 def get_service_map(cls
, service_name
):
43 service_map
= {} # type: Dict[str, dict]
44 for server
in mgr
.list_servers():
45 for service
in server
['services']:
46 if service
['type'] == service_name
:
47 if server
['hostname'] not in service_map
:
48 service_map
[server
['hostname']] = {
52 inst_id
= service
['id']
53 metadata
= mgr
.get_metadata(service_name
, inst_id
)
54 status
= mgr
.get_daemon_status(service_name
, inst_id
)
55 service_map
[server
['hostname']]['services'].append({
58 'hostname': server
['hostname'],
65 def get_service_list(cls
, service_name
):
66 service_map
= cls
.get_service_map(service_name
)
67 return [svc
for _
, svcs
in service_map
.items() for svc
in svcs
['services']]
70 def get_service(cls
, service_name
, service_id
):
71 for server
in mgr
.list_servers():
72 for service
in server
['services']:
73 if service
['type'] == service_name
:
74 inst_id
= service
['id']
75 if inst_id
== service_id
:
76 metadata
= mgr
.get_metadata(service_name
, inst_id
)
77 status
= mgr
.get_daemon_status(service_name
, inst_id
)
81 'hostname': server
['hostname'],
88 def get_pool_list(cls
, application
=None):
89 osd_map
= mgr
.get('osd_map')
91 return osd_map
['pools']
92 return [pool
for pool
in osd_map
['pools']
93 if application
in pool
.get('application_metadata', {})]
96 def get_pool_list_with_stats(cls
, application
=None):
97 # pylint: disable=too-many-locals
98 pools
= cls
.get_pool_list(application
)
102 pg_summary
= mgr
.get("pg_summary")
103 pool_stats
= mgr
.get_updated_pool_stats()
106 pool
['pg_status'] = pg_summary
['by_pool'][pool
['pool'].__str
__()]
107 stats
= pool_stats
[pool
['pool']]
110 for stat_name
, stat_series
in stats
.items():
111 rates
= get_time_series_rates(stat_series
)
113 'latest': stat_series
[0][1],
114 'rate': get_most_recent_rate(rates
),
118 pools_w_stats
.append(pool
)
122 def get_erasure_code_profiles(cls
):
123 def _serialize_ecp(name
, ecp
):
124 def serialize_numbers(key
):
126 if value
is not None:
127 ecp
[key
] = int(value
)
130 serialize_numbers('k')
131 serialize_numbers('m')
135 for name
, ecp
in mgr
.get('osd_map').get('erasure_code_profiles', {}).items():
136 ret
.append(_serialize_ecp(name
, ecp
))
140 def get_pool_name_from_id(cls
, pool_id
):
141 # type: (int) -> Union[str, None]
142 pool
= cls
.get_pool_by_attribute('pool', pool_id
)
143 return pool
['pool_name'] if pool
is not None else None
146 def get_pool_by_attribute(cls
, attribute
, value
):
147 # type: (str, Any) -> Union[dict, None]
148 pool_list
= cls
.get_pool_list()
149 for pool
in pool_list
:
150 if attribute
in pool
and pool
[attribute
] == value
:
155 def get_pool_pg_status(cls
, pool_name
):
156 # type: (str) -> dict
157 pool
= cls
.get_pool_by_attribute('pool_name', pool_name
)
160 return mgr
.get("pg_summary")['by_pool'][pool
['pool'].__str
__()]
163 def send_command(cls
, srv_type
, prefix
, srv_spec
='', **kwargs
):
166 :param srv_type: mon |
167 :param kwargs: will be added to argdict
168 :param srv_spec: typically empty. or something like "<fs_id>:0"
170 :raises PermissionError: See rados.make_ex
171 :raises ObjectNotFound: See rados.make_ex
172 :raises IOError: See rados.make_ex
173 :raises NoSpace: See rados.make_ex
174 :raises ObjectExists: See rados.make_ex
175 :raises ObjectBusy: See rados.make_ex
176 :raises NoData: See rados.make_ex
177 :raises InterruptedOrTimeoutError: See rados.make_ex
178 :raises TimedOut: See rados.make_ex
179 :raises ValueError: return code != 0
185 argdict
.update({k
: v
for k
, v
in kwargs
.items() if v
is not None})
186 result
= CommandResult("")
187 mgr
.send_command(result
, srv_type
, srv_spec
, json
.dumps(argdict
), "")
188 r
, outb
, outs
= result
.wait()
190 logger
.error("send_command '%s' failed. (r=%s, outs=\"%s\", kwargs=%s)", prefix
, r
,
193 raise SendCommandError(outs
, prefix
, argdict
, r
)
196 return json
.loads(outb
or outs
)
197 except Exception: # pylint: disable=broad-except
201 def _get_smart_data_by_device(device
):
202 # type: (dict) -> Dict[str, dict]
203 # Check whether the device is associated with daemons.
204 if 'daemons' in device
and device
['daemons']:
205 dev_smart_data
= None
207 # The daemons associated with the device. Note, the list may
208 # contain daemons that are 'down' or 'destroyed'.
209 daemons
= device
.get('daemons')
211 # Get a list of all OSD daemons on all hosts that are 'up'
212 # because SMART data can not be retrieved from daemons that
213 # are 'down' or 'destroyed'.
214 osd_tree
= CephService
.send_command('mon', 'osd tree')
216 node
['name'] for node
in osd_tree
.get('nodes', {})
217 if node
.get('status') == 'up'
220 # Finally get the daemons on the host of the given device
221 # that are 'up'. All daemons on the same host can deliver
222 # SMART data, thus it is not relevant for us which daemon
224 daemons
= list(set(daemons
) & set(osd_daemons_up
)) # type: ignore
226 for daemon
in daemons
:
227 svc_type
, svc_id
= daemon
.split('.')
229 dev_smart_data
= CephService
.send_command(
230 svc_type
, 'smart', svc_id
, devid
=device
['devid'])
231 except SendCommandError
:
232 # Try to retrieve SMART data from another daemon.
234 for dev_id
, dev_data
in dev_smart_data
.items():
235 if 'error' in dev_data
:
237 '[SMART] Error retrieving smartctl data for device ID "%s": %s',
240 if dev_smart_data
is None:
241 raise DashboardException(
242 'Failed to retrieve SMART data for device ID "{}"'.format(
244 return dev_smart_data
245 logger
.warning('[SMART] No daemons associated with device ID "%s"',
250 def get_devices_by_host(hostname
):
252 return CephService
.send_command('mon',
257 def get_devices_by_daemon(daemon_type
, daemon_id
):
259 return CephService
.send_command('mon',
260 'device ls-by-daemon',
262 daemon_type
, daemon_id
))
265 def get_smart_data_by_host(hostname
):
266 # type: (str) -> dict
268 Get the SMART data of all devices on the given host, regardless
269 of the daemon (osd, mon, ...).
270 :param hostname: The name of the host.
271 :return: A dictionary containing the SMART data of every device
272 on the given host. The device name is used as the key in the
275 devices
= CephService
.get_devices_by_host(hostname
)
276 smart_data
= {} # type: dict
278 for device
in devices
:
279 if device
['devid'] not in smart_data
:
281 CephService
._get
_smart
_data
_by
_device
(device
))
285 def get_smart_data_by_daemon(daemon_type
, daemon_id
):
286 # type: (str, str) -> Dict[str, dict]
288 Get the SMART data of the devices associated with the given daemon.
289 :param daemon_type: The daemon type, e.g. 'osd' or 'mon'.
290 :param daemon_id: The daemon identifier.
291 :return: A dictionary containing the SMART data of every device
292 associated with the given daemon. The device name is used as the
293 key in the dictionary.
295 devices
= CephService
.get_devices_by_daemon(daemon_type
, daemon_id
)
296 smart_data
= {} # type: Dict[str, dict]
298 for device
in devices
:
299 if device
['devid'] not in smart_data
:
301 CephService
._get
_smart
_data
_by
_device
(device
))
305 def get_rates(cls
, svc_type
, svc_name
, path
):
307 :return: the derivative of mgr.get_counter()
308 :rtype: list[tuple[int, float]]"""
309 data
= mgr
.get_counter(svc_type
, svc_name
, path
)[path
]
310 return get_time_series_rates(data
)
313 def get_rate(cls
, svc_type
, svc_name
, path
):
314 """returns most recent rate"""
315 return get_most_recent_rate(cls
.get_rates(svc_type
, svc_name
, path
))
318 def get_client_perf(cls
):
319 pools_stats
= mgr
.get('osd_pool_stats')['pool_stats']
323 'read_op_per_sec': 0,
324 'write_bytes_sec': 0,
325 'write_op_per_sec': 0,
327 recovery_stats
= {'recovering_bytes_per_sec': 0}
329 for pool_stats
in pools_stats
:
330 client_io
= pool_stats
['client_io_rate']
331 for stat
in list(io_stats
.keys()):
332 if stat
in client_io
:
333 io_stats
[stat
] += client_io
[stat
]
335 client_recovery
= pool_stats
['recovery_rate']
336 for stat
in list(recovery_stats
.keys()):
337 if stat
in client_recovery
:
338 recovery_stats
[stat
] += client_recovery
[stat
]
340 client_perf
= io_stats
.copy()
341 client_perf
.update(recovery_stats
)
346 def get_scrub_status(cls
):
347 enabled_flags
= mgr
.get('osd_map')['flags_set']
348 if cls
.OSD_FLAG_NO_SCRUB
in enabled_flags
or cls
.OSD_FLAG_NO_DEEP_SCRUB
in enabled_flags
:
349 return cls
.SCRUB_STATUS_DISABLED
351 grouped_pg_statuses
= mgr
.get('pg_summary')['all']
352 for grouped_pg_status
in grouped_pg_statuses
.keys():
353 if len(grouped_pg_status
.split(cls
.PG_STATUS_SCRUBBING
)) > 1 \
354 or len(grouped_pg_status
.split(cls
.PG_STATUS_DEEP_SCRUBBING
)) > 1:
355 return cls
.SCRUB_STATUS_ACTIVE
357 return cls
.SCRUB_STATUS_INACTIVE
360 def get_pg_info(cls
):
361 pg_summary
= mgr
.get('pg_summary')
362 object_stats
= {stat
: pg_summary
['pg_stats_sum']['stat_sum'][stat
] for stat
in [
363 'num_objects', 'num_object_copies', 'num_objects_degraded',
364 'num_objects_misplaced', 'num_objects_unfound']}
367 total_osds
= len(pg_summary
['by_osd'])
370 for _
, osd_pg_statuses
in pg_summary
['by_osd'].items():
371 for _
, pg_amount
in osd_pg_statuses
.items():
372 total_pgs
+= pg_amount
374 pgs_per_osd
= total_pgs
/ total_osds
377 'object_stats': object_stats
,
378 'statuses': pg_summary
['all'],
379 'pgs_per_osd': pgs_per_osd
,