]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/services/monitoring.py
4 from typing
import List
, Any
, Tuple
, Dict
, Optional
, cast
5 from urllib
.parse
import urlparse
7 from mgr_module
import HandleCommandResult
9 from orchestrator
import DaemonDescription
10 from ceph
.deployment
.service_spec
import AlertManagerSpec
, GrafanaSpec
, ServiceSpec
, SNMPGatewaySpec
11 from cephadm
.services
.cephadmservice
import CephadmService
, CephadmDaemonDeploySpec
12 from cephadm
.services
.ingress
import IngressSpec
13 from mgr_util
import verify_tls
, ServerConfigException
, create_self_signed_cert
, build_url
15 logger
= logging
.getLogger(__name__
)
18 class GrafanaService(CephadmService
):
20 DEFAULT_SERVICE_PORT
= 3000
22 def prepare_create(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> CephadmDaemonDeploySpec
:
23 assert self
.TYPE
== daemon_spec
.daemon_type
24 daemon_spec
.final_config
, daemon_spec
.deps
= self
.generate_config(daemon_spec
)
27 def generate_config(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
28 assert self
.TYPE
== daemon_spec
.daemon_type
29 deps
= [] # type: List[str]
31 prom_services
= [] # type: List[str]
32 for dd
in self
.mgr
.cache
.get_daemons_by_service('prometheus'):
33 assert dd
.hostname
is not None
34 addr
= dd
.ip
if dd
.ip
else self
._inventory
_get
_addr
(dd
.hostname
)
35 port
= dd
.ports
[0] if dd
.ports
else 9095
36 prom_services
.append(build_url(scheme
='http', host
=addr
, port
=port
))
38 deps
.append(dd
.name())
39 grafana_data_sources
= self
.mgr
.template
.render(
40 'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services
})
42 cert
= self
.mgr
.get_store('grafana_crt')
43 pkey
= self
.mgr
.get_store('grafana_key')
46 verify_tls(cert
, pkey
)
47 except ServerConfigException
as e
:
48 logger
.warning('Provided grafana TLS certificates invalid: %s', str(e
))
49 cert
, pkey
= None, None
50 if not (cert
and pkey
):
51 cert
, pkey
= create_self_signed_cert('Ceph', 'cephadm')
52 self
.mgr
.set_store('grafana_crt', cert
)
53 self
.mgr
.set_store('grafana_key', pkey
)
54 if 'dashboard' in self
.mgr
.get('mgr_map')['modules']:
55 self
.mgr
.check_mon_command({
56 'prefix': 'dashboard set-grafana-api-ssl-verify',
60 spec
: GrafanaSpec
= cast(
61 GrafanaSpec
, self
.mgr
.spec_store
.active_specs
[daemon_spec
.service_name
])
62 grafana_ini
= self
.mgr
.template
.render(
63 'services/grafana/grafana.ini.j2', {
64 'initial_admin_password': spec
.initial_admin_password
,
65 'http_port': daemon_spec
.ports
[0] if daemon_spec
.ports
else self
.DEFAULT_SERVICE_PORT
,
66 'http_addr': daemon_spec
.ip
if daemon_spec
.ip
else ''
71 "grafana.ini": grafana_ini
,
72 'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources
,
73 'certs/cert_file': '# generated by cephadm\n%s' % cert
,
74 'certs/cert_key': '# generated by cephadm\n%s' % pkey
,
77 return config_file
, sorted(deps
)
79 def get_active_daemon(self
, daemon_descrs
: List
[DaemonDescription
]) -> DaemonDescription
:
80 # Use the least-created one as the active daemon
82 return daemon_descrs
[-1]
83 # if empty list provided, return empty Daemon Desc
84 return DaemonDescription()
86 def config_dashboard(self
, daemon_descrs
: List
[DaemonDescription
]) -> None:
88 dd
= self
.get_active_daemon(daemon_descrs
)
89 assert dd
.hostname
is not None
90 addr
= dd
.ip
if dd
.ip
else self
._inventory
_get
_addr
(dd
.hostname
)
91 port
= dd
.ports
[0] if dd
.ports
else self
.DEFAULT_SERVICE_PORT
92 service_url
= build_url(scheme
='https', host
=addr
, port
=port
)
93 self
._set
_service
_url
_on
_dashboard
(
95 'dashboard get-grafana-api-url',
96 'dashboard set-grafana-api-url',
101 daemon_ids
: List
[str],
103 known
: Optional
[List
[str]] = None) -> HandleCommandResult
:
104 warn
, warn_message
= self
._enough
_daemons
_to
_stop
(self
.TYPE
, daemon_ids
, 'Grafana', 1)
105 if warn
and not force
:
106 return HandleCommandResult(-errno
.EBUSY
, '', warn_message
)
107 return HandleCommandResult(0, warn_message
, '')
110 class AlertmanagerService(CephadmService
):
111 TYPE
= 'alertmanager'
112 DEFAULT_SERVICE_PORT
= 9093
114 def prepare_create(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> CephadmDaemonDeploySpec
:
115 assert self
.TYPE
== daemon_spec
.daemon_type
116 daemon_spec
.final_config
, daemon_spec
.deps
= self
.generate_config(daemon_spec
)
119 def generate_config(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
120 assert self
.TYPE
== daemon_spec
.daemon_type
122 default_webhook_urls
: List
[str] = []
124 spec
= cast(AlertManagerSpec
, self
.mgr
.spec_store
[daemon_spec
.service_name
].spec
)
125 user_data
= spec
.user_data
126 if 'default_webhook_urls' in user_data
and isinstance(
127 user_data
['default_webhook_urls'], list):
128 default_webhook_urls
.extend(user_data
['default_webhook_urls'])
131 dashboard_urls
: List
[str] = []
132 snmp_gateway_urls
: List
[str] = []
133 mgr_map
= self
.mgr
.get('mgr_map')
135 proto
= None # http: or https:
136 url
= mgr_map
.get('services', {}).get('dashboard', None)
138 dashboard_urls
.append(url
)
139 p_result
= urlparse(url
)
140 proto
= p_result
.scheme
142 # scan all mgrs to generate deps and to get standbys too.
143 # assume that they are all on the same port as the active mgr.
144 for dd
in self
.mgr
.cache
.get_daemons_by_service('mgr'):
145 # we consider mgr a dep even if the dashboard is disabled
146 # in order to be consistent with _calc_daemon_deps().
147 deps
.append(dd
.name())
150 if dd
.daemon_id
== self
.mgr
.get_mgr_id():
152 assert dd
.hostname
is not None
153 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
154 dashboard_urls
.append(build_url(scheme
=proto
, host
=addr
, port
=port
))
156 for dd
in self
.mgr
.cache
.get_daemons_by_service('snmp-gateway'):
157 assert dd
.hostname
is not None
159 addr
= dd
.ip
if dd
.ip
else self
._inventory
_get
_addr
(dd
.hostname
)
160 deps
.append(dd
.name())
162 snmp_gateway_urls
.append(build_url(scheme
='http', host
=addr
,
163 port
=dd
.ports
[0], path
='/alerts'))
166 'dashboard_urls': dashboard_urls
,
167 'default_webhook_urls': default_webhook_urls
,
168 'snmp_gateway_urls': snmp_gateway_urls
,
170 yml
= self
.mgr
.template
.render('services/alertmanager/alertmanager.yml.j2', context
)
174 for dd
in self
.mgr
.cache
.get_daemons_by_service('alertmanager'):
175 assert dd
.hostname
is not None
176 deps
.append(dd
.name())
177 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
178 peers
.append(build_url(host
=addr
, port
=port
).lstrip('/'))
182 "alertmanager.yml": yml
187 def get_active_daemon(self
, daemon_descrs
: List
[DaemonDescription
]) -> DaemonDescription
:
188 # TODO: if there are multiple daemons, who is the active one?
190 return daemon_descrs
[0]
191 # if empty list provided, return empty Daemon Desc
192 return DaemonDescription()
194 def config_dashboard(self
, daemon_descrs
: List
[DaemonDescription
]) -> None:
195 dd
= self
.get_active_daemon(daemon_descrs
)
196 assert dd
.hostname
is not None
197 addr
= dd
.ip
if dd
.ip
else self
._inventory
_get
_addr
(dd
.hostname
)
198 port
= dd
.ports
[0] if dd
.ports
else self
.DEFAULT_SERVICE_PORT
199 service_url
= build_url(scheme
='http', host
=addr
, port
=port
)
200 self
._set
_service
_url
_on
_dashboard
(
202 'dashboard get-alertmanager-api-host',
203 'dashboard set-alertmanager-api-host',
208 daemon_ids
: List
[str],
210 known
: Optional
[List
[str]] = None) -> HandleCommandResult
:
211 warn
, warn_message
= self
._enough
_daemons
_to
_stop
(self
.TYPE
, daemon_ids
, 'Alertmanager', 1)
212 if warn
and not force
:
213 return HandleCommandResult(-errno
.EBUSY
, '', warn_message
)
214 return HandleCommandResult(0, warn_message
, '')
217 class PrometheusService(CephadmService
):
219 DEFAULT_SERVICE_PORT
= 9095
221 def config(self
, spec
: ServiceSpec
) -> None:
222 # make sure module is enabled
223 mgr_map
= self
.mgr
.get('mgr_map')
224 if 'prometheus' not in mgr_map
.get('services', {}):
225 self
.mgr
.check_mon_command({
226 'prefix': 'mgr module enable',
227 'module': 'prometheus'
229 # we shouldn't get here (mon will tell the mgr to respawn), but no
230 # harm done if we do.
234 daemon_spec
: CephadmDaemonDeploySpec
,
235 ) -> CephadmDaemonDeploySpec
:
236 assert self
.TYPE
== daemon_spec
.daemon_type
237 daemon_spec
.final_config
, daemon_spec
.deps
= self
.generate_config(daemon_spec
)
242 daemon_spec
: CephadmDaemonDeploySpec
,
243 ) -> Tuple
[Dict
[str, Any
], List
[str]]:
244 assert self
.TYPE
== daemon_spec
.daemon_type
245 deps
= [] # type: List[str]
249 mgr_map
= self
.mgr
.get('mgr_map')
251 t
= mgr_map
.get('services', {}).get('prometheus', None)
253 p_result
= urlparse(t
)
255 mgr_scrape_list
.append(t
)
256 port
= p_result
.port
or 9283
257 # scan all mgrs to generate deps and to get standbys too.
258 # assume that they are all on the same port as the active mgr.
259 for dd
in self
.mgr
.cache
.get_daemons_by_service('mgr'):
260 # we consider the mgr a dep even if the prometheus module is
261 # disabled in order to be consistent with _calc_daemon_deps().
262 deps
.append(dd
.name())
265 if dd
.daemon_id
== self
.mgr
.get_mgr_id():
267 assert dd
.hostname
is not None
268 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
269 mgr_scrape_list
.append(build_url(host
=addr
, port
=port
).lstrip('/'))
271 # scrape node exporters
273 for dd
in self
.mgr
.cache
.get_daemons_by_service('node-exporter'):
274 assert dd
.hostname
is not None
275 deps
.append(dd
.name())
276 addr
= dd
.ip
if dd
.ip
else self
.mgr
.inventory
.get_addr(dd
.hostname
)
277 port
= dd
.ports
[0] if dd
.ports
else 9100
279 'hostname': dd
.hostname
,
280 'url': build_url(host
=addr
, port
=port
).lstrip('/')
283 # scrape alert managers
284 alertmgr_targets
= []
285 for dd
in self
.mgr
.cache
.get_daemons_by_service('alertmanager'):
286 assert dd
.hostname
is not None
287 deps
.append(dd
.name())
288 addr
= dd
.ip
if dd
.ip
else self
.mgr
.inventory
.get_addr(dd
.hostname
)
289 port
= dd
.ports
[0] if dd
.ports
else 9093
290 alertmgr_targets
.append("'{}'".format(build_url(host
=addr
, port
=port
).lstrip('/')))
294 for dd
in self
.mgr
.cache
.get_daemons_by_type('ingress'):
295 if dd
.service_name() in self
.mgr
.spec_store
:
296 spec
= cast(IngressSpec
, self
.mgr
.spec_store
[dd
.service_name()].spec
)
297 assert dd
.hostname
is not None
298 deps
.append(dd
.name())
299 if dd
.daemon_type
== 'haproxy':
300 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
301 haproxy_targets
.append({
302 "url": f
"'{build_url(host=addr, port=spec.monitor_port).lstrip('/')}'",
303 "service": dd
.service_name(),
306 # generate the prometheus configuration
308 'alertmgr_targets': alertmgr_targets
,
309 'mgr_scrape_list': mgr_scrape_list
,
310 'haproxy_targets': haproxy_targets
,
316 self
.mgr
.template
.render(
317 'services/prometheus/prometheus.yml.j2', context
)
321 # include alerts, if present in the container
322 if os
.path
.exists(self
.mgr
.prometheus_alerts_path
):
323 with
open(self
.mgr
.prometheus_alerts_path
, 'r', encoding
='utf-8') as f
:
325 r
['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts
327 return r
, sorted(deps
)
329 def get_active_daemon(self
, daemon_descrs
: List
[DaemonDescription
]) -> DaemonDescription
:
330 # TODO: if there are multiple daemons, who is the active one?
332 return daemon_descrs
[0]
333 # if empty list provided, return empty Daemon Desc
334 return DaemonDescription()
336 def config_dashboard(self
, daemon_descrs
: List
[DaemonDescription
]) -> None:
337 dd
= self
.get_active_daemon(daemon_descrs
)
338 assert dd
.hostname
is not None
339 addr
= dd
.ip
if dd
.ip
else self
._inventory
_get
_addr
(dd
.hostname
)
340 port
= dd
.ports
[0] if dd
.ports
else self
.DEFAULT_SERVICE_PORT
341 service_url
= build_url(scheme
='http', host
=addr
, port
=port
)
342 self
._set
_service
_url
_on
_dashboard
(
344 'dashboard get-prometheus-api-host',
345 'dashboard set-prometheus-api-host',
350 daemon_ids
: List
[str],
352 known
: Optional
[List
[str]] = None) -> HandleCommandResult
:
353 warn
, warn_message
= self
._enough
_daemons
_to
_stop
(self
.TYPE
, daemon_ids
, 'Prometheus', 1)
354 if warn
and not force
:
355 return HandleCommandResult(-errno
.EBUSY
, '', warn_message
)
356 return HandleCommandResult(0, warn_message
, '')
359 class NodeExporterService(CephadmService
):
360 TYPE
= 'node-exporter'
362 def prepare_create(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> CephadmDaemonDeploySpec
:
363 assert self
.TYPE
== daemon_spec
.daemon_type
364 daemon_spec
.final_config
, daemon_spec
.deps
= self
.generate_config(daemon_spec
)
367 def generate_config(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
368 assert self
.TYPE
== daemon_spec
.daemon_type
372 daemon_ids
: List
[str],
374 known
: Optional
[List
[str]] = None) -> HandleCommandResult
:
375 # since node exporter runs on each host and cannot compromise data, no extra checks required
376 names
= [f
'{self.TYPE}.{d_id}' for d_id
in daemon_ids
]
377 out
= f
'It is presumed safe to stop {names}'
378 return HandleCommandResult(0, out
, '')
381 class SNMPGatewayService(CephadmService
):
382 TYPE
= 'snmp-gateway'
384 def prepare_create(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> CephadmDaemonDeploySpec
:
385 assert self
.TYPE
== daemon_spec
.daemon_type
386 daemon_spec
.final_config
, daemon_spec
.deps
= self
.generate_config(daemon_spec
)
389 def generate_config(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
390 assert self
.TYPE
== daemon_spec
.daemon_type
393 spec
= cast(SNMPGatewaySpec
, self
.mgr
.spec_store
[daemon_spec
.service_name
].spec
)
395 "destination": spec
.snmp_destination
,
396 "snmp_version": spec
.snmp_version
,
398 if spec
.snmp_version
== 'V2c':
399 community
= spec
.credentials
.get('snmp_community', None)
400 assert community
is not None
403 "snmp_community": community
406 # SNMP v3 settings can be either authNoPriv or authPriv
407 auth_protocol
= 'SHA' if not spec
.auth_protocol
else spec
.auth_protocol
409 auth_username
= spec
.credentials
.get('snmp_v3_auth_username', None)
410 auth_password
= spec
.credentials
.get('snmp_v3_auth_password', None)
411 assert auth_username
is not None
412 assert auth_password
is not None
413 assert spec
.engine_id
is not None
416 "snmp_v3_auth_protocol": auth_protocol
,
417 "snmp_v3_auth_username": auth_username
,
418 "snmp_v3_auth_password": auth_password
,
419 "snmp_v3_engine_id": spec
.engine_id
,
421 # authPriv adds encryption
422 if spec
.privacy_protocol
:
423 priv_password
= spec
.credentials
.get('snmp_v3_priv_password', None)
424 assert priv_password
is not None
427 "snmp_v3_priv_protocol": spec
.privacy_protocol
,
428 "snmp_v3_priv_password": priv_password
,
432 f
"Generated configuration for '{self.TYPE}' service. Dependencies={deps}")
434 return config
, sorted(deps
)