import errno
import ipaddress
import logging
import os
import socket
from typing import List, Any, Tuple, Dict, Optional, cast
from urllib.parse import urlparse

from mgr_module import HandleCommandResult

from orchestrator import DaemonDescription
from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, SNMPGatewaySpec
from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec
from cephadm.services.ingress import IngressSpec
from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url
# Module-level logger, named after this module through ``__name__``.
logger = logging.getLogger(__name__)
class GrafanaService(CephadmService):
    """Service handler that builds the configuration for ``grafana`` daemons.

    It renders the datasource provisioning file and ``grafana.ini``, manages
    the per-host TLS certificate/key kept in the mgr store, and pushes the
    Grafana URL to the dashboard module.
    """

    # NOTE(review): the listing this class was rebuilt from had dropped
    # several lines (``TYPE``, ``return daemon_spec``, the ``if``/``try``
    # headers around the TLS check, the ``'value'`` entry of the ssl-verify
    # command, dict/call closers, the first argument of
    # ``_set_service_url_on_dashboard`` and the ``ok_to_stop`` signature).
    # They are restored from context -- confirm against upstream.
    TYPE = 'grafana'
    DEFAULT_SERVICE_PORT = 3000

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Fill ``final_config`` and ``deps`` of *daemon_spec* and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        final_config, deps = self.generate_config(daemon_spec)
        daemon_spec.final_config = final_config
        daemon_spec.deps = deps
        return daemon_spec

    def generate_config(
            self, daemon_spec: CephadmDaemonDeploySpec
    ) -> Tuple[Dict[str, Any], List[str]]:
        """Build the grafana config files and the list of daemon dependencies.

        :param daemon_spec: deploy spec of the grafana daemon being configured
        :return: ``(config, deps)`` where ``config['files']`` maps file names
                 to their content and ``deps`` is the sorted list of the
                 prometheus and loki daemon names
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []

        # one datasource URL per prometheus daemon
        prometheus_urls: List[str] = []
        for prom in self.mgr.cache.get_daemons_by_service('prometheus'):
            assert prom.hostname is not None
            prom_addr = prom.ip or self._inventory_get_fqdn(prom.hostname)
            prom_port = prom.ports[0] if prom.ports else 9095
            prometheus_urls.append(build_url(scheme='http', host=prom_addr, port=prom_port))
            deps.append(prom.name())

        # every loki daemon is a dependency, but only the first one is used
        # as the loki datasource
        loki_daemons = self.mgr.cache.get_daemons_by_service('loki')
        loki_host = ''
        for idx, loki in enumerate(loki_daemons):
            assert loki.hostname is not None
            if idx == 0:
                loki_addr = loki.ip or self._inventory_get_fqdn(loki.hostname)
                loki_host = build_url(scheme='http', host=loki_addr, port=3100)
            deps.append(loki.name())

        grafana_data_sources = self.mgr.template.render(
            'services/grafana/ceph-dashboard.yml.j2',
            {'hosts': prometheus_urls, 'loki_host': loki_host})

        # TLS material is stored per host in the mgr key/value store
        cert_path = f'{daemon_spec.host}/grafana_crt'
        key_path = f'{daemon_spec.host}/grafana_key'
        cert = self.mgr.get_store(cert_path)
        pkey = self.mgr.get_store(key_path)
        if cert and pkey:
            try:
                verify_tls(cert, pkey)
            except ServerConfigException as e:
                logger.warning('Provided grafana TLS certificates invalid: %s', str(e))
                # discard the invalid pair so that a new one is generated below
                cert, pkey = None, None
        if not cert or not pkey:
            cert, pkey = create_self_signed_cert('Ceph', daemon_spec.host)
            self.mgr.set_store(cert_path, cert)
            self.mgr.set_store(key_path, pkey)
            if 'dashboard' in self.mgr.get('mgr_map')['modules']:
                # a freshly generated self-signed cert is in use
                self.mgr.check_mon_command({
                    'prefix': 'dashboard set-grafana-api-ssl-verify',
                    'value': 'false',
                })

        spec: GrafanaSpec = cast(
            GrafanaSpec, self.mgr.spec_store.active_specs[daemon_spec.service_name])
        http_port = daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT
        http_addr = daemon_spec.ip if daemon_spec.ip else ''
        grafana_ini = self.mgr.template.render(
            'services/grafana/grafana.ini.j2', {
                'initial_admin_password': spec.initial_admin_password,
                'http_port': http_port,
                'http_addr': http_addr,
            })

        if 'dashboard' in self.mgr.get('mgr_map')['modules'] and spec.initial_admin_password:
            self.mgr.check_mon_command(
                {'prefix': 'dashboard set-grafana-api-password'},
                inbuf=spec.initial_admin_password)

        files = {
            "grafana.ini": grafana_ini,
            'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources,
            'certs/cert_file': '# generated by cephadm\n%s' % cert,
            'certs/cert_key': '# generated by cephadm\n%s' % pkey,
        }
        config_file = {'files': files}
        return config_file, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the last entry of *daemon_descrs*, or an empty description.

        An empty ``DaemonDescription`` is returned when the list is empty.
        """
        if not daemon_descrs:
            return DaemonDescription()
        # the last daemon in the list is treated as the active one
        return daemon_descrs[-1]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Hand the https URL of the active grafana daemon to the dashboard."""
        active = self.get_active_daemon(daemon_descrs)
        assert active.hostname is not None
        host = active.ip or self._inventory_get_fqdn(active.hostname)
        port = active.ports[0] if active.ports else self.DEFAULT_SERVICE_PORT
        service_url = build_url(scheme='https', host=host, port=port)
        self._set_service_url_on_dashboard(
            'Grafana',
            'dashboard get-grafana-api-url',
            'dashboard set-grafana-api-url',
            service_url,
        )

    def pre_remove(self, daemon: DaemonDescription) -> None:
        """
        Called before grafana daemon is removed.

        Clears the certificate and key entries stored for the daemon's host.
        Nothing is done when the daemon has no hostname.
        """
        if daemon.hostname is None:
            return
        # certificate first, then key
        for entry in ('grafana_crt', 'grafana_key'):
            self.mgr.set_store(f'{daemon.hostname}/{entry}', None)

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Tell whether the given grafana daemons may be stopped.

        :param daemon_ids: ids of the daemons that are about to be stopped
        :param force: when true, a warning does not block the stop
        :param known: not used by this implementation
        :return: a ``-EBUSY`` result carrying the warning when
                 ``_enough_daemons_to_stop`` warns and *force* is not set,
                 otherwise a zero-status result carrying the message
        """
        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Grafana', 1)
        if force or not warn:
            return HandleCommandResult(0, warn_message, '')
        return HandleCommandResult(-errno.EBUSY, '', warn_message)
class AlertmanagerService(CephadmService):
    """Service handler that builds the configuration for ``alertmanager`` daemons."""

    # NOTE(review): the listing this class was rebuilt from had dropped
    # several lines (``return daemon_spec``, the ``deps`` initialisation, the
    # ``secure`` lookup guarded by ``except AttributeError``, the ``port``
    # bookkeeping and ``if``/``try``/``continue`` lines, ``assert dd.ports``,
    # the ``context``/return dict framing incl. ``'secure'`` and ``"peers"``,
    # the peer port 9094, the first argument of
    # ``_set_service_url_on_dashboard`` and the ``ok_to_stop`` signature).
    # They are restored from context -- confirm against upstream.
    TYPE = 'alertmanager'
    DEFAULT_SERVICE_PORT = 9093

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Fill ``final_config`` and ``deps`` of *daemon_spec* and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        final_config, deps = self.generate_config(daemon_spec)
        daemon_spec.final_config = final_config
        daemon_spec.deps = deps
        return daemon_spec

    def generate_config(
            self, daemon_spec: CephadmDaemonDeploySpec
    ) -> Tuple[Dict[str, Any], List[str]]:
        """Render ``alertmanager.yml`` and collect peers and dependencies.

        :param daemon_spec: deploy spec of the alertmanager daemon
        :return: ``(config, deps)``; ``config`` holds the rendered file under
                 ``"files"`` and the peer addresses under ``"peers"``,
                 ``deps`` is the sorted list of mgr, snmp-gateway and
                 alertmanager daemon names
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []
        default_webhook_urls: List[str] = []

        spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
        # specs without a ``secure`` attribute are treated as not secure
        secure = getattr(spec, 'secure', False)
        user_data = spec.user_data
        extra_urls = user_data['default_webhook_urls'] \
            if 'default_webhook_urls' in user_data else None
        if isinstance(extra_urls, list):
            default_webhook_urls.extend(extra_urls)

        # dashboard(s)
        dashboard_urls: List[str] = []
        snmp_gateway_urls: List[str] = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        proto = None  # http: or https:
        url = mgr_map.get('services', {}).get('dashboard', None)
        if url:
            p_result = urlparse(url.rstrip('/'))
            hostname = socket.getfqdn(p_result.hostname)

            # a literal IPv6 address has to be wrapped in brackets in a URL
            try:
                ip = ipaddress.ip_address(hostname)
            except ValueError:
                pass
            else:
                if ip.version == 6:
                    hostname = f'[{hostname}]'

            dashboard_urls.append(
                f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}')
            proto = p_result.scheme
            port = p_result.port

        # Walk over all mgrs: each one is a dependency, and the non-active
        # ones are assumed to serve the dashboard on the active mgr's port.
        for mgr_dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # the mgr counts as a dep even if the dashboard is disabled,
            # to be consistent with _calc_daemon_deps().
            deps.append(mgr_dd.name())
            if not port or mgr_dd.daemon_id == self.mgr.get_mgr_id():
                continue
            assert mgr_dd.hostname is not None
            mgr_addr = self._inventory_get_fqdn(mgr_dd.hostname)
            dashboard_urls.append(
                build_url(scheme=proto, host=mgr_addr, port=port).rstrip('/'))

        for gw in self.mgr.cache.get_daemons_by_service('snmp-gateway'):
            assert gw.hostname is not None
            assert gw.ports
            gw_addr = gw.ip or self._inventory_get_fqdn(gw.hostname)
            deps.append(gw.name())
            snmp_gateway_urls.append(
                build_url(scheme='http', host=gw_addr, port=gw.ports[0], path='/alerts'))

        context = {
            'dashboard_urls': dashboard_urls,
            'default_webhook_urls': default_webhook_urls,
            'snmp_gateway_urls': snmp_gateway_urls,
            'secure': secure,
        }
        yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context)

        peers = []
        port = 9094
        for am in self.mgr.cache.get_daemons_by_service('alertmanager'):
            assert am.hostname is not None
            deps.append(am.name())
            am_addr = self._inventory_get_fqdn(am.hostname)
            peers.append(build_url(host=am_addr, port=port).lstrip('/'))

        config = {
            "files": {
                "alertmanager.yml": yml
            },
            "peers": peers
        }
        return config, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the first entry of *daemon_descrs*, or an empty description.

        An empty ``DaemonDescription`` is returned when the list is empty.
        """
        # TODO: if there are multiple daemons, who is the active one?
        if not daemon_descrs:
            return DaemonDescription()
        return daemon_descrs[0]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Hand the http URL of the active alertmanager daemon to the dashboard."""
        active = self.get_active_daemon(daemon_descrs)
        assert active.hostname is not None
        host = active.ip or self._inventory_get_fqdn(active.hostname)
        port = active.ports[0] if active.ports else self.DEFAULT_SERVICE_PORT
        service_url = build_url(scheme='http', host=host, port=port)
        self._set_service_url_on_dashboard(
            'AlertManager',
            'dashboard get-alertmanager-api-host',
            'dashboard set-alertmanager-api-host',
            service_url,
        )

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Tell whether the given alertmanager daemons may be stopped.

        :param daemon_ids: ids of the daemons that are about to be stopped
        :param force: when true, a warning does not block the stop
        :param known: not used by this implementation
        :return: a ``-EBUSY`` result carrying the warning when
                 ``_enough_daemons_to_stop`` warns and *force* is not set,
                 otherwise a zero-status result carrying the message
        """
        warn, warn_message = self._enough_daemons_to_stop(
            self.TYPE, daemon_ids, 'Alertmanager', 1)
        if force or not warn:
            return HandleCommandResult(0, warn_message, '')
        return HandleCommandResult(-errno.EBUSY, '', warn_message)
class PrometheusService(CephadmService):
    """Service handler that builds the configuration for ``prometheus`` daemons."""

    # NOTE(review): the listing this class was rebuilt from had dropped
    # several lines (``TYPE``, ``return daemon_spec``, the ``def`` headers of
    # ``prepare_create``/``generate_config``/``ok_to_stop``, the list
    # initialisers, ``if``/``else``/``continue`` lines, the ``nodes`` list and
    # its ``'nodes'`` context entry, the ``r``/``'files'``/``'prometheus.yml'``
    # framing, ``alerts = f.read()`` and the first argument of
    # ``_set_service_url_on_dashboard``). They are restored from context --
    # confirm against upstream.
    TYPE = 'prometheus'
    DEFAULT_SERVICE_PORT = 9095
    DEFAULT_MGR_PROMETHEUS_PORT = 9283

    def config(self, spec: ServiceSpec) -> None:
        """Enable the ``prometheus`` mgr module unless it is already listed
        among the services of the mgr map."""
        mgr_map = self.mgr.get('mgr_map')
        if 'prometheus' in mgr_map.get('services', {}):
            return
        self.mgr.check_mon_command({
            'prefix': 'mgr module enable',
            'module': 'prometheus'
        })
        # we shouldn't get here (mon will tell the mgr to respawn), but no
        # harm done if we do.

    def prepare_create(
            self,
            daemon_spec: CephadmDaemonDeploySpec,
    ) -> CephadmDaemonDeploySpec:
        """Fill ``final_config`` and ``deps`` of *daemon_spec* and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        final_config, deps = self.generate_config(daemon_spec)
        daemon_spec.final_config = final_config
        daemon_spec.deps = deps
        return daemon_spec

    def generate_config(
            self,
            daemon_spec: CephadmDaemonDeploySpec,
    ) -> Tuple[Dict[str, Any], List[str]]:
        """Render ``prometheus.yml`` plus alert files and collect dependencies.

        :param daemon_spec: deploy spec of the prometheus daemon
        :return: ``(config, deps)``; ``config['files']`` maps file names to
                 their content, ``deps`` is the sorted list made of the mgr
                 prometheus port and the mgr, node-exporter, alertmanager and
                 ingress daemon names
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []

        # scrape mgrs
        mgr_scrape_list = []
        mgr_map = self.mgr.get('mgr_map')
        port = cast(int, self.mgr.get_module_option_ex(
            'prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT))
        # the port is part of the deps as well
        deps.append(str(port))
        active_url = mgr_map.get('services', {}).get('prometheus', None)
        if active_url:
            p_result = urlparse(active_url)
            # urlparse's .hostname drops the '[]' around IPv6 addresses, so
            # put the brackets back when the netloc had them
            bracketed = '[' in p_result.netloc and ']' in p_result.netloc
            if bracketed:
                mgr_scrape_list.append(f"[{p_result.hostname}]:{port}")
            else:
                mgr_scrape_list.append(f"{p_result.hostname}:{port}")
        # Walk over all mgrs: each one is a dependency, and the non-active
        # ones are assumed to listen on the same port as the active mgr.
        for mgr_dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # the mgr counts as a dep even if the prometheus module is
            # disabled, to be consistent with _calc_daemon_deps().
            deps.append(mgr_dd.name())
            if not port or mgr_dd.daemon_id == self.mgr.get_mgr_id():
                continue
            assert mgr_dd.hostname is not None
            mgr_addr = self._inventory_get_fqdn(mgr_dd.hostname)
            mgr_scrape_list.append(build_url(host=mgr_addr, port=port).lstrip('/'))

        # scrape node exporters
        nodes = []
        for ne in self.mgr.cache.get_daemons_by_service('node-exporter'):
            assert ne.hostname is not None
            deps.append(ne.name())
            ne_addr = ne.ip or self._inventory_get_fqdn(ne.hostname)
            ne_port = ne.ports[0] if ne.ports else 9100
            nodes.append({
                'hostname': ne.hostname,
                'url': build_url(host=ne_addr, port=ne_port).lstrip('/')
            })

        # scrape alert managers
        alertmgr_targets = []
        for am in self.mgr.cache.get_daemons_by_service('alertmanager'):
            assert am.hostname is not None
            deps.append(am.name())
            am_addr = am.ip or self._inventory_get_fqdn(am.hostname)
            am_port = am.ports[0] if am.ports else 9093
            am_target = build_url(host=am_addr, port=am_port).lstrip('/')
            alertmgr_targets.append("'{}'".format(am_target))

        # scrape haproxies
        haproxy_targets = []
        for ing in self.mgr.cache.get_daemons_by_type('ingress'):
            if ing.service_name() not in self.mgr.spec_store:
                continue
            spec = cast(IngressSpec, self.mgr.spec_store[ing.service_name()].spec)
            assert ing.hostname is not None
            deps.append(ing.name())
            if ing.daemon_type == 'haproxy':
                ing_addr = self._inventory_get_fqdn(ing.hostname)
                ing_target = build_url(host=ing_addr, port=spec.monitor_port).lstrip('/')
                haproxy_targets.append({
                    "url": f"'{ing_target}'",
                    "service": ing.service_name(),
                })

        # generate the prometheus configuration
        context = {
            'alertmgr_targets': alertmgr_targets,
            'mgr_scrape_list': mgr_scrape_list,
            'haproxy_targets': haproxy_targets,
            'nodes': nodes,
        }
        prometheus_yml = self.mgr.template.render(
            'services/prometheus/prometheus.yml.j2', context)
        r: Dict[str, Any] = {'files': {'prometheus.yml': prometheus_yml}}

        # include alerts, if present in the container
        if os.path.exists(self.mgr.prometheus_alerts_path):
            with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
                r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = f.read()

        # Custom alerts come from the key value store so that users can add
        # their own. The file is written in any case: if the stored value
        # changed the file is overwritten, and if the value was removed the
        # file is emptied.
        #
        # The template engine is deliberately not used here, because
        #
        # 1. the alerts are always static and
        # 2. they are themselves templates for the Go template engine, whose
        #    curly braces would need cumbersome and unnecessary escaping.
        custom_alerts = self.mgr.get_store(
            'services/prometheus/alerting/custom_alerts.yml', '')
        r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = custom_alerts

        return r, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the first entry of *daemon_descrs*, or an empty description.

        An empty ``DaemonDescription`` is returned when the list is empty.
        """
        # TODO: if there are multiple daemons, who is the active one?
        if not daemon_descrs:
            return DaemonDescription()
        return daemon_descrs[0]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Hand the http URL of the active prometheus daemon to the dashboard."""
        active = self.get_active_daemon(daemon_descrs)
        assert active.hostname is not None
        host = active.ip or self._inventory_get_fqdn(active.hostname)
        port = active.ports[0] if active.ports else self.DEFAULT_SERVICE_PORT
        service_url = build_url(scheme='http', host=host, port=port)
        self._set_service_url_on_dashboard(
            'Prometheus',
            'dashboard get-prometheus-api-host',
            'dashboard set-prometheus-api-host',
            service_url,
        )

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Tell whether the given prometheus daemons may be stopped.

        :param daemon_ids: ids of the daemons that are about to be stopped
        :param force: when true, a warning does not block the stop
        :param known: not used by this implementation
        :return: a ``-EBUSY`` result carrying the warning when
                 ``_enough_daemons_to_stop`` warns and *force* is not set,
                 otherwise a zero-status result carrying the message
        """
        warn, warn_message = self._enough_daemons_to_stop(
            self.TYPE, daemon_ids, 'Prometheus', 1)
        if force or not warn:
            return HandleCommandResult(0, warn_message, '')
        return HandleCommandResult(-errno.EBUSY, '', warn_message)
class NodeExporterService(CephadmService):
    """Service handler for ``node-exporter`` daemons."""

    # NOTE(review): the listing this class was rebuilt from had dropped
    # ``return daemon_spec``, the body of ``generate_config`` (restored as an
    # empty config with no deps) and the ``ok_to_stop`` signature -- confirm
    # against upstream.
    TYPE = 'node-exporter'

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Fill ``final_config`` and ``deps`` of *daemon_spec* and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        final_config, deps = self.generate_config(daemon_spec)
        daemon_spec.final_config = final_config
        daemon_spec.deps = deps
        return daemon_spec

    def generate_config(
            self, daemon_spec: CephadmDaemonDeploySpec
    ) -> Tuple[Dict[str, Any], List[str]]:
        """Return an empty configuration and an empty dependency list."""
        assert self.TYPE == daemon_spec.daemon_type
        no_config: Dict[str, Any] = {}
        no_deps: List[str] = []
        return no_config, no_deps

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Always report that the given daemons can be stopped.

        :param daemon_ids: ids of the daemons that are about to be stopped
        :param force: not used by this implementation
        :param known: not used by this implementation
        :return: a zero-status result naming the daemons
        """
        # since node exporter runs on each host and cannot compromise data, no extra checks required
        names = []
        for d_id in daemon_ids:
            names.append(f'{self.TYPE}.{d_id}')
        return HandleCommandResult(0, f'It is presumed safe to stop {names}', '')
class LokiService(CephadmService):
    """Service handler that builds the configuration for ``loki`` daemons."""

    # NOTE(review): the listing this class was rebuilt from had dropped
    # ``TYPE``, ``return daemon_spec``, the ``deps`` initialisation and the
    # whole return statement of ``generate_config`` (restored as a
    # ``"files"`` dict holding ``"loki.yml"``) -- confirm against upstream.
    TYPE = 'loki'
    DEFAULT_SERVICE_PORT = 3100

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Fill ``final_config`` and ``deps`` of *daemon_spec* and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        final_config, deps = self.generate_config(daemon_spec)
        daemon_spec.final_config = final_config
        daemon_spec.deps = deps
        return daemon_spec

    def generate_config(
            self, daemon_spec: CephadmDaemonDeploySpec
    ) -> Tuple[Dict[str, Any], List[str]]:
        """Render the loki template; no dependencies are collected here.

        :return: ``(config, deps)`` with the rendered file under
                 ``config['files']`` and an empty ``deps`` list
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []

        rendered = self.mgr.template.render('services/loki.yml.j2')
        config = {
            "files": {
                "loki.yml": rendered
            }
        }
        return config, sorted(deps)
class PromtailService(CephadmService):
    """Service handler that builds the configuration for ``promtail`` daemons."""

    # NOTE(review): the listing this class was rebuilt from had dropped
    # ``TYPE``, ``return daemon_spec``, the ``deps``/``loki_host``
    # initialisers, the ``if i == 0:`` guard, the ``context`` framing and the
    # return statement of ``generate_config`` (restored as a ``"files"`` dict
    # holding ``"promtail.yml"``) -- confirm against upstream.
    TYPE = 'promtail'
    DEFAULT_SERVICE_PORT = 9080

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Fill ``final_config`` and ``deps`` of *daemon_spec* and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        final_config, deps = self.generate_config(daemon_spec)
        daemon_spec.final_config = final_config
        daemon_spec.deps = deps
        return daemon_spec

    def generate_config(
            self, daemon_spec: CephadmDaemonDeploySpec
    ) -> Tuple[Dict[str, Any], List[str]]:
        """Render the promtail template, pointing it at the first loki daemon.

        :return: ``(config, deps)`` with the rendered file under
                 ``config['files']`` and the sorted loki daemon names as deps
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []

        # every loki daemon is a dependency, but only the first one is used
        # as the client host (empty string when there is none)
        loki_daemons = self.mgr.cache.get_daemons_by_service('loki')
        loki_host = ''
        for idx, loki in enumerate(loki_daemons):
            assert loki.hostname is not None
            if idx == 0:
                loki_host = loki.ip or self._inventory_get_fqdn(loki.hostname)
            deps.append(loki.name())

        rendered = self.mgr.template.render(
            'services/promtail.yml.j2', {'client_hostname': loki_host})
        config = {
            "files": {
                "promtail.yml": rendered
            }
        }
        return config, sorted(deps)
class SNMPGatewayService(CephadmService):
    """Service handler that builds the configuration for ``snmp-gateway`` daemons."""

    # NOTE(review): the listing this class was rebuilt from had dropped
    # ``return daemon_spec``, the ``deps`` initialisation, the ``config``
    # dict / ``config.update`` framing, the ``else:`` of the version check
    # and the ``logger.debug(`` call header. They are restored from context
    # -- confirm against upstream.
    TYPE = 'snmp-gateway'

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Fill ``final_config`` and ``deps`` of *daemon_spec* and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        final_config, deps = self.generate_config(daemon_spec)
        daemon_spec.final_config = final_config
        daemon_spec.deps = deps
        return daemon_spec

    def generate_config(
            self, daemon_spec: CephadmDaemonDeploySpec
    ) -> Tuple[Dict[str, Any], List[str]]:
        """Build the SNMP gateway settings from the service spec.

        For ``V2c`` the community string is required; otherwise the v3 auth
        username, auth password and engine id are required, and a privacy
        password too when a privacy protocol is set. Missing values trip an
        assertion.

        :return: ``(config, deps)``; ``deps`` is empty for this service
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []

        spec = cast(SNMPGatewaySpec, self.mgr.spec_store[daemon_spec.service_name].spec)
        credentials = spec.credentials
        config = {
            "destination": spec.snmp_destination,
            "snmp_version": spec.snmp_version,
        }

        if spec.snmp_version == 'V2c':
            community = credentials.get('snmp_community')
            assert community is not None
            config["snmp_community"] = community
        else:
            # SNMP v3 settings can be either authNoPriv or authPriv
            auth_protocol = spec.auth_protocol or 'SHA'

            auth_username = credentials.get('snmp_v3_auth_username')
            auth_password = credentials.get('snmp_v3_auth_password')
            assert auth_username is not None
            assert auth_password is not None
            assert spec.engine_id is not None

            config["snmp_v3_auth_protocol"] = auth_protocol
            config["snmp_v3_auth_username"] = auth_username
            config["snmp_v3_auth_password"] = auth_password
            config["snmp_v3_engine_id"] = spec.engine_id

            # authPriv adds encryption
            if spec.privacy_protocol:
                priv_password = credentials.get('snmp_v3_priv_password')
                assert priv_password is not None
                config["snmp_v3_priv_protocol"] = spec.privacy_protocol
                config["snmp_v3_priv_password"] = priv_password

        logger.debug(
            f"Generated configuration for '{self.TYPE}' service. Dependencies={deps}")

        return config, sorted(deps)