import errno
import ipaddress
import logging
import os
import socket
from typing import List, Any, Tuple, Dict, Optional, cast
from urllib.parse import urlparse

from mgr_module import HandleCommandResult

from orchestrator import DaemonDescription
from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
    SNMPGatewaySpec, PrometheusSpec
from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec
from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url, get_cert_issuer_info, password_hash
from ceph.deployment.utils import wrap_ipv6

logger = logging.getLogger(__name__)


class GrafanaService(CephadmService):
    TYPE = 'grafana'
    DEFAULT_SERVICE_PORT = 3000

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        assert self.TYPE == daemon_spec.daemon_type
        prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
        deps = []  # type: List[str]
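        # note: the strings collected in `deps` act as a change fingerprint:
        # cephadm compares the sorted deps list against the previous deployment
        # and reconfigures the daemon when it changes, which is why credential
        # hashes and feature flags are appended alongside daemon names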
        if self.mgr.secure_monitoring_stack and prometheus_user and prometheus_password:
            deps.append(f'{hash(prometheus_user + prometheus_password)}')
        deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')

        prom_services = []  # type: List[str]
        for dd in self.mgr.cache.get_daemons_by_service('prometheus'):
            assert dd.hostname is not None
            addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
            port = dd.ports[0] if dd.ports else 9095
            protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
            prom_services.append(build_url(scheme=protocol, host=addr, port=port))

            deps.append(dd.name())

        daemons = self.mgr.cache.get_daemons_by_service('loki')
        loki_host = ''
        for i, dd in enumerate(daemons):
            assert dd.hostname is not None
            if i == 0:
                addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
                loki_host = build_url(scheme='http', host=addr, port=3100)

            deps.append(dd.name())

        root_cert = self.mgr.http_server.service_discovery.ssl_certs.get_root_cert()
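        # collapse the PEM certificate to a single line joined with literal
        # '\n' escape sequences, so it can be embedded as a one-line value in
        # the datasource YAML rendered below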
        oneline_root_cert = '\\n'.join([line.strip() for line in root_cert.splitlines()])
        grafana_data_sources = self.mgr.template.render('services/grafana/ceph-dashboard.yml.j2',
                                                        {'hosts': prom_services,
                                                         'prometheus_user': prometheus_user,
                                                         'prometheus_password': prometheus_password,
                                                         'cephadm_root_ca': oneline_root_cert,
                                                         'security_enabled': self.mgr.secure_monitoring_stack,
                                                         'loki_host': loki_host})

        spec: GrafanaSpec = cast(
            GrafanaSpec, self.mgr.spec_store.active_specs[daemon_spec.service_name])
        grafana_ini = self.mgr.template.render(
            'services/grafana/grafana.ini.j2', {
                'anonymous_access': spec.anonymous_access,
                'initial_admin_password': spec.initial_admin_password,
                'http_port': daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT,
                'protocol': spec.protocol,
                'http_addr': daemon_spec.ip if daemon_spec.ip else ''
            })

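        # if the dashboard mgr module is enabled, hand it the Grafana admin
        # password so the dashboard can authenticate against the Grafana API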
        if 'dashboard' in self.mgr.get('mgr_map')['modules'] and spec.initial_admin_password:
            self.mgr.check_mon_command(
                {'prefix': 'dashboard set-grafana-api-password'}, inbuf=spec.initial_admin_password)

        cert, pkey = self.prepare_certificates(daemon_spec)
        config_file = {
            'files': {
                "grafana.ini": grafana_ini,
                'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources,
                'certs/cert_file': '# generated by cephadm\n%s' % cert,
                'certs/cert_key': '# generated by cephadm\n%s' % pkey,
            }
        }
        return config_file, sorted(deps)

    def prepare_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]:
        cert_path = f'{daemon_spec.host}/grafana_crt'
        key_path = f'{daemon_spec.host}/grafana_key'
        cert = self.mgr.get_store(cert_path)
        pkey = self.mgr.get_store(key_path)
        certs_present = (cert and pkey)
        is_valid_certificate = False
        (org, cn) = (None, None)
        if certs_present:
            try:
                (org, cn) = get_cert_issuer_info(cert)
                verify_tls(cert, pkey)
                is_valid_certificate = True
            except ServerConfigException as e:
                logger.warning(f'Provided grafana TLS certificates are invalid: {e}')

        if is_valid_certificate:
            # clear the health error just in case it was set
            self.mgr.remove_health_warning('CEPHADM_CERT_ERROR')
            return cert, pkey

        # The certificate is not valid. To avoid overwriting user-provided
        # certificates, we only regenerate certificates that were originally
        # self-signed by cephadm, or when cert/key are empty.
        if not certs_present or (org == 'Ceph' and cn == 'cephadm'):
            logger.info('Regenerating cephadm self-signed grafana TLS certificates')
            host_fqdn = socket.getfqdn(daemon_spec.host)
            cert, pkey = create_self_signed_cert('Ceph', host_fqdn)
            self.mgr.set_store(cert_path, cert)
            self.mgr.set_store(key_path, pkey)
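            # the regenerated certificate is self-signed, so the dashboard
            # module must be told to skip TLS verification when talking to Grafana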
            if 'dashboard' in self.mgr.get('mgr_map')['modules']:
                self.mgr.check_mon_command({
                    'prefix': 'dashboard set-grafana-api-ssl-verify',
                    'value': 'false',
                })
            self.mgr.remove_health_warning('CEPHADM_CERT_ERROR')  # clear if any
        else:
            # the certificate was not generated by cephadm, so we must not
            # overwrite it with new self-signed ones; warn the user to fix the issue
            err_msg = """
Detected invalid grafana certificates. Set mgr/cephadm/grafana_crt
and mgr/cephadm/grafana_key to valid certificates or reset their value
to an empty string in case you want cephadm to generate self-signed Grafana
certificates.

Once done, run the following command to reconfig the daemon:

> ceph orch daemon reconfig <grafana-daemon>

"""
            self.mgr.set_health_warning(
                'CEPHADM_CERT_ERROR', 'Invalid grafana certificate: ', 1, [err_msg])

        return cert, pkey

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        # Use the least-created one as the active daemon
        if daemon_descrs:
            return daemon_descrs[-1]
        # if empty list provided, return empty Daemon Desc
        return DaemonDescription()

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        # TODO: signed cert
        dd = self.get_active_daemon(daemon_descrs)
        assert dd.hostname is not None
        addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
        port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
        spec = cast(GrafanaSpec, self.mgr.spec_store[dd.service_name()].spec)
        service_url = build_url(scheme=spec.protocol, host=addr, port=port)
        self._set_service_url_on_dashboard(
            'Grafana',
            'dashboard get-grafana-api-url',
            'dashboard set-grafana-api-url',
            service_url
        )

    def pre_remove(self, daemon: DaemonDescription) -> None:
        """
        Called before the grafana daemon is removed.
        """
        if daemon.hostname is not None:
            # delete the cert/key entries for this grafana daemon
            cert_path = f'{daemon.hostname}/grafana_crt'
            key_path = f'{daemon.hostname}/grafana_key'
            self.mgr.set_store(cert_path, None)
            self.mgr.set_store(key_path, None)

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Grafana', 1)
        if warn and not force:
            return HandleCommandResult(-errno.EBUSY, '', warn_message)
        return HandleCommandResult(0, warn_message, '')


class AlertmanagerService(CephadmService):
    TYPE = 'alertmanager'
    DEFAULT_SERVICE_PORT = 9093
    USER_CFG_KEY = 'alertmanager/web_user'
    PASS_CFG_KEY = 'alertmanager/web_password'

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []
        default_webhook_urls: List[str] = []

        spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
        try:
            secure = spec.secure
        except AttributeError:
            secure = False
        user_data = spec.user_data
        if 'default_webhook_urls' in user_data and isinstance(
                user_data['default_webhook_urls'], list):
            default_webhook_urls.extend(user_data['default_webhook_urls'])

        # dashboard(s)
        dashboard_urls: List[str] = []
        snmp_gateway_urls: List[str] = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        proto = None  # http: or https:
        url = mgr_map.get('services', {}).get('dashboard', None)
        if url:
            p_result = urlparse(url.rstrip('/'))
            hostname = socket.getfqdn(p_result.hostname)

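            # socket.getfqdn() may return the literal address itself when no
            # reverse record exists; if that string parses as an IPv6 address
            # it must be bracketed before being spliced into a URL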
            try:
                ip = ipaddress.ip_address(hostname)
            except ValueError:
                pass
            else:
                if ip.version == 6:
                    hostname = f'[{hostname}]'

            dashboard_urls.append(
                f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}')
            proto = p_result.scheme
            port = p_result.port

        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider mgr a dep even if the dashboard is disabled
            # in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            if not port:
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                continue
            assert dd.hostname is not None
            addr = self._inventory_get_fqdn(dd.hostname)
            dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/'))

        for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'):
            assert dd.hostname is not None
            assert dd.ports
            addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
            deps.append(dd.name())

            snmp_gateway_urls.append(build_url(scheme='http', host=addr,
                                               port=dd.ports[0], path='/alerts'))

        context = {
            'secure_monitoring_stack': self.mgr.secure_monitoring_stack,
            'dashboard_urls': dashboard_urls,
            'default_webhook_urls': default_webhook_urls,
            'snmp_gateway_urls': snmp_gateway_urls,
            'secure': secure,
        }
        yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context)

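        # every alertmanager is handed the full list of its peers so the
        # daemons can form a gossip cluster on port 9094 (alertmanager's
        # conventional cluster port); build_url() without a scheme yields
        # '//host:port', hence the lstrip('/')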
        peers = []
        port = 9094
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            assert dd.hostname is not None
            deps.append(dd.name())
            addr = self._inventory_get_fqdn(dd.hostname)
            peers.append(build_url(host=addr, port=port).lstrip('/'))

        deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')

        if self.mgr.secure_monitoring_stack:
            alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
            if alertmanager_user and alertmanager_password:
                deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
            node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
            host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
            cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert(
                host_fqdn, node_ip)
            context = {
                'alertmanager_web_user': alertmanager_user,
                'alertmanager_web_password': password_hash(alertmanager_password),
            }
            return {
                "files": {
                    "alertmanager.yml": yml,
                    'alertmanager.crt': cert,
                    'alertmanager.key': key,
                    'web.yml': self.mgr.template.render('services/alertmanager/web.yml.j2', context),
                    'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert()
                },
                'peers': peers,
                'web_config': '/etc/alertmanager/web.yml'
            }, sorted(deps)
        else:
            return {
                "files": {
                    "alertmanager.yml": yml
                },
                "peers": peers
            }, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        # TODO: if there are multiple daemons, who is the active one?
        if daemon_descrs:
            return daemon_descrs[0]
        # if empty list provided, return empty Daemon Desc
        return DaemonDescription()

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        dd = self.get_active_daemon(daemon_descrs)
        assert dd.hostname is not None
        addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
        port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
        protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
        service_url = build_url(scheme=protocol, host=addr, port=port)
        self._set_service_url_on_dashboard(
            'AlertManager',
            'dashboard get-alertmanager-api-host',
            'dashboard set-alertmanager-api-host',
            service_url
        )

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Alertmanager', 1)
        if warn and not force:
            return HandleCommandResult(-errno.EBUSY, '', warn_message)
        return HandleCommandResult(0, warn_message, '')


class PrometheusService(CephadmService):
    TYPE = 'prometheus'
    DEFAULT_SERVICE_PORT = 9095
    DEFAULT_MGR_PROMETHEUS_PORT = 9283
    USER_CFG_KEY = 'prometheus/web_user'
    PASS_CFG_KEY = 'prometheus/web_password'

    def config(self, spec: ServiceSpec) -> None:
        # make sure module is enabled
        mgr_map = self.mgr.get('mgr_map')
        if 'prometheus' not in mgr_map.get('services', {}):
            self.mgr.check_mon_command({
                'prefix': 'mgr module enable',
                'module': 'prometheus'
            })
            # we shouldn't get here (mon will tell the mgr to respawn), but no
            # harm done if we do.

    def prepare_create(
            self,
            daemon_spec: CephadmDaemonDeploySpec,
    ) -> CephadmDaemonDeploySpec:
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(
            self,
            daemon_spec: CephadmDaemonDeploySpec,
    ) -> Tuple[Dict[str, Any], List[str]]:

        assert self.TYPE == daemon_spec.daemon_type
        spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec)

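        # retention_time / retention_size are handed through to the deployed
        # daemon and are expected to become prometheus'
        # --storage.tsdb.retention.time / --storage.tsdb.retention.size
        # settings; '0' leaves the size-based limit disabled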
        try:
            retention_time = spec.retention_time if spec.retention_time else '15d'
        except AttributeError:
            retention_time = '15d'

        try:
            retention_size = spec.retention_size if spec.retention_size else '0'
        except AttributeError:
            # default to disabled
            retention_size = '0'

        # build the service discovery endpoint
        port = self.mgr.service_discovery_port
        mgr_addr = wrap_ipv6(self.mgr.get_mgr_ip())
        protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
        srv_end_point = f'{protocol}://{mgr_addr}:{port}/sd/prometheus/sd-config?'
        node_exporter_cnt = len(self.mgr.cache.get_daemons_by_service('node-exporter'))
        alertmgr_cnt = len(self.mgr.cache.get_daemons_by_service('alertmanager'))
        haproxy_cnt = len(self.mgr.cache.get_daemons_by_type('ingress'))
        node_exporter_sd_url = f'{srv_end_point}service=node-exporter' if node_exporter_cnt > 0 else None
        alertmanager_sd_url = f'{srv_end_point}service=alertmanager' if alertmgr_cnt > 0 else None
        haproxy_sd_url = f'{srv_end_point}service=haproxy' if haproxy_cnt > 0 else None
        mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus'  # always included
        ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter'  # always included

        alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
        prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()

        # generate the prometheus configuration
        context = {
            'alertmanager_web_user': alertmanager_user,
            'alertmanager_web_password': alertmanager_password,
            'secure_monitoring_stack': self.mgr.secure_monitoring_stack,
            'service_discovery_username': self.mgr.http_server.service_discovery.username,
            'service_discovery_password': self.mgr.http_server.service_discovery.password,
            'mgr_prometheus_sd_url': mgr_prometheus_sd_url,
            'node_exporter_sd_url': node_exporter_sd_url,
            'alertmanager_sd_url': alertmanager_sd_url,
            'haproxy_sd_url': haproxy_sd_url,
            'ceph_exporter_sd_url': ceph_exporter_sd_url
        }

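        # web.yml is prometheus' web-config file; it defines the basic-auth
        # account remote clients must present to reach prometheus' HTTP
        # endpoints, storing the password only as a hash
        # (mgr_util.password_hash), never in clear text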
        web_context = {
            'prometheus_web_user': prometheus_user,
            'prometheus_web_password': password_hash(prometheus_password),
        }

        if self.mgr.secure_monitoring_stack:
            cfg_key = 'mgr/prometheus/root/cert'
            cmd = {'prefix': 'config-key get', 'key': cfg_key}
            ret, mgr_prometheus_rootca, err = self.mgr.mon_command(cmd)
            if ret != 0:
                logger.error(f'mon command to get config-key {cfg_key} failed: {err}')
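                # NOTE: when the lookup fails we fall through with `r` unbound,
                # so the first use of r below raises UnboundLocalError; arguably
                # this failure should abort config generation explicitly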
            else:
                node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
                host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
                cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert(host_fqdn, node_ip)
                r: Dict[str, Any] = {
                    'files': {
                        'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context),
                        'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert(),
                        'mgr_prometheus_cert.pem': mgr_prometheus_rootca,
                        'web.yml': self.mgr.template.render('services/prometheus/web.yml.j2', web_context),
                        'prometheus.crt': cert,
                        'prometheus.key': key,
                    },
                    'retention_time': retention_time,
                    'retention_size': retention_size,
                    'web_config': '/etc/prometheus/web.yml'
                }
        else:
            r = {
                'files': {
                    'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context)
                },
                'retention_time': retention_time,
                'retention_size': retention_size
            }

        # include alerts, if present in the container
        if os.path.exists(self.mgr.prometheus_alerts_path):
            with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
                alerts = f.read()
            r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts

        # Include custom alerts if present in the key value store. This enables
        # users to add custom alerts. Write the file in any case, so that if the
        # content of the key value store changed, the file is overwritten
        # (emptied in case the value has been removed from the key value
        # store). This avoids having to adapt the `cephadm` binary to
        # remove the file.
        #
        # Don't use the template engine for it as
        #
        # 1. the alerts are always static and
        # 2. they are a template themselves for the Go template engine, which
        #    uses curly braces, so escaping them would be cumbersome and
        #    unnecessary for the user.
        #
        r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = \
            self.mgr.get_store('services/prometheus/alerting/custom_alerts.yml', '')

        return r, sorted(self.calculate_deps())

    def calculate_deps(self) -> List[str]:
        deps = []  # type: List[str]
        port = cast(int, self.mgr.get_module_option_ex('prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT))
        deps.append(str(port))
        deps.append(str(self.mgr.service_discovery_port))
        # add an explicit dependency on the active manager. This will force a
        # prometheus re-deploy whenever the active mgr changes (e.g. due to a fail-over).
        deps.append(self.mgr.get_active_mgr().name())
        if self.mgr.secure_monitoring_stack:
            alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
            prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
            if prometheus_user and prometheus_password:
                deps.append(f'{hash(prometheus_user + prometheus_password)}')
            if alertmanager_user and alertmanager_password:
                deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
        deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
        # add a dependency on the ceph-exporter daemons
        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('ceph-exporter')]
        deps += [s for s in ['node-exporter', 'alertmanager'] if self.mgr.cache.get_daemons_by_service(s)]
        if len(self.mgr.cache.get_daemons_by_type('ingress')) > 0:
            deps.append('ingress')
        return deps

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        # TODO: if there are multiple daemons, who is the active one?
        if daemon_descrs:
            return daemon_descrs[0]
        # if empty list provided, return empty Daemon Desc
        return DaemonDescription()

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        dd = self.get_active_daemon(daemon_descrs)
        assert dd.hostname is not None
        addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
        port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
        protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
        service_url = build_url(scheme=protocol, host=addr, port=port)
        self._set_service_url_on_dashboard(
            'Prometheus',
            'dashboard get-prometheus-api-host',
            'dashboard set-prometheus-api-host',
            service_url
        )

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Prometheus', 1)
        if warn and not force:
            return HandleCommandResult(-errno.EBUSY, '', warn_message)
        return HandleCommandResult(0, warn_message, '')


class NodeExporterService(CephadmService):
    TYPE = 'node-exporter'
    DEFAULT_SERVICE_PORT = 9100

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        assert self.TYPE == daemon_spec.daemon_type
        deps = [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
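        # with the secure monitoring stack enabled, node-exporter serves its
        # metrics over TLS using a certificate minted by the cephadm root CA;
        # the TLS settings are delivered via the web.yml web-config file below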
        if self.mgr.secure_monitoring_stack:
            node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
            host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
            cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert(
                host_fqdn, node_ip)
            r = {
                'files': {
                    'web.yml': self.mgr.template.render('services/node-exporter/web.yml.j2', {}),
                    'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert(),
                    'node_exporter.crt': cert,
                    'node_exporter.key': key,
                },
                'web_config': '/etc/node-exporter/web.yml'
            }
        else:
            r = {}

        return r, deps

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        # since node exporter runs on each host and cannot compromise data, no extra checks required
        names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids]
        out = f'It is presumed safe to stop {names}'
        return HandleCommandResult(0, out, '')


class LokiService(CephadmService):
    TYPE = 'loki'
    DEFAULT_SERVICE_PORT = 3100

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []

        yml = self.mgr.template.render('services/loki.yml.j2')
        return {
            "files": {
                "loki.yml": yml
            }
        }, sorted(deps)


class PromtailService(CephadmService):
    TYPE = 'promtail'
    DEFAULT_SERVICE_PORT = 9080

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []

        daemons = self.mgr.cache.get_daemons_by_service('loki')
        loki_host = ''
        for i, dd in enumerate(daemons):
            assert dd.hostname is not None
            if i == 0:
                loki_host = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)

            deps.append(dd.name())

        context = {
            'client_hostname': loki_host,
        }

        yml = self.mgr.template.render('services/promtail.yml.j2', context)
        return {
            "files": {
                "promtail.yml": yml
            }
        }, sorted(deps)


class SNMPGatewayService(CephadmService):
    TYPE = 'snmp-gateway'

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []

        spec = cast(SNMPGatewaySpec, self.mgr.spec_store[daemon_spec.service_name].spec)
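        # unlike the other monitoring services, the returned dict holds plain
        # key/value settings rather than rendered files; the cephadm binary is
        # expected to map them onto the snmp-notifier daemon's configuration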
        config = {
            "destination": spec.snmp_destination,
            "snmp_version": spec.snmp_version,
        }
        if spec.snmp_version == 'V2c':
            community = spec.credentials.get('snmp_community', None)
            assert community is not None

            config.update({
                "snmp_community": community
            })
        else:
            # SNMP v3 settings can be either authNoPriv or authPriv
            auth_protocol = 'SHA' if not spec.auth_protocol else spec.auth_protocol

            auth_username = spec.credentials.get('snmp_v3_auth_username', None)
            auth_password = spec.credentials.get('snmp_v3_auth_password', None)
            assert auth_username is not None
            assert auth_password is not None
            assert spec.engine_id is not None

            config.update({
                "snmp_v3_auth_protocol": auth_protocol,
                "snmp_v3_auth_username": auth_username,
                "snmp_v3_auth_password": auth_password,
                "snmp_v3_engine_id": spec.engine_id,
            })
            # authPriv adds encryption
            if spec.privacy_protocol:
                priv_password = spec.credentials.get('snmp_v3_priv_password', None)
                assert priv_password is not None

                config.update({
                    "snmp_v3_priv_protocol": spec.privacy_protocol,
                    "snmp_v3_priv_password": priv_password,
                })

        logger.debug(
            f"Generated configuration for '{self.TYPE}' service. Dependencies={deps}")

        return config, sorted(deps)