]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/services/monitoring.py
3 from typing
import List
, Any
, Tuple
, Dict
5 from orchestrator
import DaemonDescription
6 from ceph
.deployment
.service_spec
import AlertManagerSpec
7 from cephadm
.services
.cephadmservice
import CephadmService
, CephadmDaemonSpec
8 from mgr_util
import verify_tls
, ServerConfigException
, create_self_signed_cert
10 logger
= logging
.getLogger(__name__
)
13 class GrafanaService(CephadmService
):
15 DEFAULT_SERVICE_PORT
= 3000
def create(self, daemon_spec: CephadmDaemonSpec) -> str:
    """Create the daemon described by ``daemon_spec``.

    Asserts that the spec's ``daemon_type`` equals this service's ``TYPE``
    and then hands the spec to ``self.mgr._create_daemon``.

    :param daemon_spec: spec of the daemon to create.
    :return: whatever ``self.mgr._create_daemon`` returns.
    :raises AssertionError: if the spec is for a different daemon type.
    """
    service_type = self.TYPE
    assert service_type == daemon_spec.daemon_type
    manager = self.mgr
    return manager._create_daemon(daemon_spec)
21 def generate_config(self
, daemon_spec
: CephadmDaemonSpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
22 assert self
.TYPE
== daemon_spec
.daemon_type
23 deps
= [] # type: List[str]
25 prom_services
= [] # type: List[str]
26 for dd
in self
.mgr
.cache
.get_daemons_by_service('prometheus'):
27 prom_services
.append(dd
.hostname
)
28 deps
.append(dd
.name())
29 grafana_data_sources
= self
.mgr
.template
.render(
30 'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services
})
32 cert
= self
.mgr
.get_store('grafana_crt')
33 pkey
= self
.mgr
.get_store('grafana_key')
36 verify_tls(cert
, pkey
)
37 except ServerConfigException
as e
:
38 logger
.warning('Provided grafana TLS certificates invalid: %s', str(e
))
39 cert
, pkey
= None, None
40 if not (cert
and pkey
):
41 cert
, pkey
= create_self_signed_cert('Ceph', 'cephadm')
42 self
.mgr
.set_store('grafana_crt', cert
)
43 self
.mgr
.set_store('grafana_key', pkey
)
44 self
.mgr
.check_mon_command({
45 'prefix': 'dashboard set-grafana-api-ssl-verify',
49 grafana_ini
= self
.mgr
.template
.render(
50 'services/grafana/grafana.ini.j2', {'http_port': self
.DEFAULT_SERVICE_PORT
})
54 "grafana.ini": grafana_ini
,
55 'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources
,
56 'certs/cert_file': '# generated by cephadm\n%s' % cert
,
57 'certs/cert_key': '# generated by cephadm\n%s' % pkey
,
60 return config_file
, sorted(deps
)
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
    """Return the daemon treated as active: the last entry of ``daemon_descrs``.

    NOTE(review): the previous comment called this the "least-created"
    daemon; presumably the list is ordered by creation time so the last
    entry is the most recently created one -- confirm against the caller.

    :param daemon_descrs: candidate daemon descriptions.
    :return: the final element of ``daemon_descrs``.
    :raises IndexError: if ``daemon_descrs`` is empty.
    """
    chosen = daemon_descrs[-1]
    return chosen
66 def config_dashboard(self
, daemon_descrs
: List
[DaemonDescription
]):
68 dd
= self
.get_active_daemon(daemon_descrs
)
69 service_url
= 'https://{}:{}'.format(
70 self
._inventory
_get
_addr
(dd
.hostname
), self
.DEFAULT_SERVICE_PORT
)
71 self
._set
_service
_url
_on
_dashboard
(
73 'dashboard get-grafana-api-url',
74 'dashboard set-grafana-api-url',
79 class AlertmanagerService(CephadmService
):
81 DEFAULT_SERVICE_PORT
= 9093
def create(self, daemon_spec: CephadmDaemonSpec[AlertManagerSpec]) -> str:
    """Create the daemon described by ``daemon_spec``.

    Asserts that the spec's ``daemon_type`` equals this service's ``TYPE``
    and that the spec carries a truthy ``spec`` attribute, then hands the
    spec to ``self.mgr._create_daemon``.

    :param daemon_spec: spec of the daemon to create.
    :return: whatever ``self.mgr._create_daemon`` returns.
    :raises AssertionError: if the daemon type differs or ``spec`` is falsy.
    """
    service_type = self.TYPE
    assert service_type == daemon_spec.daemon_type
    # A service spec must be attached before the daemon is created.
    assert daemon_spec.spec
    manager = self.mgr
    return manager._create_daemon(daemon_spec)
88 def generate_config(self
, daemon_spec
: CephadmDaemonSpec
[AlertManagerSpec
]) -> Tuple
[Dict
[str, Any
], List
[str]]:
89 assert self
.TYPE
== daemon_spec
.daemon_type
91 default_webhook_urls
: List
[str] = []
94 user_data
= daemon_spec
.spec
.user_data
95 if 'default_webhook_urls' in user_data
and isinstance(
96 user_data
['default_webhook_urls'], list):
97 default_webhook_urls
.extend(user_data
['default_webhook_urls'])
100 dashboard_urls
: List
[str] = []
101 mgr_map
= self
.mgr
.get('mgr_map')
103 proto
= None # http: or https:
104 url
= mgr_map
.get('services', {}).get('dashboard', None)
106 dashboard_urls
.append(url
)
107 proto
= url
.split('/')[0]
108 port
= url
.split('/')[2].split(':')[1]
109 # scan all mgrs to generate deps and to get standbys too.
110 # assume that they are all on the same port as the active mgr.
111 for dd
in self
.mgr
.cache
.get_daemons_by_service('mgr'):
112 # we consider mgr a dep even if the dashboard is disabled
113 # in order to be consistent with _calc_daemon_deps().
114 deps
.append(dd
.name())
117 if dd
.daemon_id
== self
.mgr
.get_mgr_id():
119 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
120 dashboard_urls
.append('%s//%s:%s/' % (proto
, addr
.split(':')[0],
124 'dashboard_urls': dashboard_urls
,
125 'default_webhook_urls': default_webhook_urls
127 yml
= self
.mgr
.template
.render('services/alertmanager/alertmanager.yml.j2', context
)
131 for dd
in self
.mgr
.cache
.get_daemons_by_service('alertmanager'):
132 deps
.append(dd
.name())
133 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
134 peers
.append(addr
.split(':')[0] + ':' + port
)
137 "alertmanager.yml": yml
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
    """Return the daemon treated as active: the first entry of ``daemon_descrs``.

    :param daemon_descrs: candidate daemon descriptions.
    :return: the first element of ``daemon_descrs``.
    :raises IndexError: if ``daemon_descrs`` is empty.
    """
    # TODO: with several daemons it is undecided which one is really active;
    # the first one is picked for now.
    chosen = daemon_descrs[0]
    return chosen
146 def config_dashboard(self
, daemon_descrs
: List
[DaemonDescription
]):
147 dd
= self
.get_active_daemon(daemon_descrs
)
148 service_url
= 'http://{}:{}'.format(self
._inventory
_get
_addr
(dd
.hostname
), self
.DEFAULT_SERVICE_PORT
)
149 self
._set
_service
_url
_on
_dashboard
(
151 'dashboard get-alertmanager-api-host',
152 'dashboard set-alertmanager-api-host',
157 class PrometheusService(CephadmService
):
159 DEFAULT_SERVICE_PORT
= 9095
def create(self, daemon_spec: CephadmDaemonSpec) -> str:
    """Create the daemon described by ``daemon_spec``.

    Asserts that the spec's ``daemon_type`` equals this service's ``TYPE``
    and then hands the spec to ``self.mgr._create_daemon``.

    :param daemon_spec: spec of the daemon to create.
    :return: whatever ``self.mgr._create_daemon`` returns.
    :raises AssertionError: if the spec is for a different daemon type.
    """
    service_type = self.TYPE
    assert service_type == daemon_spec.daemon_type
    manager = self.mgr
    return manager._create_daemon(daemon_spec)
165 def generate_config(self
, daemon_spec
: CephadmDaemonSpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
166 assert self
.TYPE
== daemon_spec
.daemon_type
167 deps
= [] # type: List[str]
171 mgr_map
= self
.mgr
.get('mgr_map')
173 t
= mgr_map
.get('services', {}).get('prometheus', None)
176 mgr_scrape_list
.append(t
)
179 port
= t
.split(':')[1]
180 # scan all mgrs to generate deps and to get standbys too.
181 # assume that they are all on the same port as the active mgr.
182 for dd
in self
.mgr
.cache
.get_daemons_by_service('mgr'):
183 # we consider the mgr a dep even if the prometheus module is
184 # disabled in order to be consistent with _calc_daemon_deps().
185 deps
.append(dd
.name())
188 if dd
.daemon_id
== self
.mgr
.get_mgr_id():
190 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
191 mgr_scrape_list
.append(addr
.split(':')[0] + ':' + port
)
193 # scrape node exporters
195 for dd
in self
.mgr
.cache
.get_daemons_by_service('node-exporter'):
196 deps
.append(dd
.name())
197 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
199 'hostname': dd
.hostname
,
200 'url': addr
.split(':')[0] + ':9100'
203 # scrape alert managers
204 alertmgr_targets
= []
205 for dd
in self
.mgr
.cache
.get_daemons_by_service('alertmanager'):
206 deps
.append(dd
.name())
207 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
208 alertmgr_targets
.append("'{}:9093'".format(addr
.split(':')[0]))
210 # generate the prometheus configuration
212 'alertmgr_targets': alertmgr_targets
,
213 'mgr_scrape_list': mgr_scrape_list
,
219 self
.mgr
.template
.render(
220 'services/prometheus/prometheus.yml.j2', context
)
224 # include alerts, if present in the container
225 if os
.path
.exists(self
.mgr
.prometheus_alerts_path
):
226 with
open(self
.mgr
.prometheus_alerts_path
, 'r', encoding
='utf-8') as f
:
228 r
['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts
230 return r
, sorted(deps
)
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
    """Return the daemon treated as active: the first entry of ``daemon_descrs``.

    :param daemon_descrs: candidate daemon descriptions.
    :return: the first element of ``daemon_descrs``.
    :raises IndexError: if ``daemon_descrs`` is empty.
    """
    # TODO: with several daemons it is undecided which one is really active;
    # the first one is picked for now.
    chosen = daemon_descrs[0]
    return chosen
236 def config_dashboard(self
, daemon_descrs
: List
[DaemonDescription
]):
237 dd
= self
.get_active_daemon(daemon_descrs
)
238 service_url
= 'http://{}:{}'.format(
239 self
._inventory
_get
_addr
(dd
.hostname
), self
.DEFAULT_SERVICE_PORT
)
240 self
._set
_service
_url
_on
_dashboard
(
242 'dashboard get-prometheus-api-host',
243 'dashboard set-prometheus-api-host',
248 class NodeExporterService(CephadmService
):
249 TYPE
= 'node-exporter'
def create(self, daemon_spec: CephadmDaemonSpec) -> str:
    """Create the daemon described by ``daemon_spec``.

    Asserts that the spec's ``daemon_type`` equals this service's ``TYPE``
    and then hands the spec to ``self.mgr._create_daemon``.

    :param daemon_spec: spec of the daemon to create.
    :return: whatever ``self.mgr._create_daemon`` returns.
    :raises AssertionError: if the spec is for a different daemon type.
    """
    service_type = self.TYPE
    assert service_type == daemon_spec.daemon_type
    manager = self.mgr
    return manager._create_daemon(daemon_spec)
255 def generate_config(self
, daemon_spec
: CephadmDaemonSpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
256 assert self
.TYPE
== daemon_spec
.daemon_type