]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/services/monitoring.py
import logging
import os
from typing import List, Any, Tuple, Dict

from orchestrator import DaemonDescription
from ceph.deployment.service_spec import AlertManagerSpec
from cephadm.services.cephadmservice import CephadmService, CephadmDaemonSpec
from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class GrafanaService(CephadmService):
    """cephadm service handler for Grafana daemons.

    Renders the Grafana configuration files, maintains the TLS certificate
    and key held under the ``grafana_crt`` / ``grafana_key`` store entries
    of the mgr, and publishes the Grafana URL to the dashboard.
    """

    # NOTE(review): TYPE is not visible in the extracted source. 'grafana'
    # follows the pattern of NodeExporterService.TYPE ('node-exporter') in
    # this file -- confirm against upstream.
    TYPE = 'grafana'
    DEFAULT_SERVICE_PORT = 3000

    def prepare_create(self, daemon_spec: CephadmDaemonSpec) -> CephadmDaemonSpec:
        """Check that *daemon_spec* is for this service type and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        # NOTE(review): the return statement is missing from the extracted
        # source; returning the spec unchanged matches the annotation.
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]:
        """Build the Grafana config files and the daemon dependency list.

        :param daemon_spec: spec of the Grafana daemon being configured.
        :return: ``(config, deps)`` where ``config`` holds the rendered
            files and ``deps`` is the sorted list of prometheus daemon names.
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps = []  # type: List[str]

        # Every prometheus daemon is both a data source and a dependency.
        prom_services = []  # type: List[str]
        for dd in self.mgr.cache.get_daemons_by_service('prometheus'):
            prom_services.append(dd.hostname)
            deps.append(dd.name())
        grafana_data_sources = self.mgr.template.render(
            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services})

        cert = self.mgr.get_store('grafana_crt')
        pkey = self.mgr.get_store('grafana_key')
        # NOTE(review): the guard and the `try:` line are missing from the
        # extracted source (only the `except` clause is visible). Validating
        # only when both values are present is the assumed original shape.
        if cert and pkey:
            try:
                verify_tls(cert, pkey)
            except ServerConfigException as e:
                logger.warning('Provided grafana TLS certificates invalid: %s', str(e))
                # Discard the invalid pair so a fresh one is generated below.
                cert, pkey = None, None
        if not (cert and pkey):
            cert, pkey = create_self_signed_cert('Ceph', 'cephadm')
            self.mgr.set_store('grafana_crt', cert)
            self.mgr.set_store('grafana_key', pkey)
            self.mgr.check_mon_command({
                'prefix': 'dashboard set-grafana-api-ssl-verify',
                # NOTE(review): the value line is missing from the extracted
                # source; 'false' is assumed because the certificate just
                # created is self-signed -- confirm.
                'value': 'false',
            })

        grafana_ini = self.mgr.template.render(
            'services/grafana/grafana.ini.j2', {'http_port': self.DEFAULT_SERVICE_PORT})

        # NOTE(review): the opening lines of this dict are missing from the
        # extracted source; the 'files' wrapper mirrors the r['files'][...]
        # access visible in PrometheusService -- confirm.
        config_file = {
            'files': {
                "grafana.ini": grafana_ini,
                'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources,
                'certs/cert_file': '# generated by cephadm\n%s' % cert,
                'certs/cert_key': '# generated by cephadm\n%s' % pkey,
            }
        }
        return config_file, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the last daemon of *daemon_descrs*, or an empty description."""
        # Use the last entry as the active daemon (the upstream comment calls
        # it "the least-created one").
        if daemon_descrs:
            return daemon_descrs[-1]
        # if empty list provided, return empty Daemon Desc
        return DaemonDescription()

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Publish the HTTPS URL of the active Grafana daemon to the dashboard."""
        dd = self.get_active_daemon(daemon_descrs)
        service_url = 'https://{}:{}'.format(
            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
        # NOTE(review): only the two command strings are visible in the
        # extracted source; the leading service label and the trailing
        # service_url argument are inferred -- confirm against the helper's
        # signature.
        self._set_service_url_on_dashboard(
            'Grafana',
            'dashboard get-grafana-api-url',
            'dashboard set-grafana-api-url',
            service_url,
        )
class AlertmanagerService(CephadmService):
    """cephadm service handler for Alertmanager daemons.

    Renders ``alertmanager.yml`` with the dashboard URLs and any
    user-supplied default webhook URLs, collects the peer addresses of the
    other alertmanager daemons, and publishes the Alertmanager URL to the
    dashboard.
    """

    # NOTE(review): TYPE is not visible in the extracted source;
    # 'alertmanager' matches the service name queried below -- confirm.
    TYPE = 'alertmanager'
    DEFAULT_SERVICE_PORT = 9093

    def prepare_create(self, daemon_spec: CephadmDaemonSpec[AlertManagerSpec]) -> CephadmDaemonSpec:
        """Check that *daemon_spec* is for this service type and carries a spec."""
        assert self.TYPE == daemon_spec.daemon_type
        assert daemon_spec.spec
        # NOTE(review): the return statement is missing from the extracted
        # source; returning the spec unchanged matches the annotation.
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonSpec[AlertManagerSpec]) -> Tuple[Dict[str, Any], List[str]]:
        """Build the Alertmanager config and the daemon dependency list.

        :param daemon_spec: spec of the Alertmanager daemon being configured.
        :return: ``(config, deps)`` where ``deps`` is the sorted list of mgr
            and alertmanager daemon names.
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []
        default_webhook_urls: List[str] = []

        # NOTE(review): the guard line is missing from the extracted source;
        # checking for a spec before reading user_data is assumed.
        if daemon_spec.spec:
            user_data = daemon_spec.spec.user_data
            if 'default_webhook_urls' in user_data and isinstance(
                    user_data['default_webhook_urls'], list):
                default_webhook_urls.extend(user_data['default_webhook_urls'])

        # dashboard(s)
        dashboard_urls: List[str] = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        proto = None  # http: or https:
        url = mgr_map.get('services', {}).get('dashboard', None)
        # NOTE(review): the `if url:` line is missing from the extracted
        # source; it is required because url may be None.
        if url:
            dashboard_urls.append(url)
            # url is split on '/': element 0 is the protocol ("http:" or
            # "https:") and element 2 is "host:port".
            proto = url.split('/')[0]
            port = url.split('/')[2].split(':')[1]
        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider mgr a dep even if the dashboard is disabled
            # in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            # NOTE(review): both `continue` branches are reconstructed; the
            # extracted source shows only the daemon_id comparison.
            if not port:
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                # The active mgr's URL was already added above.
                continue
            addr = self.mgr.inventory.get_addr(dd.hostname)
            dashboard_urls.append('%s//%s:%s/' % (proto, addr.split(':')[0],
                                                  port))

        context = {
            'dashboard_urls': dashboard_urls,
            'default_webhook_urls': default_webhook_urls
        }
        yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context)

        peers = []
        # NOTE(review): the port assignment for peers is missing from the
        # extracted source. 9094 is Alertmanager's default cluster listen
        # port -- confirm against upstream.
        port = '9094'
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            peers.append(addr.split(':')[0] + ':' + port)
        # NOTE(review): only the "alertmanager.yml" entry is visible in the
        # extracted source; the 'files' wrapper and the 'peers' key are
        # inferred -- confirm.
        return {
            "files": {
                "alertmanager.yml": yml
            },
            "peers": peers
        }, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the first daemon of *daemon_descrs*, or an empty description."""
        # TODO: if there are multiple daemons, who is the active one?
        if daemon_descrs:
            return daemon_descrs[0]
        # if empty list provided, return empty Daemon Desc
        return DaemonDescription()

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Publish the HTTP URL of the active Alertmanager daemon to the dashboard."""
        dd = self.get_active_daemon(daemon_descrs)
        service_url = 'http://{}:{}'.format(self._inventory_get_addr(dd.hostname),
                                            self.DEFAULT_SERVICE_PORT)
        # NOTE(review): only the two command strings are visible in the
        # extracted source; the leading service label and the trailing
        # service_url argument are inferred -- confirm against the helper's
        # signature.
        self._set_service_url_on_dashboard(
            'AlertManager',
            'dashboard get-alertmanager-api-host',
            'dashboard set-alertmanager-api-host',
            service_url,
        )
class PrometheusService(CephadmService):
    """cephadm service handler for Prometheus daemons.

    Renders ``prometheus.yml`` with scrape targets for the mgr daemons, the
    node exporters and the alertmanagers, adds the Ceph alert rules when the
    file exists, and publishes the Prometheus URL to the dashboard.
    """

    # NOTE(review): TYPE is not visible in the extracted source;
    # 'prometheus' matches the service name GrafanaService queries --
    # confirm.
    TYPE = 'prometheus'
    DEFAULT_SERVICE_PORT = 9095

    def prepare_create(self, daemon_spec: CephadmDaemonSpec) -> CephadmDaemonSpec:
        """Check that *daemon_spec* is for this service type and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        # NOTE(review): the return statement is missing from the extracted
        # source; returning the spec unchanged matches the annotation.
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]:
        """Build the Prometheus config files and the daemon dependency list.

        :param daemon_spec: spec of the Prometheus daemon being configured.
        :return: ``(config, deps)`` where ``deps`` is the sorted list of mgr,
            node-exporter and alertmanager daemon names.
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps = []  # type: List[str]

        # scrape mgrs
        mgr_scrape_list = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        t = mgr_map.get('services', {}).get('prometheus', None)
        # NOTE(review): several lines between the lookup and the append are
        # missing from the extracted source. Reducing the URL to "host[:port]"
        # mirrors how AlertmanagerService parses the dashboard URL, and 9283
        # is the documented default port of the mgr prometheus module --
        # confirm against upstream.
        if t:
            t = t.split('/')[2]
            mgr_scrape_list.append(t)
            port = '9283'
            if ':' in t:
                port = t.split(':')[1]
        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider the mgr a dep even if the prometheus module is
            # disabled in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            # NOTE(review): both `continue` branches are reconstructed; the
            # extracted source shows only the daemon_id comparison.
            if not port:
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                # The active mgr was already added above.
                continue
            addr = self.mgr.inventory.get_addr(dd.hostname)
            mgr_scrape_list.append(addr.split(':')[0] + ':' + port)

        # scrape node exporters
        # NOTE(review): the list name and the append call are missing from
        # the extracted source; only the two dict entries are visible.
        nodes = []
        for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            nodes.append({
                'hostname': dd.hostname,
                'url': addr.split(':')[0] + ':9100'
            })

        # scrape alert managers
        alertmgr_targets = []
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            alertmgr_targets.append("'{}:9093'".format(addr.split(':')[0]))

        # generate the prometheus configuration
        context = {
            'alertmgr_targets': alertmgr_targets,
            'mgr_scrape_list': mgr_scrape_list,
            # NOTE(review): key name inferred; verify against
            # services/prometheus/prometheus.yml.j2.
            'nodes': nodes,
        }
        # NOTE(review): the 'files' key is established by the r['files'][...]
        # assignment below; the 'prometheus.yml' file name is inferred from
        # the template name -- confirm.
        r = {
            'files': {
                'prometheus.yml':
                    self.mgr.template.render(
                        'services/prometheus/prometheus.yml.j2', context)
            }
        }

        # include alerts, if present in the container
        if os.path.exists(self.mgr.prometheus_alerts_path):
            with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
                # NOTE(review): the read is missing from the extracted
                # source; `alerts` is assumed to be the whole file content.
                alerts = f.read()
            r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts

        return r, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the first daemon of *daemon_descrs*, or an empty description."""
        # TODO: if there are multiple daemons, who is the active one?
        if daemon_descrs:
            return daemon_descrs[0]
        # if empty list provided, return empty Daemon Desc
        return DaemonDescription()

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Publish the HTTP URL of the active Prometheus daemon to the dashboard."""
        dd = self.get_active_daemon(daemon_descrs)
        service_url = 'http://{}:{}'.format(
            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
        # NOTE(review): only the two command strings are visible in the
        # extracted source; the leading service label and the trailing
        # service_url argument are inferred -- confirm against the helper's
        # signature.
        self._set_service_url_on_dashboard(
            'Prometheus',
            'dashboard get-prometheus-api-host',
            'dashboard set-prometheus-api-host',
            service_url,
        )
258 class NodeExporterService(CephadmService
):
259 TYPE
= 'node-exporter'
261 def prepare_create(self
, daemon_spec
: CephadmDaemonSpec
) -> CephadmDaemonSpec
:
262 assert self
.TYPE
== daemon_spec
.daemon_type
265 def generate_config(self
, daemon_spec
: CephadmDaemonSpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
266 assert self
.TYPE
== daemon_spec
.daemon_type