]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/services/monitoring.py
4 from typing
import List
, Any
, Tuple
, Dict
, Optional
, cast
6 from mgr_module
import HandleCommandResult
8 from orchestrator
import DaemonDescription
9 from ceph
.deployment
.service_spec
import AlertManagerSpec
10 from cephadm
.services
.cephadmservice
import CephadmService
, CephadmDaemonDeploySpec
11 from cephadm
.services
.ingress
import IngressSpec
12 from mgr_util
import verify_tls
, ServerConfigException
, create_self_signed_cert
14 logger
= logging
.getLogger(__name__
)
17 class GrafanaService(CephadmService
):
19 DEFAULT_SERVICE_PORT
= 3000
21 def prepare_create(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> CephadmDaemonDeploySpec
:
22 assert self
.TYPE
== daemon_spec
.daemon_type
23 daemon_spec
.final_config
, daemon_spec
.deps
= self
.generate_config(daemon_spec
)
26 def generate_config(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
27 assert self
.TYPE
== daemon_spec
.daemon_type
28 deps
= [] # type: List[str]
30 prom_services
= [] # type: List[str]
31 for dd
in self
.mgr
.cache
.get_daemons_by_service('prometheus'):
32 assert dd
.hostname
is not None
33 addr
= dd
.ip
if dd
.ip
else self
._inventory
_get
_addr
(dd
.hostname
)
34 port
= dd
.ports
[0] if dd
.ports
else 9095
35 prom_services
.append(addr
+ ':' + str(port
))
36 deps
.append(dd
.name())
37 grafana_data_sources
= self
.mgr
.template
.render(
38 'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services
})
40 cert
= self
.mgr
.get_store('grafana_crt')
41 pkey
= self
.mgr
.get_store('grafana_key')
44 verify_tls(cert
, pkey
)
45 except ServerConfigException
as e
:
46 logger
.warning('Provided grafana TLS certificates invalid: %s', str(e
))
47 cert
, pkey
= None, None
48 if not (cert
and pkey
):
49 cert
, pkey
= create_self_signed_cert('Ceph', 'cephadm')
50 self
.mgr
.set_store('grafana_crt', cert
)
51 self
.mgr
.set_store('grafana_key', pkey
)
52 self
.mgr
.check_mon_command({
53 'prefix': 'dashboard set-grafana-api-ssl-verify',
57 grafana_ini
= self
.mgr
.template
.render(
58 'services/grafana/grafana.ini.j2', {
59 'http_port': daemon_spec
.ports
[0] if daemon_spec
.ports
else self
.DEFAULT_SERVICE_PORT
,
60 'http_addr': daemon_spec
.ip
if daemon_spec
.ip
else ''
65 "grafana.ini": grafana_ini
,
66 'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources
,
67 'certs/cert_file': '# generated by cephadm\n%s' % cert
,
68 'certs/cert_key': '# generated by cephadm\n%s' % pkey
,
71 return config_file
, sorted(deps
)
73 def get_active_daemon(self
, daemon_descrs
: List
[DaemonDescription
]) -> DaemonDescription
:
74 # Use the least-created one as the active daemon
76 return daemon_descrs
[-1]
77 # if empty list provided, return empty Daemon Desc
78 return DaemonDescription()
80 def config_dashboard(self
, daemon_descrs
: List
[DaemonDescription
]) -> None:
82 dd
= self
.get_active_daemon(daemon_descrs
)
83 assert dd
.hostname
is not None
84 addr
= dd
.ip
if dd
.ip
else self
._inventory
_get
_addr
(dd
.hostname
)
85 port
= dd
.ports
[0] if dd
.ports
else self
.DEFAULT_SERVICE_PORT
86 service_url
= 'https://{}:{}'.format(addr
, port
)
87 self
._set
_service
_url
_on
_dashboard
(
89 'dashboard get-grafana-api-url',
90 'dashboard set-grafana-api-url',
95 daemon_ids
: List
[str],
97 known
: Optional
[List
[str]] = None) -> HandleCommandResult
:
98 warn
, warn_message
= self
._enough
_daemons
_to
_stop
(self
.TYPE
, daemon_ids
, 'Grafana', 1)
99 if warn
and not force
:
100 return HandleCommandResult(-errno
.EBUSY
, '', warn_message
)
101 return HandleCommandResult(0, warn_message
, '')
104 class AlertmanagerService(CephadmService
):
105 TYPE
= 'alertmanager'
106 DEFAULT_SERVICE_PORT
= 9093
108 def prepare_create(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> CephadmDaemonDeploySpec
:
109 assert self
.TYPE
== daemon_spec
.daemon_type
110 daemon_spec
.final_config
, daemon_spec
.deps
= self
.generate_config(daemon_spec
)
113 def generate_config(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
114 assert self
.TYPE
== daemon_spec
.daemon_type
116 default_webhook_urls
: List
[str] = []
118 spec
= cast(AlertManagerSpec
, self
.mgr
.spec_store
[daemon_spec
.service_name
].spec
)
119 user_data
= spec
.user_data
120 if 'default_webhook_urls' in user_data
and isinstance(
121 user_data
['default_webhook_urls'], list):
122 default_webhook_urls
.extend(user_data
['default_webhook_urls'])
125 dashboard_urls
: List
[str] = []
126 mgr_map
= self
.mgr
.get('mgr_map')
128 proto
= None # http: or https:
129 url
= mgr_map
.get('services', {}).get('dashboard', None)
131 dashboard_urls
.append(url
)
132 proto
= url
.split('/')[0]
133 port
= url
.split('/')[2].split(':')[1]
134 # scan all mgrs to generate deps and to get standbys too.
135 # assume that they are all on the same port as the active mgr.
136 for dd
in self
.mgr
.cache
.get_daemons_by_service('mgr'):
137 # we consider mgr a dep even if the dashboard is disabled
138 # in order to be consistent with _calc_daemon_deps().
139 deps
.append(dd
.name())
142 if dd
.daemon_id
== self
.mgr
.get_mgr_id():
144 assert dd
.hostname
is not None
145 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
146 dashboard_urls
.append('%s//%s:%s/' % (proto
, addr
.split(':')[0],
150 'dashboard_urls': dashboard_urls
,
151 'default_webhook_urls': default_webhook_urls
153 yml
= self
.mgr
.template
.render('services/alertmanager/alertmanager.yml.j2', context
)
157 for dd
in self
.mgr
.cache
.get_daemons_by_service('alertmanager'):
158 assert dd
.hostname
is not None
159 deps
.append(dd
.name())
160 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
161 peers
.append(addr
.split(':')[0] + ':' + port
)
164 "alertmanager.yml": yml
169 def get_active_daemon(self
, daemon_descrs
: List
[DaemonDescription
]) -> DaemonDescription
:
170 # TODO: if there are multiple daemons, who is the active one?
172 return daemon_descrs
[0]
173 # if empty list provided, return empty Daemon Desc
174 return DaemonDescription()
176 def config_dashboard(self
, daemon_descrs
: List
[DaemonDescription
]) -> None:
177 dd
= self
.get_active_daemon(daemon_descrs
)
178 assert dd
.hostname
is not None
179 addr
= dd
.ip
if dd
.ip
else self
._inventory
_get
_addr
(dd
.hostname
)
180 port
= dd
.ports
[0] if dd
.ports
else self
.DEFAULT_SERVICE_PORT
181 service_url
= 'http://{}:{}'.format(addr
, port
)
182 self
._set
_service
_url
_on
_dashboard
(
184 'dashboard get-alertmanager-api-host',
185 'dashboard set-alertmanager-api-host',
190 daemon_ids
: List
[str],
192 known
: Optional
[List
[str]] = None) -> HandleCommandResult
:
193 warn
, warn_message
= self
._enough
_daemons
_to
_stop
(self
.TYPE
, daemon_ids
, 'Alertmanager', 1)
194 if warn
and not force
:
195 return HandleCommandResult(-errno
.EBUSY
, '', warn_message
)
196 return HandleCommandResult(0, warn_message
, '')
199 class PrometheusService(CephadmService
):
201 DEFAULT_SERVICE_PORT
= 9095
203 def prepare_create(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> CephadmDaemonDeploySpec
:
204 assert self
.TYPE
== daemon_spec
.daemon_type
205 daemon_spec
.final_config
, daemon_spec
.deps
= self
.generate_config(daemon_spec
)
208 def generate_config(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
209 assert self
.TYPE
== daemon_spec
.daemon_type
210 deps
= [] # type: List[str]
214 mgr_map
= self
.mgr
.get('mgr_map')
216 t
= mgr_map
.get('services', {}).get('prometheus', None)
219 mgr_scrape_list
.append(t
)
222 port
= t
.split(':')[1]
223 # scan all mgrs to generate deps and to get standbys too.
224 # assume that they are all on the same port as the active mgr.
225 for dd
in self
.mgr
.cache
.get_daemons_by_service('mgr'):
226 # we consider the mgr a dep even if the prometheus module is
227 # disabled in order to be consistent with _calc_daemon_deps().
228 deps
.append(dd
.name())
231 if dd
.daemon_id
== self
.mgr
.get_mgr_id():
233 assert dd
.hostname
is not None
234 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
235 mgr_scrape_list
.append(addr
.split(':')[0] + ':' + port
)
237 # scrape node exporters
239 for dd
in self
.mgr
.cache
.get_daemons_by_service('node-exporter'):
240 assert dd
.hostname
is not None
241 deps
.append(dd
.name())
242 addr
= dd
.ip
if dd
.ip
else self
.mgr
.inventory
.get_addr(dd
.hostname
)
243 port
= str(dd
.ports
[0]) if dd
.ports
else '9100'
245 'hostname': dd
.hostname
,
246 'url': addr
.split(':')[0] + ':' + port
249 # scrape alert managers
250 alertmgr_targets
= []
251 for dd
in self
.mgr
.cache
.get_daemons_by_service('alertmanager'):
252 assert dd
.hostname
is not None
253 deps
.append(dd
.name())
254 addr
= dd
.ip
if dd
.ip
else self
.mgr
.inventory
.get_addr(dd
.hostname
)
255 port
= str(dd
.ports
[0]) if dd
.ports
else '9093'
256 alertmgr_targets
.append("'{}:{}'".format(addr
.split(':')[0], port
))
260 for dd
in self
.mgr
.cache
.get_daemons_by_type('ingress'):
261 if dd
.service_name() in self
.mgr
.spec_store
:
262 spec
= cast(IngressSpec
, self
.mgr
.spec_store
[dd
.service_name()].spec
)
263 assert dd
.hostname
is not None
264 deps
.append(dd
.name())
265 if dd
.daemon_type
== 'haproxy':
266 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
267 haproxy_targets
.append({
268 "url": f
"'{addr.split(':')[0]}:{spec.monitor_port}'",
269 "service": dd
.service_name(),
272 # generate the prometheus configuration
274 'alertmgr_targets': alertmgr_targets
,
275 'mgr_scrape_list': mgr_scrape_list
,
276 'haproxy_targets': haproxy_targets
,
282 self
.mgr
.template
.render(
283 'services/prometheus/prometheus.yml.j2', context
)
287 # include alerts, if present in the container
288 if os
.path
.exists(self
.mgr
.prometheus_alerts_path
):
289 with
open(self
.mgr
.prometheus_alerts_path
, 'r', encoding
='utf-8') as f
:
291 r
['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts
293 return r
, sorted(deps
)
295 def get_active_daemon(self
, daemon_descrs
: List
[DaemonDescription
]) -> DaemonDescription
:
296 # TODO: if there are multiple daemons, who is the active one?
298 return daemon_descrs
[0]
299 # if empty list provided, return empty Daemon Desc
300 return DaemonDescription()
302 def config_dashboard(self
, daemon_descrs
: List
[DaemonDescription
]) -> None:
303 dd
= self
.get_active_daemon(daemon_descrs
)
304 assert dd
.hostname
is not None
305 addr
= dd
.ip
if dd
.ip
else self
._inventory
_get
_addr
(dd
.hostname
)
306 port
= dd
.ports
[0] if dd
.ports
else self
.DEFAULT_SERVICE_PORT
307 service_url
= 'http://{}:{}'.format(addr
, port
)
308 self
._set
_service
_url
_on
_dashboard
(
310 'dashboard get-prometheus-api-host',
311 'dashboard set-prometheus-api-host',
316 daemon_ids
: List
[str],
318 known
: Optional
[List
[str]] = None) -> HandleCommandResult
:
319 warn
, warn_message
= self
._enough
_daemons
_to
_stop
(self
.TYPE
, daemon_ids
, 'Prometheus', 1)
320 if warn
and not force
:
321 return HandleCommandResult(-errno
.EBUSY
, '', warn_message
)
322 return HandleCommandResult(0, warn_message
, '')
325 class NodeExporterService(CephadmService
):
326 TYPE
= 'node-exporter'
328 def prepare_create(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> CephadmDaemonDeploySpec
:
329 assert self
.TYPE
== daemon_spec
.daemon_type
330 daemon_spec
.final_config
, daemon_spec
.deps
= self
.generate_config(daemon_spec
)
333 def generate_config(self
, daemon_spec
: CephadmDaemonDeploySpec
) -> Tuple
[Dict
[str, Any
], List
[str]]:
334 assert self
.TYPE
== daemon_spec
.daemon_type
338 daemon_ids
: List
[str],
340 known
: Optional
[List
[str]] = None) -> HandleCommandResult
:
341 # since node exporter runs on each host and cannot compromise data, no extra checks required
342 names
= [f
'{self.TYPE}.{d_id}' for d_id
in daemon_ids
]
343 out
= f
'It is presumed safe to stop {names}'
344 return HandleCommandResult(0, out
, '')