]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/services/monitoring.py
3 from typing
import List
, Any
, Tuple
, Dict
5 from orchestrator
import DaemonDescription
6 from cephadm
.services
.cephadmservice
import CephadmService
7 from mgr_util
import verify_tls
, ServerConfigException
, create_self_signed_cert
9 logger
= logging
.getLogger(__name__
)
11 class GrafanaService(CephadmService
):
12 DEFAULT_SERVICE_PORT
= 3000
def create(self, daemon_id, host):
    # type: (str, str) -> str
    """Create a grafana daemon on the given host.

    The work is delegated to ``self.mgr._create_daemon`` with the daemon
    type fixed to ``'grafana'``.

    :param daemon_id: identifier of the daemon to create
    :param host: host the daemon is created on
    :return: whatever ``self.mgr._create_daemon`` returns
    """
    return self.mgr._create_daemon('grafana', daemon_id, host)
# generate_config: build the configuration files for a grafana daemon and the
# list of daemons it depends on.
#
# Steps that are visible in this listing:
#   - every 'prometheus' daemon found in self.mgr.cache contributes its
#     hostname to the datasource template context and its name() to deps;
#   - 'grafana_crt' / 'grafana_key' are read from the mgr store; a
#     ServerConfigException raised by verify_tls() is logged as a warning and
#     both values are reset to None;
#   - when either value is missing, create_self_signed_cert('Ceph', 'cephadm')
#     supplies a new pair, the pair is written back to the store, and a
#     'dashboard set-grafana-api-ssl-verify' mon command is issued;
#   - grafana.ini is rendered with http_port set to DEFAULT_SERVICE_PORT.
# The function returns (config_file, sorted(deps)).
#
# NOTE(review): this text was extracted from a web view of the file. The
# original line numbers embedded below skip 21, 28, 31-32, 43-45, 48-50 and
# 55-56, so some statements are absent here (for example the `try:` that
# pairs with the `except`, the remaining keys of the mon command, and the
# assignment that creates `config_file`). Restore them from the upstream file
# before changing this function.
18 def generate_config(self
):
19 # type: () -> Tuple[Dict[str, Any], List[str]]
20 deps
= [] # type: List[str]
22 prom_services
= [] # type: List[str]
23 for dd
in self
.mgr
.cache
.get_daemons_by_service('prometheus'):
24 prom_services
.append(dd
.hostname
)
25 deps
.append(dd
.name())
26 grafana_data_sources
= self
.mgr
.template
.render(
27 'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services
})
29 cert
= self
.mgr
.get_store('grafana_crt')
30 pkey
= self
.mgr
.get_store('grafana_key')
33 verify_tls(cert
, pkey
)
34 except ServerConfigException
as e
:
35 logger
.warning('Provided grafana TLS certificates invalid: %s', str(e
))
36 cert
, pkey
= None, None
37 if not (cert
and pkey
):
38 cert
, pkey
= create_self_signed_cert('Ceph', 'cephadm')
39 self
.mgr
.set_store('grafana_crt', cert
)
40 self
.mgr
.set_store('grafana_key', pkey
)
41 self
.mgr
.check_mon_command({
42 'prefix': 'dashboard set-grafana-api-ssl-verify',
46 grafana_ini
= self
.mgr
.template
.render(
47 'services/grafana/grafana.ini.j2', {'http_port': self
.DEFAULT_SERVICE_PORT
})
51 "grafana.ini": grafana_ini
,
52 'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources
,
53 'certs/cert_file': '# generated by cephadm\n%s' % cert
,
54 'certs/cert_key': '# generated by cephadm\n%s' % pkey
,
57 return config_file
, sorted(deps
)
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
    """Return the daemon treated as the active grafana instance.

    The last entry of ``daemon_descrs`` is returned.

    :param daemon_descrs: candidate daemon descriptions; must be non-empty
    :return: ``daemon_descrs[-1]``
    :raises IndexError: if ``daemon_descrs`` is empty
    """
    # Use the least-created one as the active daemon
    # NOTE(review): this relies on the caller's ordering of the list, which
    # is not visible here -- confirm the last entry is the intended one.
    return daemon_descrs[-1]
# daemon_check_post: take the daemon chosen by get_active_daemon(), build an
# 'https://<addr>:<DEFAULT_SERVICE_PORT>' URL from self._inventory_get_addr()
# of its hostname, and call self._set_service_url_on_dashboard() with the
# 'dashboard get-grafana-api-url' / 'dashboard set-grafana-api-url' commands.
#
# NOTE(review): this text was extracted from a web view of the file. The
# original line numbers embedded below skip 64, 69 and 72 onwards, so the
# first argument, any trailing arguments and the closing parenthesis of the
# final call are not visible here. Restore them from the upstream file before
# changing this function.
63 def daemon_check_post(self
, daemon_descrs
: List
[DaemonDescription
]):
65 dd
= self
.get_active_daemon(daemon_descrs
)
66 service_url
= 'https://{}:{}'.format(
67 self
._inventory
_get
_addr
(dd
.hostname
), self
.DEFAULT_SERVICE_PORT
)
68 self
._set
_service
_url
_on
_dashboard
(
70 'dashboard get-grafana-api-url',
71 'dashboard set-grafana-api-url',
75 class AlertmanagerService(CephadmService
):
76 DEFAULT_SERVICE_PORT
= 9093
def create(self, daemon_id, host) -> str:
    """Create an alertmanager daemon on the given host.

    The work is delegated to ``self.mgr._create_daemon`` with the daemon
    type fixed to ``'alertmanager'``.

    :param daemon_id: identifier of the daemon to create
    :param host: host the daemon is created on
    :return: whatever ``self.mgr._create_daemon`` returns
    """
    return self.mgr._create_daemon('alertmanager', daemon_id, host)
# generate_config: build the alertmanager configuration and the list of
# daemons it depends on.
#
# Steps that are visible in this listing:
#   - the 'dashboard' entry of mgr_map['services'] is read as `url`; it is
#     appended to dashboard_urls, and `proto` / `port` are cut out of it with
#     str.split();
#   - every 'mgr' daemon in self.mgr.cache is added to deps; a URL of the form
#     '<proto>//<addr>:.../' is appended to dashboard_urls using the
#     inventory address of the daemon's host;
#   - 'services/alertmanager/alertmanager.yml.j2' is rendered with a context
#     that contains 'dashboard_urls';
#   - every 'alertmanager' daemon is added to deps and contributes
#     '<addr>:<port>' to peers.
#
# NOTE(review): this text was extracted from a web view of the file. The
# original line numbers embedded below skip 84-86, 88, 91, 101-102, 104,
# 107-109, 111, 113-115, 120-121 and 123 onwards. Not visible here: the
# initialisation of dashboard_urls / peers / context, any guard around `url`,
# the body of the get_mgr_id() comparison, a possible reassignment of `port`
# before the peers loop, and the return statement. Restore them from the
# upstream file before changing this function.
81 def generate_config(self
):
82 # type: () -> Tuple[Dict[str, Any], List[str]]
83 deps
= [] # type: List[str]
87 mgr_map
= self
.mgr
.get('mgr_map')
89 proto
= None # http: or https:
90 url
= mgr_map
.get('services', {}).get('dashboard', None)
92 dashboard_urls
.append(url
)
93 proto
= url
.split('/')[0]
94 port
= url
.split('/')[2].split(':')[1]
95 # scan all mgrs to generate deps and to get standbys too.
96 # assume that they are all on the same port as the active mgr.
97 for dd
in self
.mgr
.cache
.get_daemons_by_service('mgr'):
98 # we consider mgr a dep even if the dashboard is disabled
99 # in order to be consistent with _calc_daemon_deps().
100 deps
.append(dd
.name())
103 if dd
.daemon_id
== self
.mgr
.get_mgr_id():
105 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
106 dashboard_urls
.append('%s//%s:%s/' % (proto
, addr
.split(':')[0],
110 'dashboard_urls': dashboard_urls
112 yml
= self
.mgr
.template
.render('services/alertmanager/alertmanager.yml.j2', context
)
116 for dd
in self
.mgr
.cache
.get_daemons_by_service('alertmanager'):
117 deps
.append(dd
.name())
118 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
119 peers
.append(addr
.split(':')[0] + ':' + port
)
122 "alertmanager.yml": yml
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
    """Return the daemon treated as the active alertmanager instance.

    The first entry of ``daemon_descrs`` is returned.

    :param daemon_descrs: candidate daemon descriptions; must be non-empty
    :return: ``daemon_descrs[0]``
    :raises IndexError: if ``daemon_descrs`` is empty
    """
    # TODO: if there are multiple daemons, who is the active one?
    return daemon_descrs[0]
# daemon_check_post: take the daemon chosen by get_active_daemon(), build an
# 'http://<addr>:<DEFAULT_SERVICE_PORT>' URL from self._inventory_get_addr()
# of its hostname, and call self._set_service_url_on_dashboard() with the
# 'dashboard get-alertmanager-api-host' / 'dashboard set-alertmanager-api-host'
# commands.
#
# NOTE(review): this text was extracted from a web view of the file. The
# original line numbers embedded below skip 135 and 138 onwards, so the first
# argument, any trailing arguments and the closing parenthesis of the final
# call are not visible here. Restore them from the upstream file before
# changing this function.
131 def daemon_check_post(self
, daemon_descrs
: List
[DaemonDescription
]):
132 dd
= self
.get_active_daemon(daemon_descrs
)
133 service_url
= 'http://{}:{}'.format(self
._inventory
_get
_addr
(dd
.hostname
), self
.DEFAULT_SERVICE_PORT
)
134 self
._set
_service
_url
_on
_dashboard
(
136 'dashboard get-alertmanager-api-host',
137 'dashboard set-alertmanager-api-host',
142 class PrometheusService(CephadmService
):
143 DEFAULT_SERVICE_PORT
= 9095
def create(self, daemon_id, host) -> str:
    """Create a prometheus daemon on the given host.

    The work is delegated to ``self.mgr._create_daemon`` with the daemon
    type fixed to ``'prometheus'``.

    :param daemon_id: identifier of the daemon to create
    :param host: host the daemon is created on
    :return: whatever ``self.mgr._create_daemon`` returns
    """
    return self.mgr._create_daemon('prometheus', daemon_id, host)
# generate_config: build the prometheus configuration and the list of daemons
# it depends on.
#
# Steps that are visible in this listing:
#   - the 'prometheus' entry of mgr_map['services'] is read as `t`, appended
#     to mgr_scrape_list, and `port` is taken from t.split(':')[1];
#   - every 'mgr' daemon in self.mgr.cache is added to deps;
#     '<addr>:<port>' built from the inventory address of the daemon's host
#     is appended to mgr_scrape_list;
#   - every 'node-exporter' daemon is added to deps and yields an entry with
#     'hostname' and a 'url' of '<addr>:9100';
#   - every 'alertmanager' daemon is added to deps and yields a quoted
#     "'<addr>:9093'" target;
#   - 'services/prometheus/prometheus.yml.j2' is rendered with a context that
#     contains 'alertmgr_targets' and 'mgr_scrape_list';
#   - if self.mgr.prometheus_alerts_path exists, the file is opened as UTF-8
#     and `alerts` is stored under
#     r['files']['/etc/prometheus/alerting/ceph_alerts.yml'].
# The function returns (r, sorted(deps)).
#
# NOTE(review): this text was extracted from a web view of the file. The
# original line numbers embedded below skip 151-153, 155, 157-158, 160-161,
# 169-170, 172, 175, 177, 181, 184-185, 192, 194, 197-201, 204-206, 210 and
# 212. Not visible here: the initialisation of mgr_scrape_list and the
# node-exporter list, any guard around `t`, the body of the get_mgr_id()
# comparison, the construction of `context` and `r`, and the statement that
# reads `alerts` from the opened file. Restore them from the upstream file
# before changing this function.
148 def generate_config(self
):
149 # type: () -> Tuple[Dict[str, Any], List[str]]
150 deps
= [] # type: List[str]
154 mgr_map
= self
.mgr
.get('mgr_map')
156 t
= mgr_map
.get('services', {}).get('prometheus', None)
159 mgr_scrape_list
.append(t
)
162 port
= t
.split(':')[1]
163 # scan all mgrs to generate deps and to get standbys too.
164 # assume that they are all on the same port as the active mgr.
165 for dd
in self
.mgr
.cache
.get_daemons_by_service('mgr'):
166 # we consider the mgr a dep even if the prometheus module is
167 # disabled in order to be consistent with _calc_daemon_deps().
168 deps
.append(dd
.name())
171 if dd
.daemon_id
== self
.mgr
.get_mgr_id():
173 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
174 mgr_scrape_list
.append(addr
.split(':')[0] + ':' + port
)
176 # scrape node exporters
178 for dd
in self
.mgr
.cache
.get_daemons_by_service('node-exporter'):
179 deps
.append(dd
.name())
180 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
182 'hostname': dd
.hostname
,
183 'url': addr
.split(':')[0] + ':9100'
186 # scrape alert managers
187 alertmgr_targets
= []
188 for dd
in self
.mgr
.cache
.get_daemons_by_service('alertmanager'):
189 deps
.append(dd
.name())
190 addr
= self
.mgr
.inventory
.get_addr(dd
.hostname
)
191 alertmgr_targets
.append("'{}:9093'".format(addr
.split(':')[0]))
193 # generate the prometheus configuration
195 'alertmgr_targets': alertmgr_targets
,
196 'mgr_scrape_list': mgr_scrape_list
,
202 self
.mgr
.template
.render(
203 'services/prometheus/prometheus.yml.j2', context
)
207 # include alerts, if present in the container
208 if os
.path
.exists(self
.mgr
.prometheus_alerts_path
):
209 with
open(self
.mgr
.prometheus_alerts_path
, 'r', encoding
='utf-8') as f
:
211 r
['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts
213 return r
, sorted(deps
)
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
    """Return the daemon treated as the active prometheus instance.

    The first entry of ``daemon_descrs`` is returned.

    :param daemon_descrs: candidate daemon descriptions; must be non-empty
    :return: ``daemon_descrs[0]``
    :raises IndexError: if ``daemon_descrs`` is empty
    """
    # TODO: if there are multiple daemons, who is the active one?
    return daemon_descrs[0]
# daemon_check_post: take the daemon chosen by get_active_daemon(), build an
# 'http://<addr>:<DEFAULT_SERVICE_PORT>' URL from self._inventory_get_addr()
# of its hostname, and call self._set_service_url_on_dashboard() with the
# 'dashboard get-prometheus-api-host' / 'dashboard set-prometheus-api-host'
# commands.
#
# NOTE(review): this text was extracted from a web view of the file. The
# original line numbers embedded below skip 224 and 227 onwards, so the first
# argument, any trailing arguments and the closing parenthesis of the final
# call are not visible here. Restore them from the upstream file before
# changing this function.
219 def daemon_check_post(self
, daemon_descrs
: List
[DaemonDescription
]):
220 dd
= self
.get_active_daemon(daemon_descrs
)
221 service_url
= 'http://{}:{}'.format(
222 self
._inventory
_get
_addr
(dd
.hostname
), self
.DEFAULT_SERVICE_PORT
)
223 self
._set
_service
_url
_on
_dashboard
(
225 'dashboard get-prometheus-api-host',
226 'dashboard set-prometheus-api-host',
230 class NodeExporterService(CephadmService
):
def create(self, daemon_id, host) -> str:
    """Create a node-exporter daemon on the given host.

    The work is delegated to ``self.mgr._create_daemon`` with the daemon
    type fixed to ``'node-exporter'``.

    :param daemon_id: identifier of the daemon to create
    :param host: host the daemon is created on
    :return: whatever ``self.mgr._create_daemon`` returns
    """
    return self.mgr._create_daemon('node-exporter', daemon_id, host)
234 def generate_config(self
) -> Tuple
[Dict
[str, Any
], List
[str]]: