]> git.proxmox.com Git - ceph.git/blame - ceph/src/pybind/mgr/cephadm/services/monitoring.py
bump version to 15.2.6-pve1
[ceph.git] / ceph / src / pybind / mgr / cephadm / services / monitoring.py
CommitLineData
e306af50
TL
1import logging
2import os
3from typing import List, Any, Tuple, Dict
4
5from orchestrator import DaemonDescription
f6b5b4d7
TL
6from ceph.deployment.service_spec import AlertManagerSpec
7from cephadm.services.cephadmservice import CephadmService, CephadmDaemonSpec
e306af50
TL
8from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert
9
10logger = logging.getLogger(__name__)
11
f6b5b4d7 12
class GrafanaService(CephadmService):
    """cephadm service handler for ``grafana`` daemons.

    Produces the files handed to a grafana daemon (``grafana.ini``, the
    provisioned datasource file and a TLS certificate/key pair) and passes
    the grafana URL on to ``_set_service_url_on_dashboard``.
    """
    TYPE = 'grafana'
    # Port rendered into grafana.ini and used to build the service URL.
    DEFAULT_SERVICE_PORT = 3000

    def create(self, daemon_spec: CephadmDaemonSpec) -> str:
        """Create the daemon described by *daemon_spec*.

        Delegates to ``self.mgr._create_daemon`` and returns its result.
        """
        assert daemon_spec.daemon_type == self.TYPE
        return self.mgr._create_daemon(daemon_spec)

    def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]:
        """Build the grafana configuration files and the dependency list.

        :param daemon_spec: spec of the grafana daemon being configured.
        :return: ``(config, deps)`` where ``config['files']`` maps file names
            to their contents and ``deps`` is the sorted list of prometheus
            daemon names.
        """
        assert daemon_spec.daemon_type == self.TYPE

        # Every known prometheus daemon is both a datasource host and a dep.
        prometheus_daemons = list(self.mgr.cache.get_daemons_by_service('prometheus'))
        datasource_hosts = [d.hostname for d in prometheus_daemons]  # type: List[str]
        deps = [d.name() for d in prometheus_daemons]  # type: List[str]
        datasources_yml = self.mgr.template.render(
            'services/grafana/ceph-dashboard.yml.j2', {'hosts': datasource_hosts})

        # Prefer the stored certificate/key pair, provided both halves exist
        # and verify_tls() accepts them.
        cert = self.mgr.get_store('grafana_crt')
        pkey = self.mgr.get_store('grafana_key')
        have_usable_pair = bool(cert and pkey)
        if have_usable_pair:
            try:
                verify_tls(cert, pkey)
            except ServerConfigException as exc:
                logger.warning('Provided grafana TLS certificates invalid: %s', str(exc))
                have_usable_pair = False

        if not have_usable_pair:
            # Fall back to a freshly generated self-signed pair, persist it
            # and issue the 'set-grafana-api-ssl-verify false' mon command.
            cert, pkey = create_self_signed_cert('Ceph', 'cephadm')
            self.mgr.set_store('grafana_crt', cert)
            self.mgr.set_store('grafana_key', pkey)
            self.mgr.check_mon_command({
                'prefix': 'dashboard set-grafana-api-ssl-verify',
                'value': 'false',
            })

        ini_text = self.mgr.template.render(
            'services/grafana/grafana.ini.j2', {'http_port': self.DEFAULT_SERVICE_PORT})

        files = {
            "grafana.ini": ini_text,
            'provisioning/datasources/ceph-dashboard.yml': datasources_yml,
            'certs/cert_file': '# generated by cephadm\n%s' % cert,
            'certs/cert_key': '# generated by cephadm\n%s' % pkey,
        }
        return {'files': files}, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the daemon treated as active: the last entry of *daemon_descrs*."""
        return daemon_descrs[-1]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]):
        """Hand the ``https`` URL of the active grafana daemon to the dashboard helper.

        The URL is built from the inventory address of the active daemon's
        host and ``DEFAULT_SERVICE_PORT``.
        """
        # TODO: signed cert
        active = self.get_active_daemon(daemon_descrs)
        host_addr = self._inventory_get_addr(active.hostname)
        service_url = 'https://{}:{}'.format(host_addr, self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'Grafana',
            'dashboard get-grafana-api-url',
            'dashboard set-grafana-api-url',
            service_url
        )
77
f6b5b4d7 78
class AlertmanagerService(CephadmService):
    """cephadm service handler for ``alertmanager`` daemons.

    Renders ``alertmanager.yml`` from the dashboard URLs of the mgr daemons
    plus any user-supplied webhook URLs, and computes the peer list of the
    known alertmanager daemons.
    """
    TYPE = 'alertmanager'
    # Port used to build the service URL in config_dashboard().
    DEFAULT_SERVICE_PORT = 9093

    def create(self, daemon_spec: CephadmDaemonSpec[AlertManagerSpec]) -> str:
        """Create the daemon described by *daemon_spec*.

        Requires ``daemon_spec.spec`` to be set; delegates to
        ``self.mgr._create_daemon`` and returns its result.
        """
        assert self.TYPE == daemon_spec.daemon_type
        assert daemon_spec.spec
        return self.mgr._create_daemon(daemon_spec)

    def generate_config(self, daemon_spec: CephadmDaemonSpec[AlertManagerSpec]) -> Tuple[Dict[str, Any], List[str]]:
        """Build the alertmanager configuration and the dependency list.

        :param daemon_spec: spec of the alertmanager daemon being configured;
            ``spec.user_data['default_webhook_urls']`` is honoured when it is
            a list.
        :return: ``(config, deps)`` where ``config`` holds the rendered
            ``alertmanager.yml`` under ``files`` and the ``peers`` list, and
            ``deps`` is the sorted list of mgr and alertmanager daemon names.
        """
        # Imported locally so this method does not rely on a module-level import.
        from urllib.parse import urlsplit

        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []
        default_webhook_urls: List[str] = []

        if daemon_spec.spec:
            user_data = daemon_spec.spec.user_data
            if 'default_webhook_urls' in user_data and isinstance(
                    user_data['default_webhook_urls'], list):
                default_webhook_urls.extend(user_data['default_webhook_urls'])

        # dashboard(s)
        dashboard_urls: List[str] = []
        mgr_map = self.mgr.get('mgr_map')
        scheme = None  # 'http' or 'https'
        dashboard_port = None
        url = mgr_map.get('services', {}).get('dashboard', None)
        if url:
            dashboard_urls.append(url)
            # Use a real URL parser: splitting on ':' breaks on bracketed
            # IPv6 literals and on URLs that carry no explicit port.
            try:
                parsed = urlsplit(url)
                scheme = parsed.scheme
                dashboard_port = parsed.port
            except ValueError as e:
                logger.warning('Unable to parse dashboard URL %r: %s', url, e)

        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider mgr a dep even if the dashboard is disabled
            # in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            if not (scheme and dashboard_port):
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                # the active mgr is already covered by the URL from the mgr map
                continue
            addr = self.mgr.inventory.get_addr(dd.hostname)
            # NOTE(review): presumably strips a ':port' suffix from the
            # inventory address; this would truncate a bare IPv6 address.
            dashboard_urls.append('%s://%s:%s/' % (scheme, addr.split(':')[0],
                                                   dashboard_port))

        context = {
            'dashboard_urls': dashboard_urls,
            'default_webhook_urls': default_webhook_urls
        }
        yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context)

        # Every alertmanager daemon is a dep and is listed as a peer on 9094.
        peers = []
        peer_port = '9094'
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            peers.append(addr.split(':')[0] + ':' + peer_port)
        return {
            "files": {
                "alertmanager.yml": yml
            },
            "peers": peers
        }, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the daemon treated as active: the first entry of *daemon_descrs*."""
        # TODO: if there are multiple daemons, who is the active one?
        return daemon_descrs[0]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]):
        """Hand the ``http`` URL of the active alertmanager daemon to the dashboard helper.

        The URL is built from the inventory address of the active daemon's
        host and ``DEFAULT_SERVICE_PORT``.
        """
        dd = self.get_active_daemon(daemon_descrs)
        service_url = 'http://{}:{}'.format(
            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'AlertManager',
            'dashboard get-alertmanager-api-host',
            'dashboard set-alertmanager-api-host',
            service_url
        )
155
156
class PrometheusService(CephadmService):
    """cephadm service handler for ``prometheus`` daemons.

    Renders ``prometheus.yml`` with scrape targets for the mgr daemons, the
    node-exporter daemons and the alertmanager daemons, and ships the alert
    rules file when it exists.
    """
    TYPE = 'prometheus'
    # Port used to build the service URL in config_dashboard().
    DEFAULT_SERVICE_PORT = 9095

    def create(self, daemon_spec: CephadmDaemonSpec) -> str:
        """Create the daemon described by *daemon_spec*.

        Delegates to ``self.mgr._create_daemon`` and returns its result.
        """
        assert self.TYPE == daemon_spec.daemon_type
        return self.mgr._create_daemon(daemon_spec)

    def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]:
        """Build the prometheus configuration files and the dependency list.

        :param daemon_spec: spec of the prometheus daemon being configured.
        :return: ``(config, deps)`` where ``config['files']`` maps file names
            to their contents and ``deps`` is the sorted list of mgr,
            node-exporter and alertmanager daemon names.
        """
        # Imported locally so this method does not rely on a module-level import.
        from urllib.parse import urlsplit

        assert self.TYPE == daemon_spec.daemon_type
        deps = []  # type: List[str]

        # scrape mgrs
        mgr_scrape_list = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        t = mgr_map.get('services', {}).get('prometheus', None)
        if t:
            # Use a real URL parser for the port: splitting on ':' yields an
            # empty port for bracketed IPv6 literals, which silently dropped
            # every standby mgr from the scrape list.
            try:
                parsed_port = urlsplit(t).port
            except ValueError as e:
                logger.warning('Unable to parse prometheus module URL %r: %s', t, e)
                parsed_port = None
            mgr_scrape_list.append(t.split('/')[2])
            # fall back to 9283 when the URL carries no usable port
            port = parsed_port or 9283
        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider the mgr a dep even if the prometheus module is
            # disabled in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            if not port:
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                # the active mgr is already covered by the URL from the mgr map
                continue
            addr = self.mgr.inventory.get_addr(dd.hostname)
            # NOTE(review): presumably strips a ':port' suffix from the
            # inventory address; this would truncate a bare IPv6 address.
            mgr_scrape_list.append('{}:{}'.format(addr.split(':')[0], port))

        # scrape node exporters
        nodes = []
        for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            nodes.append({
                'hostname': dd.hostname,
                'url': addr.split(':')[0] + ':9100'
            })

        # scrape alert managers
        alertmgr_targets = []
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            # each target is wrapped in single quotes before being passed to the template
            alertmgr_targets.append("'{}:9093'".format(addr.split(':')[0]))

        # generate the prometheus configuration
        context = {
            'alertmgr_targets': alertmgr_targets,
            'mgr_scrape_list': mgr_scrape_list,
            'nodes': nodes,
        }
        r = {
            'files': {
                'prometheus.yml':
                    self.mgr.template.render(
                        'services/prometheus/prometheus.yml.j2', context)
            }
        }

        # include alerts, if present in the container
        if os.path.exists(self.mgr.prometheus_alerts_path):
            with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
                alerts = f.read()
            r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts

        return r, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the daemon treated as active: the first entry of *daemon_descrs*."""
        # TODO: if there are multiple daemons, who is the active one?
        return daemon_descrs[0]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]):
        """Hand the ``http`` URL of the active prometheus daemon to the dashboard helper.

        The URL is built from the inventory address of the active daemon's
        host and ``DEFAULT_SERVICE_PORT``.
        """
        dd = self.get_active_daemon(daemon_descrs)
        service_url = 'http://{}:{}'.format(
            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'Prometheus',
            'dashboard get-prometheus-api-host',
            'dashboard set-prometheus-api-host',
            service_url
        )
246
f6b5b4d7 247
class NodeExporterService(CephadmService):
    """cephadm service handler for ``node-exporter`` daemons.

    No configuration files and no dependencies are generated for this
    daemon type.
    """
    TYPE = 'node-exporter'

    def create(self, daemon_spec: CephadmDaemonSpec) -> str:
        """Create the daemon described by *daemon_spec*.

        Delegates to ``self.mgr._create_daemon`` and returns its result.
        """
        assert daemon_spec.daemon_type == self.TYPE
        return self.mgr._create_daemon(daemon_spec)

    def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]:
        """Return an empty configuration dict and an empty dependency list."""
        assert daemon_spec.daemon_type == self.TYPE
        config = {}  # type: Dict[str, Any]
        deps = []  # type: List[str]
        return config, deps