]>
Commit | Line | Data |
---|---|---|
f67539c2 | 1 | import errno |
e306af50 TL |
2 | import logging |
3 | import os | |
f67539c2 TL |
4 | from typing import List, Any, Tuple, Dict, Optional, cast |
5 | ||
6 | from mgr_module import HandleCommandResult | |
e306af50 TL |
7 | |
8 | from orchestrator import DaemonDescription | |
522d829b | 9 | from ceph.deployment.service_spec import AlertManagerSpec, ServiceSpec |
f67539c2 TL |
10 | from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec |
11 | from cephadm.services.ingress import IngressSpec | |
522d829b | 12 | from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url |
e306af50 TL |
13 | |
14 | logger = logging.getLogger(__name__) | |
15 | ||
f6b5b4d7 | 16 | |
e306af50 | 17 | class GrafanaService(CephadmService): |
f6b5b4d7 | 18 | TYPE = 'grafana' |
e306af50 TL |
19 | DEFAULT_SERVICE_PORT = 3000 |
20 | ||
f67539c2 | 21 | def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: |
f6b5b4d7 | 22 | assert self.TYPE == daemon_spec.daemon_type |
f67539c2 | 23 | daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) |
f91f0fd5 | 24 | return daemon_spec |
e306af50 | 25 | |
f67539c2 | 26 | def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: |
f6b5b4d7 | 27 | assert self.TYPE == daemon_spec.daemon_type |
e306af50 TL |
28 | deps = [] # type: List[str] |
29 | ||
30 | prom_services = [] # type: List[str] | |
31 | for dd in self.mgr.cache.get_daemons_by_service('prometheus'): | |
f67539c2 | 32 | assert dd.hostname is not None |
b3b6e05e TL |
33 | addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname) |
34 | port = dd.ports[0] if dd.ports else 9095 | |
35 | prom_services.append(addr + ':' + str(port)) | |
e306af50 TL |
36 | deps.append(dd.name()) |
37 | grafana_data_sources = self.mgr.template.render( | |
38 | 'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services}) | |
39 | ||
40 | cert = self.mgr.get_store('grafana_crt') | |
41 | pkey = self.mgr.get_store('grafana_key') | |
42 | if cert and pkey: | |
43 | try: | |
44 | verify_tls(cert, pkey) | |
45 | except ServerConfigException as e: | |
46 | logger.warning('Provided grafana TLS certificates invalid: %s', str(e)) | |
47 | cert, pkey = None, None | |
48 | if not (cert and pkey): | |
49 | cert, pkey = create_self_signed_cert('Ceph', 'cephadm') | |
50 | self.mgr.set_store('grafana_crt', cert) | |
51 | self.mgr.set_store('grafana_key', pkey) | |
522d829b TL |
52 | if 'dashboard' in self.mgr.get('mgr_map')['modules']: |
53 | self.mgr.check_mon_command({ | |
54 | 'prefix': 'dashboard set-grafana-api-ssl-verify', | |
55 | 'value': 'false', | |
56 | }) | |
e306af50 TL |
57 | |
58 | grafana_ini = self.mgr.template.render( | |
b3b6e05e TL |
59 | 'services/grafana/grafana.ini.j2', { |
60 | 'http_port': daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT, | |
61 | 'http_addr': daemon_spec.ip if daemon_spec.ip else '' | |
62 | }) | |
e306af50 TL |
63 | |
64 | config_file = { | |
65 | 'files': { | |
66 | "grafana.ini": grafana_ini, | |
67 | 'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources, | |
68 | 'certs/cert_file': '# generated by cephadm\n%s' % cert, | |
69 | 'certs/cert_key': '# generated by cephadm\n%s' % pkey, | |
70 | } | |
71 | } | |
72 | return config_file, sorted(deps) | |
73 | ||
74 | def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: | |
75 | # Use the least-created one as the active daemon | |
f91f0fd5 TL |
76 | if daemon_descrs: |
77 | return daemon_descrs[-1] | |
78 | # if empty list provided, return empty Daemon Desc | |
79 | return DaemonDescription() | |
e306af50 | 80 | |
f91f0fd5 | 81 | def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: |
e306af50 TL |
82 | # TODO: signed cert |
83 | dd = self.get_active_daemon(daemon_descrs) | |
f67539c2 | 84 | assert dd.hostname is not None |
b3b6e05e TL |
85 | addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname) |
86 | port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT | |
522d829b | 87 | service_url = build_url(scheme='https', host=addr, port=port) |
e306af50 TL |
88 | self._set_service_url_on_dashboard( |
89 | 'Grafana', | |
90 | 'dashboard get-grafana-api-url', | |
91 | 'dashboard set-grafana-api-url', | |
92 | service_url | |
93 | ) | |
94 | ||
f67539c2 TL |
95 | def ok_to_stop(self, |
96 | daemon_ids: List[str], | |
97 | force: bool = False, | |
98 | known: Optional[List[str]] = None) -> HandleCommandResult: | |
99 | warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Grafana', 1) | |
100 | if warn and not force: | |
101 | return HandleCommandResult(-errno.EBUSY, '', warn_message) | |
102 | return HandleCommandResult(0, warn_message, '') | |
103 | ||
f6b5b4d7 | 104 | |
e306af50 | 105 | class AlertmanagerService(CephadmService): |
f6b5b4d7 | 106 | TYPE = 'alertmanager' |
e306af50 TL |
107 | DEFAULT_SERVICE_PORT = 9093 |
108 | ||
f67539c2 | 109 | def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: |
f6b5b4d7 | 110 | assert self.TYPE == daemon_spec.daemon_type |
f67539c2 | 111 | daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) |
f91f0fd5 | 112 | return daemon_spec |
f6b5b4d7 | 113 | |
f67539c2 | 114 | def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: |
f6b5b4d7 TL |
115 | assert self.TYPE == daemon_spec.daemon_type |
116 | deps: List[str] = [] | |
117 | default_webhook_urls: List[str] = [] | |
e306af50 | 118 | |
f67539c2 TL |
119 | spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec) |
120 | user_data = spec.user_data | |
121 | if 'default_webhook_urls' in user_data and isinstance( | |
122 | user_data['default_webhook_urls'], list): | |
123 | default_webhook_urls.extend(user_data['default_webhook_urls']) | |
e306af50 TL |
124 | |
125 | # dashboard(s) | |
f6b5b4d7 | 126 | dashboard_urls: List[str] = [] |
e306af50 TL |
127 | mgr_map = self.mgr.get('mgr_map') |
128 | port = None | |
129 | proto = None # http: or https: | |
130 | url = mgr_map.get('services', {}).get('dashboard', None) | |
131 | if url: | |
132 | dashboard_urls.append(url) | |
133 | proto = url.split('/')[0] | |
134 | port = url.split('/')[2].split(':')[1] | |
135 | # scan all mgrs to generate deps and to get standbys too. | |
136 | # assume that they are all on the same port as the active mgr. | |
137 | for dd in self.mgr.cache.get_daemons_by_service('mgr'): | |
138 | # we consider mgr a dep even if the dashboard is disabled | |
139 | # in order to be consistent with _calc_daemon_deps(). | |
140 | deps.append(dd.name()) | |
141 | if not port: | |
142 | continue | |
143 | if dd.daemon_id == self.mgr.get_mgr_id(): | |
144 | continue | |
f67539c2 | 145 | assert dd.hostname is not None |
e306af50 TL |
146 | addr = self.mgr.inventory.get_addr(dd.hostname) |
147 | dashboard_urls.append('%s//%s:%s/' % (proto, addr.split(':')[0], | |
148 | port)) | |
149 | ||
150 | context = { | |
f6b5b4d7 TL |
151 | 'dashboard_urls': dashboard_urls, |
152 | 'default_webhook_urls': default_webhook_urls | |
e306af50 TL |
153 | } |
154 | yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context) | |
155 | ||
156 | peers = [] | |
157 | port = '9094' | |
158 | for dd in self.mgr.cache.get_daemons_by_service('alertmanager'): | |
f67539c2 | 159 | assert dd.hostname is not None |
e306af50 TL |
160 | deps.append(dd.name()) |
161 | addr = self.mgr.inventory.get_addr(dd.hostname) | |
162 | peers.append(addr.split(':')[0] + ':' + port) | |
163 | return { | |
164 | "files": { | |
165 | "alertmanager.yml": yml | |
166 | }, | |
167 | "peers": peers | |
168 | }, sorted(deps) | |
169 | ||
170 | def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: | |
171 | # TODO: if there are multiple daemons, who is the active one? | |
f91f0fd5 TL |
172 | if daemon_descrs: |
173 | return daemon_descrs[0] | |
174 | # if empty list provided, return empty Daemon Desc | |
175 | return DaemonDescription() | |
e306af50 | 176 | |
f91f0fd5 | 177 | def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: |
e306af50 | 178 | dd = self.get_active_daemon(daemon_descrs) |
f67539c2 | 179 | assert dd.hostname is not None |
b3b6e05e TL |
180 | addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname) |
181 | port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT | |
182 | service_url = 'http://{}:{}'.format(addr, port) | |
e306af50 TL |
183 | self._set_service_url_on_dashboard( |
184 | 'AlertManager', | |
185 | 'dashboard get-alertmanager-api-host', | |
186 | 'dashboard set-alertmanager-api-host', | |
187 | service_url | |
188 | ) | |
189 | ||
f67539c2 TL |
190 | def ok_to_stop(self, |
191 | daemon_ids: List[str], | |
192 | force: bool = False, | |
193 | known: Optional[List[str]] = None) -> HandleCommandResult: | |
194 | warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Alertmanager', 1) | |
195 | if warn and not force: | |
196 | return HandleCommandResult(-errno.EBUSY, '', warn_message) | |
197 | return HandleCommandResult(0, warn_message, '') | |
198 | ||
e306af50 TL |
199 | |
200 | class PrometheusService(CephadmService): | |
f6b5b4d7 | 201 | TYPE = 'prometheus' |
e306af50 TL |
202 | DEFAULT_SERVICE_PORT = 9095 |
203 | ||
522d829b TL |
204 | def config(self, spec: ServiceSpec) -> None: |
205 | # make sure module is enabled | |
206 | mgr_map = self.mgr.get('mgr_map') | |
207 | if 'prometheus' not in mgr_map.get('services', {}): | |
208 | self.mgr.check_mon_command({ | |
209 | 'prefix': 'mgr module enable', | |
210 | 'module': 'prometheus' | |
211 | }) | |
212 | # we shouldn't get here (mon will tell the mgr to respawn), but no | |
213 | # harm done if we do. | |
214 | ||
215 | def prepare_create( | |
216 | self, | |
217 | daemon_spec: CephadmDaemonDeploySpec, | |
218 | ) -> CephadmDaemonDeploySpec: | |
f6b5b4d7 | 219 | assert self.TYPE == daemon_spec.daemon_type |
f67539c2 | 220 | daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) |
f91f0fd5 | 221 | return daemon_spec |
e306af50 | 222 | |
522d829b TL |
223 | def generate_config( |
224 | self, | |
225 | daemon_spec: CephadmDaemonDeploySpec, | |
226 | ) -> Tuple[Dict[str, Any], List[str]]: | |
f6b5b4d7 | 227 | assert self.TYPE == daemon_spec.daemon_type |
e306af50 TL |
228 | deps = [] # type: List[str] |
229 | ||
230 | # scrape mgrs | |
231 | mgr_scrape_list = [] | |
232 | mgr_map = self.mgr.get('mgr_map') | |
233 | port = None | |
234 | t = mgr_map.get('services', {}).get('prometheus', None) | |
235 | if t: | |
236 | t = t.split('/')[2] | |
237 | mgr_scrape_list.append(t) | |
238 | port = '9283' | |
239 | if ':' in t: | |
240 | port = t.split(':')[1] | |
241 | # scan all mgrs to generate deps and to get standbys too. | |
242 | # assume that they are all on the same port as the active mgr. | |
243 | for dd in self.mgr.cache.get_daemons_by_service('mgr'): | |
244 | # we consider the mgr a dep even if the prometheus module is | |
245 | # disabled in order to be consistent with _calc_daemon_deps(). | |
246 | deps.append(dd.name()) | |
247 | if not port: | |
248 | continue | |
249 | if dd.daemon_id == self.mgr.get_mgr_id(): | |
250 | continue | |
f67539c2 | 251 | assert dd.hostname is not None |
e306af50 TL |
252 | addr = self.mgr.inventory.get_addr(dd.hostname) |
253 | mgr_scrape_list.append(addr.split(':')[0] + ':' + port) | |
254 | ||
255 | # scrape node exporters | |
256 | nodes = [] | |
257 | for dd in self.mgr.cache.get_daemons_by_service('node-exporter'): | |
f67539c2 | 258 | assert dd.hostname is not None |
e306af50 | 259 | deps.append(dd.name()) |
b3b6e05e TL |
260 | addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) |
261 | port = str(dd.ports[0]) if dd.ports else '9100' | |
e306af50 TL |
262 | nodes.append({ |
263 | 'hostname': dd.hostname, | |
b3b6e05e | 264 | 'url': addr.split(':')[0] + ':' + port |
e306af50 TL |
265 | }) |
266 | ||
267 | # scrape alert managers | |
268 | alertmgr_targets = [] | |
269 | for dd in self.mgr.cache.get_daemons_by_service('alertmanager'): | |
f67539c2 | 270 | assert dd.hostname is not None |
e306af50 | 271 | deps.append(dd.name()) |
b3b6e05e TL |
272 | addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) |
273 | port = str(dd.ports[0]) if dd.ports else '9093' | |
274 | alertmgr_targets.append("'{}:{}'".format(addr.split(':')[0], port)) | |
e306af50 | 275 | |
f67539c2 TL |
276 | # scrape haproxies |
277 | haproxy_targets = [] | |
278 | for dd in self.mgr.cache.get_daemons_by_type('ingress'): | |
279 | if dd.service_name() in self.mgr.spec_store: | |
280 | spec = cast(IngressSpec, self.mgr.spec_store[dd.service_name()].spec) | |
281 | assert dd.hostname is not None | |
282 | deps.append(dd.name()) | |
283 | if dd.daemon_type == 'haproxy': | |
284 | addr = self.mgr.inventory.get_addr(dd.hostname) | |
285 | haproxy_targets.append({ | |
286 | "url": f"'{addr.split(':')[0]}:{spec.monitor_port}'", | |
287 | "service": dd.service_name(), | |
288 | }) | |
289 | ||
e306af50 TL |
290 | # generate the prometheus configuration |
291 | context = { | |
292 | 'alertmgr_targets': alertmgr_targets, | |
293 | 'mgr_scrape_list': mgr_scrape_list, | |
f67539c2 | 294 | 'haproxy_targets': haproxy_targets, |
e306af50 TL |
295 | 'nodes': nodes, |
296 | } | |
297 | r = { | |
298 | 'files': { | |
299 | 'prometheus.yml': | |
300 | self.mgr.template.render( | |
301 | 'services/prometheus/prometheus.yml.j2', context) | |
302 | } | |
303 | } | |
304 | ||
305 | # include alerts, if present in the container | |
306 | if os.path.exists(self.mgr.prometheus_alerts_path): | |
307 | with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f: | |
308 | alerts = f.read() | |
309 | r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts | |
310 | ||
311 | return r, sorted(deps) | |
312 | ||
313 | def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: | |
314 | # TODO: if there are multiple daemons, who is the active one? | |
f91f0fd5 TL |
315 | if daemon_descrs: |
316 | return daemon_descrs[0] | |
317 | # if empty list provided, return empty Daemon Desc | |
318 | return DaemonDescription() | |
e306af50 | 319 | |
f91f0fd5 | 320 | def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: |
e306af50 | 321 | dd = self.get_active_daemon(daemon_descrs) |
f67539c2 | 322 | assert dd.hostname is not None |
b3b6e05e TL |
323 | addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname) |
324 | port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT | |
325 | service_url = 'http://{}:{}'.format(addr, port) | |
e306af50 TL |
326 | self._set_service_url_on_dashboard( |
327 | 'Prometheus', | |
328 | 'dashboard get-prometheus-api-host', | |
329 | 'dashboard set-prometheus-api-host', | |
330 | service_url | |
331 | ) | |
332 | ||
f67539c2 TL |
333 | def ok_to_stop(self, |
334 | daemon_ids: List[str], | |
335 | force: bool = False, | |
336 | known: Optional[List[str]] = None) -> HandleCommandResult: | |
337 | warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Prometheus', 1) | |
338 | if warn and not force: | |
339 | return HandleCommandResult(-errno.EBUSY, '', warn_message) | |
340 | return HandleCommandResult(0, warn_message, '') | |
341 | ||
f6b5b4d7 | 342 | |
e306af50 | 343 | class NodeExporterService(CephadmService): |
f6b5b4d7 TL |
344 | TYPE = 'node-exporter' |
345 | ||
f67539c2 | 346 | def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: |
f6b5b4d7 | 347 | assert self.TYPE == daemon_spec.daemon_type |
f67539c2 | 348 | daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) |
f91f0fd5 | 349 | return daemon_spec |
e306af50 | 350 | |
f67539c2 | 351 | def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: |
f6b5b4d7 | 352 | assert self.TYPE == daemon_spec.daemon_type |
e306af50 | 353 | return {}, [] |
f67539c2 TL |
354 | |
355 | def ok_to_stop(self, | |
356 | daemon_ids: List[str], | |
357 | force: bool = False, | |
358 | known: Optional[List[str]] = None) -> HandleCommandResult: | |
359 | # since node exporter runs on each host and cannot compromise data, no extra checks required | |
360 | names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids] | |
361 | out = f'It is presumed safe to stop {names}' | |
362 | return HandleCommandResult(0, out, '') |