]>
Commit | Line | Data |
---|---|---|
f67539c2 | 1 | import errno |
e306af50 TL |
2 | import logging |
3 | import os | |
f67539c2 TL |
4 | from typing import List, Any, Tuple, Dict, Optional, cast |
5 | ||
6 | from mgr_module import HandleCommandResult | |
e306af50 TL |
7 | |
8 | from orchestrator import DaemonDescription | |
f6b5b4d7 | 9 | from ceph.deployment.service_spec import AlertManagerSpec |
f67539c2 TL |
10 | from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec |
11 | from cephadm.services.ingress import IngressSpec | |
e306af50 TL |
12 | from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert |
13 | ||
14 | logger = logging.getLogger(__name__) | |
15 | ||
f6b5b4d7 | 16 | |
class GrafanaService(CephadmService):
    """Deploys the Grafana dashboard daemon and renders its configuration."""

    TYPE = 'grafana'
    DEFAULT_SERVICE_PORT = 3000

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Attach the rendered config and dependency list to the spec."""
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        """Render grafana.ini, the Ceph datasource yml and TLS material.

        Returns a tuple of (config-files dict, sorted dependency names).
        """
        assert self.TYPE == daemon_spec.daemon_type

        deps: List[str] = []
        prometheus_hosts: List[str] = []
        for daemon in self.mgr.cache.get_daemons_by_service('prometheus'):
            assert daemon.hostname is not None
            prometheus_hosts.append(daemon.hostname)
            deps.append(daemon.name())
        grafana_data_sources = self.mgr.template.render(
            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prometheus_hosts})

        # Reuse the stored TLS pair while it is still valid; otherwise mint a
        # fresh self-signed pair and tell the dashboard module to skip
        # certificate verification against it.
        cert = self.mgr.get_store('grafana_crt')
        pkey = self.mgr.get_store('grafana_key')
        if cert and pkey:
            try:
                verify_tls(cert, pkey)
            except ServerConfigException as e:
                logger.warning('Provided grafana TLS certificates invalid: %s', str(e))
                cert, pkey = None, None
        if not cert or not pkey:
            cert, pkey = create_self_signed_cert('Ceph', 'cephadm')
            self.mgr.set_store('grafana_crt', cert)
            self.mgr.set_store('grafana_key', pkey)
            self.mgr.check_mon_command({
                'prefix': 'dashboard set-grafana-api-ssl-verify',
                'value': 'false',
            })

        grafana_ini = self.mgr.template.render(
            'services/grafana/grafana.ini.j2', {'http_port': self.DEFAULT_SERVICE_PORT})

        config_file = {
            'files': {
                'grafana.ini': grafana_ini,
                'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources,
                'certs/cert_file': '# generated by cephadm\n%s' % cert,
                'certs/cert_key': '# generated by cephadm\n%s' % pkey,
            }
        }
        return config_file, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        # Use the least-created one as the active daemon
        if not daemon_descrs:
            # if empty list provided, return empty Daemon Desc
            return DaemonDescription()
        return daemon_descrs[-1]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Point the dashboard module at the active Grafana instance."""
        # TODO: signed cert
        dd = self.get_active_daemon(daemon_descrs)
        assert dd.hostname is not None
        service_url = f'https://{self._inventory_get_addr(dd.hostname)}:{self.DEFAULT_SERVICE_PORT}'
        self._set_service_url_on_dashboard(
            'Grafana',
            'dashboard get-grafana-api-url',
            'dashboard set-grafana-api-url',
            service_url
        )

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Refuse (EBUSY) to stop the last Grafana daemon unless forced."""
        blocked, message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Grafana', 1)
        if blocked and not force:
            return HandleCommandResult(-errno.EBUSY, '', message)
        return HandleCommandResult(0, message, '')
96 | ||
f6b5b4d7 | 97 | |
class AlertmanagerService(CephadmService):
    """Deploys the Prometheus Alertmanager and wires it to the dashboard."""

    TYPE = 'alertmanager'
    DEFAULT_SERVICE_PORT = 9093

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Attach the rendered config and dependency list to the spec."""
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        """Render alertmanager.yml and the gossip peer list.

        Returns a tuple of (config dict with 'files' and 'peers',
        sorted dependency names).
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []
        default_webhook_urls: List[str] = []

        spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
        user_data = spec.user_data
        if 'default_webhook_urls' in user_data and isinstance(
                user_data['default_webhook_urls'], list):
            default_webhook_urls.extend(user_data['default_webhook_urls'])

        # dashboard(s)
        dashboard_urls: List[str] = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        proto = None  # http: or https:
        url = mgr_map.get('services', {}).get('dashboard', None)
        if url:
            dashboard_urls.append(url)
            proto = url.split('/')[0]
            netloc = url.split('/')[2]
            # FIX: the URL may omit an explicit port (e.g. served behind a
            # proxy on the scheme's default port); the previous unconditional
            # split(':')[1] raised IndexError there.  Without a known port we
            # simply skip adding standby-mgr URLs below.
            if ':' in netloc:
                port = netloc.split(':')[1]
        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider mgr a dep even if the dashboard is disabled
            # in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            if not port:
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                continue
            assert dd.hostname is not None
            addr = self.mgr.inventory.get_addr(dd.hostname)
            dashboard_urls.append('%s//%s:%s/' % (proto, addr.split(':')[0],
                                                  port))

        context = {
            'dashboard_urls': dashboard_urls,
            'default_webhook_urls': default_webhook_urls
        }
        yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context)

        # peers gossip over the cluster port (9094), not the API port
        peers = []
        port = '9094'
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            assert dd.hostname is not None
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            peers.append(addr.split(':')[0] + ':' + port)
        return {
            "files": {
                "alertmanager.yml": yml
            },
            "peers": peers
        }, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        # TODO: if there are multiple daemons, who is the active one?
        if daemon_descrs:
            return daemon_descrs[0]
        # if empty list provided, return empty Daemon Desc
        return DaemonDescription()

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Point the dashboard module at the active Alertmanager instance."""
        dd = self.get_active_daemon(daemon_descrs)
        assert dd.hostname is not None
        service_url = 'http://{}:{}'.format(self._inventory_get_addr(dd.hostname),
                                            self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'AlertManager',
            'dashboard get-alertmanager-api-host',
            'dashboard set-alertmanager-api-host',
            service_url
        )

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Refuse (EBUSY) to stop the last Alertmanager unless forced."""
        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Alertmanager', 1)
        if warn and not force:
            return HandleCommandResult(-errno.EBUSY, '', warn_message)
        return HandleCommandResult(0, warn_message, '')
190 | ||
e306af50 TL |
191 | |
class PrometheusService(CephadmService):
    """Deploys Prometheus and renders its scrape configuration from the
    daemons cephadm currently knows about (mgrs, node-exporters,
    alertmanagers and ingress haproxies)."""

    TYPE = 'prometheus'
    DEFAULT_SERVICE_PORT = 9095

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Attach the rendered config and dependency list to the spec."""
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        """Build prometheus.yml (plus optional alert rules) and deps."""
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []

        # scrape mgrs
        mgr_scrape_list: List[str] = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        prometheus_url = mgr_map.get('services', {}).get('prometheus', None)
        if prometheus_url:
            netloc = prometheus_url.split('/')[2]
            mgr_scrape_list.append(netloc)
            # default mgr/prometheus module port unless the URL names one
            port = netloc.split(':')[1] if ':' in netloc else '9283'
        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for mgr_daemon in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider the mgr a dep even if the prometheus module is
            # disabled in order to be consistent with _calc_daemon_deps().
            deps.append(mgr_daemon.name())
            if not port:
                continue
            if mgr_daemon.daemon_id == self.mgr.get_mgr_id():
                continue
            assert mgr_daemon.hostname is not None
            addr = self.mgr.inventory.get_addr(mgr_daemon.hostname)
            mgr_scrape_list.append(addr.split(':')[0] + ':' + port)

        # scrape node exporters
        nodes = []
        for node_daemon in self.mgr.cache.get_daemons_by_service('node-exporter'):
            assert node_daemon.hostname is not None
            deps.append(node_daemon.name())
            addr = self.mgr.inventory.get_addr(node_daemon.hostname)
            nodes.append({
                'hostname': node_daemon.hostname,
                'url': addr.split(':')[0] + ':9100',
            })

        # scrape alert managers
        alertmgr_targets = []
        for am_daemon in self.mgr.cache.get_daemons_by_service('alertmanager'):
            assert am_daemon.hostname is not None
            deps.append(am_daemon.name())
            addr = self.mgr.inventory.get_addr(am_daemon.hostname)
            alertmgr_targets.append("'{}:9093'".format(addr.split(':')[0]))

        # scrape haproxies
        haproxy_targets = []
        for ingress_daemon in self.mgr.cache.get_daemons_by_type('ingress'):
            if ingress_daemon.service_name() in self.mgr.spec_store:
                spec = cast(IngressSpec, self.mgr.spec_store[ingress_daemon.service_name()].spec)
                assert ingress_daemon.hostname is not None
                deps.append(ingress_daemon.name())
                if ingress_daemon.daemon_type == 'haproxy':
                    addr = self.mgr.inventory.get_addr(ingress_daemon.hostname)
                    haproxy_targets.append({
                        "url": f"'{addr.split(':')[0]}:{spec.monitor_port}'",
                        "service": ingress_daemon.service_name(),
                    })

        # generate the prometheus configuration
        context = {
            'alertmgr_targets': alertmgr_targets,
            'mgr_scrape_list': mgr_scrape_list,
            'haproxy_targets': haproxy_targets,
            'nodes': nodes,
        }
        r = {
            'files': {
                'prometheus.yml':
                    self.mgr.template.render(
                        'services/prometheus/prometheus.yml.j2', context)
            }
        }

        # include alerts, if present in the container
        if os.path.exists(self.mgr.prometheus_alerts_path):
            with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
                alerts = f.read()
            r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts

        return r, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        # TODO: if there are multiple daemons, who is the active one?
        if not daemon_descrs:
            # if empty list provided, return empty Daemon Desc
            return DaemonDescription()
        return daemon_descrs[0]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Point the dashboard module at the active Prometheus instance."""
        dd = self.get_active_daemon(daemon_descrs)
        assert dd.hostname is not None
        service_url = f'http://{self._inventory_get_addr(dd.hostname)}:{self.DEFAULT_SERVICE_PORT}'
        self._set_service_url_on_dashboard(
            'Prometheus',
            'dashboard get-prometheus-api-host',
            'dashboard set-prometheus-api-host',
            service_url
        )

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Refuse (EBUSY) to stop the only Prometheus unless forced."""
        blocked, message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Prometheus', 1)
        if blocked and not force:
            return HandleCommandResult(-errno.EBUSY, '', message)
        return HandleCommandResult(0, message, '')
313 | ||
f6b5b4d7 | 314 | |
class NodeExporterService(CephadmService):
    """node-exporter needs no generated configuration; one instance runs on
    every host, so stopping any of them is always considered safe."""

    TYPE = 'node-exporter'

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Attach the (empty) config and dependency list to the spec."""
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        """node-exporter takes no config files and has no dependencies."""
        assert self.TYPE == daemon_spec.daemon_type
        return {}, []

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        # since node exporter runs on each host and cannot compromise data, no extra checks required
        names = ['%s.%s' % (self.TYPE, daemon_id) for daemon_id in daemon_ids]
        out = 'It is presumed safe to stop %s' % names
        return HandleCommandResult(0, out, '')