]>
Commit | Line | Data |
---|---|---|
1 | import errno | |
2 | import logging | |
3 | import os | |
4 | from typing import List, Any, Tuple, Dict, Optional, cast | |
5 | ||
6 | from mgr_module import HandleCommandResult | |
7 | ||
8 | from orchestrator import DaemonDescription | |
9 | from ceph.deployment.service_spec import AlertManagerSpec | |
10 | from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec | |
11 | from cephadm.services.ingress import IngressSpec | |
12 | from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert | |
13 | ||
14 | logger = logging.getLogger(__name__) | |
15 | ||
16 | ||
class GrafanaService(CephadmService):
    """cephadm service handler for ``grafana`` daemons.

    Renders the files handed to a grafana daemon (``grafana.ini``, the
    Prometheus data source provisioning file and the TLS certificate/key)
    and pushes the Grafana URL to the dashboard.
    """

    TYPE = 'grafana'
    DEFAULT_SERVICE_PORT = 3000

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Store the generated config and deps on *daemon_spec* and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        final_config, deps = self.generate_config(daemon_spec)
        daemon_spec.final_config = final_config
        daemon_spec.deps = deps
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        """Build the config file set for a grafana daemon.

        Returns a ``(config, deps)`` tuple: ``config`` is a dict with a
        single ``'files'`` mapping (path -> rendered content) and ``deps``
        is the sorted list of prometheus daemon names the config was
        derived from.
        """
        assert self.TYPE == daemon_spec.daemon_type

        # Every prometheus daemon becomes both a data source and a dependency.
        deps = []  # type: List[str]
        prometheus_hosts = []  # type: List[str]
        for prom in self.mgr.cache.get_daemons_by_service('prometheus'):
            assert prom.hostname is not None
            host = prom.ip or self._inventory_get_addr(prom.hostname)
            prom_port = prom.ports[0] if prom.ports else 9095
            prometheus_hosts.append(f'{host}:{prom_port}')
            deps.append(prom.name())
        datasources_yml = self.mgr.template.render(
            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prometheus_hosts})

        # Prefer the stored certificate/key pair; discard it if it does not
        # pass verification.
        cert = self.mgr.get_store('grafana_crt')
        pkey = self.mgr.get_store('grafana_key')
        if cert and pkey:
            try:
                verify_tls(cert, pkey)
            except ServerConfigException as e:
                logger.warning('Provided grafana TLS certificates invalid: %s', str(e))
                cert = pkey = None

        # No usable pair: create a self-signed one, persist it and switch off
        # SSL verification for the dashboard's grafana API access.
        if not cert or not pkey:
            cert, pkey = create_self_signed_cert('Ceph', 'cephadm')
            self.mgr.set_store('grafana_crt', cert)
            self.mgr.set_store('grafana_key', pkey)
            self.mgr.check_mon_command({
                'prefix': 'dashboard set-grafana-api-ssl-verify',
                'value': 'false',
            })

        if daemon_spec.ports:
            http_port = daemon_spec.ports[0]
        else:
            http_port = self.DEFAULT_SERVICE_PORT
        ini_context = {
            'http_port': http_port,
            'http_addr': daemon_spec.ip or '',
        }
        grafana_ini = self.mgr.template.render('services/grafana/grafana.ini.j2', ini_context)

        files = {
            "grafana.ini": grafana_ini,
            'provisioning/datasources/ceph-dashboard.yml': datasources_yml,
            'certs/cert_file': '# generated by cephadm\n%s' % cert,
            'certs/cert_key': '# generated by cephadm\n%s' % pkey,
        }
        return {'files': files}, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the last entry of *daemon_descrs*, or an empty description.

        An empty ``DaemonDescription`` is returned when the list is empty.
        """
        if not daemon_descrs:
            return DaemonDescription()
        # The last element of the list is treated as the active daemon.
        return daemon_descrs[-1]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Publish the active grafana daemon's https URL to the dashboard."""
        # TODO: signed cert
        active = self.get_active_daemon(daemon_descrs)
        assert active.hostname is not None
        host = active.ip or self._inventory_get_addr(active.hostname)
        port = active.ports[0] if active.ports else self.DEFAULT_SERVICE_PORT
        self._set_service_url_on_dashboard(
            'Grafana',
            'dashboard get-grafana-api-url',
            'dashboard set-grafana-api-url',
            'https://{}:{}'.format(host, port)
        )

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Report whether the given grafana daemons may be stopped.

        Returns ``-EBUSY`` with the warning on stderr when
        ``_enough_daemons_to_stop`` warns and *force* is not set; otherwise
        returns success with the (possibly empty) warning on stdout.
        """
        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Grafana', 1)
        if force or not warn:
            return HandleCommandResult(0, warn_message, '')
        return HandleCommandResult(-errno.EBUSY, '', warn_message)
102 | ||
103 | ||
class AlertmanagerService(CephadmService):
    """cephadm service handler for ``alertmanager`` daemons.

    Renders ``alertmanager.yml`` (dashboard and user-supplied webhook
    receivers), computes the peer list, and pushes the Alertmanager URL to
    the dashboard.
    """

    TYPE = 'alertmanager'
    DEFAULT_SERVICE_PORT = 9093

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Store the generated config and deps on *daemon_spec* and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        """Build the config for an alertmanager daemon.

        Returns a ``(config, deps)`` tuple. ``config`` holds the rendered
        ``alertmanager.yml`` under ``'files'`` and the ``host:9094`` peer
        list under ``'peers'``; ``deps`` is the sorted list of mgr and
        alertmanager daemon names.

        If the active dashboard URL carries no explicit numeric port, only
        that URL is used and no standby dashboard URLs are generated.
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []
        default_webhook_urls: List[str] = []

        spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
        user_data = spec.user_data
        if 'default_webhook_urls' in user_data and isinstance(
                user_data['default_webhook_urls'], list):
            default_webhook_urls.extend(user_data['default_webhook_urls'])

        # dashboard(s)
        dashboard_urls: List[str] = []
        mgr_map = self.mgr.get('mgr_map')
        dashboard_port: Optional[str] = None
        proto = None  # http: or https:
        url = mgr_map.get('services', {}).get('dashboard', None)
        if url:
            dashboard_urls.append(url)
            url_parts = url.split('/')
            proto = url_parts[0]
            # Take the port from the *last* ':' of the authority so that
            # IPv6 hosts work, and tolerate URLs without an explicit port
            # instead of raising IndexError.
            netloc = url_parts[2] if len(url_parts) > 2 else ''
            _, colon, candidate = netloc.rpartition(':')
            if colon and candidate.isdigit():
                dashboard_port = candidate
        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider mgr a dep even if the dashboard is disabled
            # in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            if not dashboard_port:
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                # the active mgr's URL was already added above
                continue
            assert dd.hostname is not None
            addr = self.mgr.inventory.get_addr(dd.hostname)
            dashboard_urls.append('%s//%s:%s/' % (proto, addr.split(':')[0],
                                                  dashboard_port))

        context = {
            'dashboard_urls': dashboard_urls,
            'default_webhook_urls': default_webhook_urls
        }
        yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context)

        # Every alertmanager daemon is listed as a peer on a fixed port and
        # recorded as a dependency.
        peers = []
        peer_port = '9094'
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            assert dd.hostname is not None
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            peers.append(addr.split(':')[0] + ':' + peer_port)
        return {
            "files": {
                "alertmanager.yml": yml
            },
            "peers": peers
        }, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the first entry of *daemon_descrs*, or an empty description."""
        # TODO: if there are multiple daemons, who is the active one?
        if daemon_descrs:
            return daemon_descrs[0]
        # if empty list provided, return empty Daemon Desc
        return DaemonDescription()

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Publish the active alertmanager daemon's http URL to the dashboard."""
        dd = self.get_active_daemon(daemon_descrs)
        assert dd.hostname is not None
        addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
        port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
        service_url = 'http://{}:{}'.format(addr, port)
        self._set_service_url_on_dashboard(
            'AlertManager',
            'dashboard get-alertmanager-api-host',
            'dashboard set-alertmanager-api-host',
            service_url
        )

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Report whether the given alertmanager daemons may be stopped.

        Returns ``-EBUSY`` with the warning on stderr when
        ``_enough_daemons_to_stop`` warns and *force* is not set; otherwise
        returns success with the (possibly empty) warning on stdout.
        """
        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Alertmanager', 1)
        if warn and not force:
            return HandleCommandResult(-errno.EBUSY, '', warn_message)
        return HandleCommandResult(0, warn_message, '')
197 | ||
198 | ||
class PrometheusService(CephadmService):
    """cephadm service handler for ``prometheus`` daemons.

    Renders ``prometheus.yml`` with scrape targets for mgrs, node
    exporters, alertmanagers and haproxy daemons, optionally ships the
    alert rules file, and pushes the Prometheus URL to the dashboard.
    """

    TYPE = 'prometheus'
    DEFAULT_SERVICE_PORT = 9095

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Store the generated config and deps on *daemon_spec* and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        """Build the config for a prometheus daemon.

        Returns a ``(config, deps)`` tuple. ``config['files']`` contains the
        rendered ``prometheus.yml`` and, when the alerts file exists at
        ``self.mgr.prometheus_alerts_path``, its content as well. ``deps``
        is the sorted list of all daemon names the config was derived from.
        """
        assert self.TYPE == daemon_spec.daemon_type
        deps = []  # type: List[str]

        # scrape mgrs
        mgr_scrape_list = []
        mgr_map = self.mgr.get('mgr_map')
        mgr_port: Optional[str] = None
        t = mgr_map.get('services', {}).get('prometheus', None)
        if t:
            t = t.split('/')[2]
            mgr_scrape_list.append(t)
            mgr_port = '9283'
            # Use the segment after the *last* ':' and require it to be
            # numeric, so IPv6 hosts do not produce an empty/garbage port.
            _, colon, candidate = t.rpartition(':')
            if colon and candidate.isdigit():
                mgr_port = candidate
        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider the mgr a dep even if the prometheus module is
            # disabled in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            if not mgr_port:
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                # the active mgr's endpoint was already added above
                continue
            assert dd.hostname is not None
            addr = self.mgr.inventory.get_addr(dd.hostname)
            mgr_scrape_list.append(addr.split(':')[0] + ':' + mgr_port)

        # scrape node exporters
        nodes = []
        for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
            assert dd.hostname is not None
            deps.append(dd.name())
            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
            exporter_port = str(dd.ports[0]) if dd.ports else '9100'
            nodes.append({
                'hostname': dd.hostname,
                'url': addr.split(':')[0] + ':' + exporter_port
            })

        # scrape alert managers
        alertmgr_targets = []
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            assert dd.hostname is not None
            deps.append(dd.name())
            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
            alertmgr_port = str(dd.ports[0]) if dd.ports else '9093'
            alertmgr_targets.append("'{}:{}'".format(addr.split(':')[0], alertmgr_port))

        # scrape haproxies
        haproxy_targets = []
        for dd in self.mgr.cache.get_daemons_by_type('ingress'):
            # only daemons whose service still has a stored spec are considered
            if dd.service_name() in self.mgr.spec_store:
                spec = cast(IngressSpec, self.mgr.spec_store[dd.service_name()].spec)
                assert dd.hostname is not None
                deps.append(dd.name())
                if dd.daemon_type == 'haproxy':
                    addr = self.mgr.inventory.get_addr(dd.hostname)
                    haproxy_targets.append({
                        "url": f"'{addr.split(':')[0]}:{spec.monitor_port}'",
                        "service": dd.service_name(),
                    })

        # generate the prometheus configuration
        context = {
            'alertmgr_targets': alertmgr_targets,
            'mgr_scrape_list': mgr_scrape_list,
            'haproxy_targets': haproxy_targets,
            'nodes': nodes,
        }
        r = {
            'files': {
                'prometheus.yml':
                    self.mgr.template.render(
                        'services/prometheus/prometheus.yml.j2', context)
            }
        }

        # include alerts, if present in the container
        if os.path.exists(self.mgr.prometheus_alerts_path):
            with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
                alerts = f.read()
            r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts

        return r, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the first entry of *daemon_descrs*, or an empty description."""
        # TODO: if there are multiple daemons, who is the active one?
        if daemon_descrs:
            return daemon_descrs[0]
        # if empty list provided, return empty Daemon Desc
        return DaemonDescription()

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Publish the active prometheus daemon's http URL to the dashboard."""
        dd = self.get_active_daemon(daemon_descrs)
        assert dd.hostname is not None
        addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
        port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
        service_url = 'http://{}:{}'.format(addr, port)
        self._set_service_url_on_dashboard(
            'Prometheus',
            'dashboard get-prometheus-api-host',
            'dashboard set-prometheus-api-host',
            service_url
        )

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Report whether the given prometheus daemons may be stopped.

        Returns ``-EBUSY`` with the warning on stderr when
        ``_enough_daemons_to_stop`` warns and *force* is not set; otherwise
        returns success with the (possibly empty) warning on stdout.
        """
        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Prometheus', 1)
        if warn and not force:
            return HandleCommandResult(-errno.EBUSY, '', warn_message)
        return HandleCommandResult(0, warn_message, '')
323 | ||
324 | ||
class NodeExporterService(CephadmService):
    """cephadm service handler for ``node-exporter`` daemons.

    The daemon needs no generated configuration and has no dependencies.
    """

    TYPE = 'node-exporter'

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Store the generated config and deps on *daemon_spec* and return it."""
        assert self.TYPE == daemon_spec.daemon_type
        final_config, deps = self.generate_config(daemon_spec)
        daemon_spec.final_config = final_config
        daemon_spec.deps = deps
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        """Return an empty config dict and an empty dependency list."""
        assert self.TYPE == daemon_spec.daemon_type
        return {}, []

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Always report success, naming the daemons in the output message."""
        # since node exporter runs on each host and cannot compromise data, no extra checks required
        daemon_names = []
        for daemon_id in daemon_ids:
            daemon_names.append('{}.{}'.format(self.TYPE, daemon_id))
        message = 'It is presumed safe to stop {}'.format(daemon_names)
        return HandleCommandResult(0, message, '')