]>
Commit | Line | Data |
---|---|---|
e306af50 TL |
1 | import logging |
2 | import os | |
3 | from typing import List, Any, Tuple, Dict | |
4 | ||
5 | from orchestrator import DaemonDescription | |
f6b5b4d7 TL |
6 | from ceph.deployment.service_spec import AlertManagerSpec |
7 | from cephadm.services.cephadmservice import CephadmService, CephadmDaemonSpec | |
e306af50 TL |
8 | from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert |
9 | ||
10 | logger = logging.getLogger(__name__) | |
11 | ||
f6b5b4d7 | 12 | |
class GrafanaService(CephadmService):
    """Service handler for ``grafana`` daemons."""

    TYPE = 'grafana'
    DEFAULT_SERVICE_PORT = 3000

    def create(self, daemon_spec: CephadmDaemonSpec) -> str:
        """Hand ``daemon_spec`` to the mgr module for daemon creation.

        Returns whatever ``self.mgr._create_daemon`` returns.
        """
        assert self.TYPE == daemon_spec.daemon_type
        outcome = self.mgr._create_daemon(daemon_spec)
        return outcome

    def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]:
        """Build the config payload and dependency list for a grafana daemon.

        The payload holds four rendered files: ``grafana.ini``, the
        provisioned data-source definition (listing the hosts of all known
        prometheus daemons) and the TLS certificate/key pair.

        Returns a ``(config, deps)`` tuple where ``deps`` is the sorted list
        of prometheus daemon names.
        """
        assert self.TYPE == daemon_spec.daemon_type

        # Every prometheus daemon is both a data source and a dependency.
        prom_daemons = list(self.mgr.cache.get_daemons_by_service('prometheus'))
        prom_hosts = [d.hostname for d in prom_daemons]  # type: List[str]
        deps = [d.name() for d in prom_daemons]  # type: List[str]
        data_sources_yml = self.mgr.template.render(
            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_hosts})

        cert, pkey = self._get_tls_material()

        ini_text = self.mgr.template.render(
            'services/grafana/grafana.ini.j2', {'http_port': self.DEFAULT_SERVICE_PORT})

        files = {
            "grafana.ini": ini_text,
            'provisioning/datasources/ceph-dashboard.yml': data_sources_yml,
            'certs/cert_file': '# generated by cephadm\n%s' % cert,
            'certs/cert_key': '# generated by cephadm\n%s' % pkey,
        }
        return {'files': files}, sorted(deps)

    def _get_tls_material(self) -> Tuple[Any, Any]:
        """Return the ``(cert, key)`` pair to deploy with grafana.

        A stored pair (``grafana_crt`` / ``grafana_key``) is used when both
        parts exist and pass ``verify_tls``.  Otherwise a self-signed pair is
        generated, persisted in the store, and the dashboard is told not to
        verify the grafana API certificate.
        """
        cert = self.mgr.get_store('grafana_crt')
        pkey = self.mgr.get_store('grafana_key')
        if cert and pkey:
            try:
                verify_tls(cert, pkey)
            except ServerConfigException as e:
                logger.warning('Provided grafana TLS certificates invalid: %s', str(e))
            else:
                return cert, pkey

        # Nothing usable in the store: fall back to a self-signed pair.
        cert, pkey = create_self_signed_cert('Ceph', 'cephadm')
        self.mgr.set_store('grafana_crt', cert)
        self.mgr.set_store('grafana_key', pkey)
        self.mgr.check_mon_command({
            'prefix': 'dashboard set-grafana-api-ssl-verify',
            'value': 'false',
        })
        return cert, pkey

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Pick the last entry of ``daemon_descrs`` as the active daemon.

        NOTE(review): presumably the last entry is the most recently created
        daemon; the ordering is up to the caller -- confirm.
        """
        *_, newest = daemon_descrs
        return newest

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]):
        """Point the dashboard's grafana API URL at the active daemon."""
        # TODO: signed cert
        active = self.get_active_daemon(daemon_descrs)
        host = self._inventory_get_addr(active.hostname)
        service_url = f'https://{host}:{self.DEFAULT_SERVICE_PORT}'
        self._set_service_url_on_dashboard(
            'Grafana',
            'dashboard get-grafana-api-url',
            'dashboard set-grafana-api-url',
            service_url
        )
77 | ||
f6b5b4d7 | 78 | |
class AlertmanagerService(CephadmService):
    """Service handler for ``alertmanager`` daemons."""

    TYPE = 'alertmanager'
    DEFAULT_SERVICE_PORT = 9093

    def create(self, daemon_spec: CephadmDaemonSpec[AlertManagerSpec]) -> str:
        """Hand ``daemon_spec`` to the mgr module for daemon creation.

        The spec must carry a service spec (``daemon_spec.spec``).
        Returns whatever ``self.mgr._create_daemon`` returns.
        """
        assert self.TYPE == daemon_spec.daemon_type
        assert daemon_spec.spec
        return self.mgr._create_daemon(daemon_spec)

    def generate_config(self, daemon_spec: CephadmDaemonSpec[AlertManagerSpec]) -> Tuple[Dict[str, Any], List[str]]:
        """Build the config payload and dependency list for an alertmanager.

        The payload contains the rendered ``alertmanager.yml`` (fed with the
        dashboard URLs of all mgrs and any user supplied default webhook
        URLs) and the list of alertmanager peers.

        Returns a ``(config, deps)`` tuple where ``deps`` is the sorted list
        of mgr and alertmanager daemon names.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import urlparse

        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []
        default_webhook_urls: List[str] = []

        if daemon_spec.spec:
            user_data = daemon_spec.spec.user_data
            # Only a real list is accepted; anything else is ignored.
            if 'default_webhook_urls' in user_data and isinstance(
                    user_data['default_webhook_urls'], list):
                default_webhook_urls.extend(user_data['default_webhook_urls'])

        # dashboard(s)
        dashboard_urls: List[str] = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        proto = None  # 'http' or 'https'
        url = mgr_map.get('services', {}).get('dashboard', None)
        if url:
            dashboard_urls.append(url)
            # Use a real URL parser: naive splitting on ':' breaks for
            # IPv6 literals and for URLs without an explicit port.
            parsed = urlparse(url)
            proto = parsed.scheme
            try:
                port = parsed.port
            except ValueError as e:
                # Non-numeric / out-of-range port: standby URLs cannot be
                # derived, so only the active dashboard URL is used.
                logger.warning('Unable to parse port of dashboard URL %s: %s', url, str(e))
                port = None
        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider mgr a dep even if the dashboard is disabled
            # in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            if not port:
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                # the active mgr's URL was already added above
                continue
            addr = self.mgr.inventory.get_addr(dd.hostname)
            dashboard_urls.append('%s://%s:%s/' % (proto, addr.split(':')[0], port))

        context = {
            'dashboard_urls': dashboard_urls,
            'default_webhook_urls': default_webhook_urls
        }
        yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context)

        # Every alertmanager daemon is a dependency and is listed as a peer
        # on port 9094.
        peers = []
        peer_port = '9094'
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            peers.append(addr.split(':')[0] + ':' + peer_port)
        return {
            "files": {
                "alertmanager.yml": yml
            },
            "peers": peers
        }, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the first entry of ``daemon_descrs`` as the active daemon."""
        # TODO: if there are multiple daemons, who is the active one?
        return daemon_descrs[0]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]):
        """Point the dashboard's alertmanager API host at the active daemon."""
        dd = self.get_active_daemon(daemon_descrs)
        service_url = 'http://{}:{}'.format(
            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'AlertManager',
            'dashboard get-alertmanager-api-host',
            'dashboard set-alertmanager-api-host',
            service_url
        )
155 | ||
156 | ||
class PrometheusService(CephadmService):
    """Service handler for ``prometheus`` daemons."""

    TYPE = 'prometheus'
    DEFAULT_SERVICE_PORT = 9095

    def create(self, daemon_spec: CephadmDaemonSpec) -> str:
        """Hand ``daemon_spec`` to the mgr module for daemon creation.

        Returns whatever ``self.mgr._create_daemon`` returns.
        """
        assert self.TYPE == daemon_spec.daemon_type
        return self.mgr._create_daemon(daemon_spec)

    def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]:
        """Build the config payload and dependency list for a prometheus daemon.

        The rendered ``prometheus.yml`` scrapes the mgr prometheus endpoints,
        all node-exporter daemons and all alertmanager daemons.  If the alert
        rules file at ``self.mgr.prometheus_alerts_path`` exists, its content
        is shipped as well.

        Returns a ``(config, deps)`` tuple where ``deps`` is the sorted list
        of mgr, node-exporter and alertmanager daemon names.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import urlparse

        assert self.TYPE == daemon_spec.daemon_type
        deps = []  # type: List[str]

        # scrape mgrs
        mgr_scrape_list = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        active_url = mgr_map.get('services', {}).get('prometheus', None)
        if active_url:
            # Use a real URL parser: naive splitting on ':' picks the wrong
            # field for IPv6 literals such as 'http://[::1]:9283/'.
            parsed = urlparse(active_url)
            mgr_scrape_list.append(parsed.netloc)
            try:
                # 9283 is the fallback when the URL has no explicit port.
                port = str(parsed.port) if parsed.port is not None else '9283'
            except ValueError as e:
                # Non-numeric / out-of-range port: standby targets cannot be
                # derived, so only the active mgr is scraped.
                logger.warning('Unable to parse port of prometheus URL %s: %s',
                               active_url, str(e))
                port = None
        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider the mgr a dep even if the prometheus module is
            # disabled in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            if not port:
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                # the active mgr's endpoint was already added above
                continue
            addr = self.mgr.inventory.get_addr(dd.hostname)
            mgr_scrape_list.append(addr.split(':')[0] + ':' + port)

        # scrape node exporters (addressed on port 9100)
        nodes = []
        for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            nodes.append({
                'hostname': dd.hostname,
                'url': addr.split(':')[0] + ':9100'
            })

        # scrape alert managers (addressed on port 9093); targets are
        # passed to the template already wrapped in single quotes
        alertmgr_targets = []
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            alertmgr_targets.append("'{}:9093'".format(addr.split(':')[0]))

        # generate the prometheus configuration
        context = {
            'alertmgr_targets': alertmgr_targets,
            'mgr_scrape_list': mgr_scrape_list,
            'nodes': nodes,
        }
        r = {
            'files': {
                'prometheus.yml':
                    self.mgr.template.render(
                        'services/prometheus/prometheus.yml.j2', context)
            }
        }

        # include alerts, if present in the container
        if os.path.exists(self.mgr.prometheus_alerts_path):
            with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
                alerts = f.read()
            r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts

        return r, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the first entry of ``daemon_descrs`` as the active daemon."""
        # TODO: if there are multiple daemons, who is the active one?
        return daemon_descrs[0]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]):
        """Point the dashboard's prometheus API host at the active daemon."""
        dd = self.get_active_daemon(daemon_descrs)
        service_url = 'http://{}:{}'.format(
            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'Prometheus',
            'dashboard get-prometheus-api-host',
            'dashboard set-prometheus-api-host',
            service_url
        )
246 | ||
f6b5b4d7 | 247 | |
class NodeExporterService(CephadmService):
    """Service handler for ``node-exporter`` daemons."""

    TYPE = 'node-exporter'

    def create(self, daemon_spec: CephadmDaemonSpec) -> str:
        """Hand ``daemon_spec`` to the mgr module for daemon creation.

        Returns whatever ``self.mgr._create_daemon`` returns.
        """
        assert self.TYPE == daemon_spec.daemon_type
        outcome = self.mgr._create_daemon(daemon_spec)
        return outcome

    def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]:
        """Return an empty config and an empty dependency list."""
        assert self.TYPE == daemon_spec.daemon_type
        config = {}  # type: Dict[str, Any]
        deps = []  # type: List[str]
        return config, deps