]> git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/services/monitoring.py
Import ceph 15.2.8
[ceph.git] / ceph / src / pybind / mgr / cephadm / services / monitoring.py
1 import logging
2 import os
3 from typing import List, Any, Tuple, Dict
4
5 from orchestrator import DaemonDescription
6 from ceph.deployment.service_spec import AlertManagerSpec
7 from cephadm.services.cephadmservice import CephadmService, CephadmDaemonSpec
8 from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert
9
10 logger = logging.getLogger(__name__)
11
12
class GrafanaService(CephadmService):
    """cephadm service handler that builds the configuration of grafana daemons."""

    TYPE = 'grafana'
    DEFAULT_SERVICE_PORT = 3000

    def prepare_create(self, daemon_spec: CephadmDaemonSpec) -> CephadmDaemonSpec:
        """Check that ``daemon_spec`` describes a grafana daemon and hand it back as-is."""
        assert self.TYPE == daemon_spec.daemon_type
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]:
        """Render the files of a grafana daemon.

        The Prometheus data-source file is rendered from the hostnames of the
        cached ``prometheus`` daemons.  The TLS pair is read from the mgr
        store; when it is missing or rejected by ``verify_tls`` a self-signed
        pair is created, stored, and the dashboard's grafana SSL verification
        is switched off.

        :param daemon_spec: spec of the daemon being configured; its
            ``daemon_type`` must equal ``TYPE``.
        :return: ``(config, deps)`` where ``config['files']`` maps file names
            to their content and ``deps`` is the sorted list of prometheus
            daemon names.
        """
        assert self.TYPE == daemon_spec.daemon_type

        prometheus_daemons = list(self.mgr.cache.get_daemons_by_service('prometheus'))
        prometheus_hosts = [d.hostname for d in prometheus_daemons]  # type: List[str]
        dep_names = [d.name() for d in prometheus_daemons]  # type: List[str]

        datasources_yml = self.mgr.template.render(
            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prometheus_hosts})

        tls_cert = self.mgr.get_store('grafana_crt')
        tls_key = self.mgr.get_store('grafana_key')
        pair_is_usable = bool(tls_cert and tls_key)
        if pair_is_usable:
            try:
                verify_tls(tls_cert, tls_key)
            except ServerConfigException as e:
                logger.warning('Provided grafana TLS certificates invalid: %s', str(e))
                pair_is_usable = False

        if not pair_is_usable:
            # Nothing usable in the store: fall back to a self-signed pair and
            # persist it so that later calls find it in the store.
            tls_cert, tls_key = create_self_signed_cert('Ceph', 'cephadm')
            self.mgr.set_store('grafana_crt', tls_cert)
            self.mgr.set_store('grafana_key', tls_key)
            self.mgr.check_mon_command({
                'prefix': 'dashboard set-grafana-api-ssl-verify',
                'value': 'false',
            })

        ini_content = self.mgr.template.render(
            'services/grafana/grafana.ini.j2', {'http_port': self.DEFAULT_SERVICE_PORT})

        files = {
            "grafana.ini": ini_content,
            'provisioning/datasources/ceph-dashboard.yml': datasources_yml,
            'certs/cert_file': '# generated by cephadm\n%s' % tls_cert,
            'certs/cert_key': '# generated by cephadm\n%s' % tls_key,
        }
        return {'files': files}, sorted(dep_names)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Pick the daemon that is treated as active.

        :param daemon_descrs: candidate daemon descriptions.
        :return: the last entry of ``daemon_descrs``, or an empty
            ``DaemonDescription`` when the list is empty.
        """
        if not daemon_descrs:
            # Nothing to choose from: hand back an empty description.
            return DaemonDescription()
        # NOTE(review): the original note calls this the "least-created"
        # daemon; presumably the list is ordered by creation -- confirm.
        return daemon_descrs[-1]

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Publish the https URL of the active grafana daemon to the dashboard."""
        # TODO: signed cert
        active = self.get_active_daemon(daemon_descrs)
        host_addr = self._inventory_get_addr(active.hostname)
        service_url = 'https://{}:{}'.format(host_addr, self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'Grafana',
            'dashboard get-grafana-api-url',
            'dashboard set-grafana-api-url',
            service_url
        )
80
81
class AlertmanagerService(CephadmService):
    """cephadm service handler that builds the configuration of alertmanager daemons."""

    TYPE = 'alertmanager'
    DEFAULT_SERVICE_PORT = 9093

    def prepare_create(self, daemon_spec: CephadmDaemonSpec[AlertManagerSpec]) -> CephadmDaemonSpec:
        """Check that ``daemon_spec`` is an alertmanager spec carrying a service spec.

        :return: ``daemon_spec`` unchanged.
        """
        assert self.TYPE == daemon_spec.daemon_type
        assert daemon_spec.spec
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonSpec[AlertManagerSpec]) -> Tuple[Dict[str, Any], List[str]]:
        """Render ``alertmanager.yml`` and compute the peer list.

        The dashboard URL published in the mgr map is used as-is for the
        active mgr; for every other cached mgr daemon a URL is derived from
        the inventory address, reusing scheme and port of the active one.

        :param daemon_spec: spec of the daemon being configured; its
            ``daemon_type`` must equal ``TYPE``.  ``spec.user_data`` may carry
            a ``default_webhook_urls`` list.
        :return: ``(config, deps)`` where ``config`` holds the rendered
            ``files`` and the ``peers`` list, and ``deps`` is the sorted list
            of mgr and alertmanager daemon names.
        """
        # Function-scope import keeps this method self-contained.
        from urllib.parse import urlsplit

        assert self.TYPE == daemon_spec.daemon_type
        deps: List[str] = []
        default_webhook_urls: List[str] = []

        if daemon_spec.spec:
            user_data = daemon_spec.spec.user_data
            if 'default_webhook_urls' in user_data and isinstance(
                    user_data['default_webhook_urls'], list):
                default_webhook_urls.extend(user_data['default_webhook_urls'])

        # dashboard(s)
        dashboard_urls: List[str] = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        proto = None  # URL scheme without the trailing colon: http or https
        url = mgr_map.get('services', {}).get('dashboard', None)
        if url:
            dashboard_urls.append(url)
            # Parse with urlsplit rather than splitting on '/' and ':': a URL
            # without an explicit port used to raise IndexError, and a
            # bracketed IPv6 host used to yield an empty port.
            try:
                parsed = urlsplit(url)
                proto = parsed.scheme
                port = parsed.port
            except ValueError as e:
                logger.warning('Unable to parse dashboard URL %s: %s', url, e)
                port = None
        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider mgr a dep even if the dashboard is disabled
            # in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            if not port:
                # No usable port known for the active dashboard: no standby
                # URL can be derived.
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                # The active mgr's URL was already taken from the mgr map.
                continue
            addr = self.mgr.inventory.get_addr(dd.hostname)
            dashboard_urls.append('%s://%s:%s/' % (proto, addr.split(':')[0],
                                                   port))

        context = {
            'dashboard_urls': dashboard_urls,
            'default_webhook_urls': default_webhook_urls
        }
        yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context)

        peers = []
        # NOTE(review): presumably the alertmanager cluster (peer) port -- confirm.
        peer_port = '9094'
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            peers.append(addr.split(':')[0] + ':' + peer_port)
        return {
            "files": {
                "alertmanager.yml": yml
            },
            "peers": peers
        }, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the first entry of ``daemon_descrs``, or an empty description if there is none."""
        # TODO: if there are multiple daemons, who is the active one?
        if daemon_descrs:
            return daemon_descrs[0]
        # if empty list provided, return empty Daemon Desc
        return DaemonDescription()

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Publish the http URL of the active alertmanager daemon to the dashboard."""
        dd = self.get_active_daemon(daemon_descrs)
        service_url = 'http://{}:{}'.format(self._inventory_get_addr(dd.hostname),
                                            self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'AlertManager',
            'dashboard get-alertmanager-api-host',
            'dashboard set-alertmanager-api-host',
            service_url
        )
162
163
class PrometheusService(CephadmService):
    """cephadm service handler that builds the configuration of prometheus daemons."""

    TYPE = 'prometheus'
    DEFAULT_SERVICE_PORT = 9095

    def prepare_create(self, daemon_spec: CephadmDaemonSpec) -> CephadmDaemonSpec:
        """Check that ``daemon_spec`` describes a prometheus daemon and hand it back as-is."""
        assert self.TYPE == daemon_spec.daemon_type
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]:
        """Render ``prometheus.yml`` and collect the daemons it depends on.

        Scrape targets are gathered from the mgr map (active mgr), the cached
        mgr daemons (standbys), the node-exporter daemons and the
        alertmanager daemons.  If the file at ``mgr.prometheus_alerts_path``
        exists, its content is shipped as the alerting rules file.

        :param daemon_spec: spec of the daemon being configured; its
            ``daemon_type`` must equal ``TYPE``.
        :return: ``(config, deps)`` where ``config['files']`` maps file names
            to their content and ``deps`` is the sorted list of mgr,
            node-exporter and alertmanager daemon names.
        """
        # Function-scope import keeps this method self-contained.
        from urllib.parse import urlsplit

        assert self.TYPE == daemon_spec.daemon_type
        deps = []  # type: List[str]

        # scrape mgrs
        mgr_scrape_list = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        url = mgr_map.get('services', {}).get('prometheus', None)
        if url:
            # Parse with urlsplit rather than splitting on '/' and ':': a
            # bracketed IPv6 host used to yield an empty port (silently
            # dropping the standby mgrs) and a URL without '//' used to raise
            # IndexError.
            netloc = ''
            explicit_port = None
            try:
                parsed = urlsplit(url)
                netloc = parsed.netloc
                explicit_port = parsed.port
            except ValueError as e:
                logger.warning('Unable to parse mgr prometheus URL %s: %s', url, e)
            if netloc:
                mgr_scrape_list.append(netloc)
                # Fall back to 9283 when the URL carries no explicit port.
                port = str(explicit_port) if explicit_port else '9283'
        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider the mgr a dep even if the prometheus module is
            # disabled in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            if not port:
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                # The active mgr's target was already taken from the mgr map.
                continue
            addr = self.mgr.inventory.get_addr(dd.hostname)
            mgr_scrape_list.append(addr.split(':')[0] + ':' + port)

        # scrape node exporters
        nodes = []
        for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            nodes.append({
                'hostname': dd.hostname,
                'url': addr.split(':')[0] + ':9100'
            })

        # scrape alert managers
        alertmgr_targets = []
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            # The target string carries its own single quotes.
            alertmgr_targets.append("'{}:9093'".format(addr.split(':')[0]))

        # generate the prometheus configuration
        context = {
            'alertmgr_targets': alertmgr_targets,
            'mgr_scrape_list': mgr_scrape_list,
            'nodes': nodes,
        }
        r = {
            'files': {
                'prometheus.yml':
                    self.mgr.template.render(
                        'services/prometheus/prometheus.yml.j2', context)
            }
        }

        # include alerts, if present in the container
        if os.path.exists(self.mgr.prometheus_alerts_path):
            with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
                alerts = f.read()
            r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts

        return r, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the first entry of ``daemon_descrs``, or an empty description if there is none."""
        # TODO: if there are multiple daemons, who is the active one?
        if daemon_descrs:
            return daemon_descrs[0]
        # if empty list provided, return empty Daemon Desc
        return DaemonDescription()

    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        """Publish the http URL of the active prometheus daemon to the dashboard."""
        dd = self.get_active_daemon(daemon_descrs)
        service_url = 'http://{}:{}'.format(
            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'Prometheus',
            'dashboard get-prometheus-api-host',
            'dashboard set-prometheus-api-host',
            service_url
        )
256
257
class NodeExporterService(CephadmService):
    """cephadm service handler for node-exporter daemons; they get no generated config."""

    TYPE = 'node-exporter'

    def prepare_create(self, daemon_spec: CephadmDaemonSpec) -> CephadmDaemonSpec:
        """Check that ``daemon_spec`` describes a node-exporter daemon and hand it back as-is."""
        assert daemon_spec.daemon_type == self.TYPE
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]:
        """Return an empty configuration and an empty dependency list.

        :param daemon_spec: spec of the daemon being configured; its
            ``daemon_type`` must equal ``TYPE``.
        """
        assert daemon_spec.daemon_type == self.TYPE
        empty_config = {}  # type: Dict[str, Any]
        no_deps = []  # type: List[str]
        return empty_config, no_deps