]> git.proxmox.com Git - ceph.git/blame - ceph/src/pybind/mgr/cephadm/services/monitoring.py
bump version to 15.2.4-pve1
[ceph.git] / ceph / src / pybind / mgr / cephadm / services / monitoring.py
CommitLineData
e306af50
TL
1import logging
2import os
3from typing import List, Any, Tuple, Dict
4
5from orchestrator import DaemonDescription
6from cephadm.services.cephadmservice import CephadmService
7from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert
8
9logger = logging.getLogger(__name__)
10
class GrafanaService(CephadmService):
    """cephadm service handler for grafana daemons."""

    DEFAULT_SERVICE_PORT = 3000

    def create(self, daemon_id, host):
        # type: (str, str) -> str
        """Create a grafana daemon named ``daemon_id`` on ``host``.

        Delegates to ``self.mgr._create_daemon`` and returns its result.
        """
        daemon_type = 'grafana'
        return self.mgr._create_daemon(daemon_type, daemon_id, host)

    def generate_config(self):
        # type: () -> Tuple[Dict[str, Any], List[str]]
        """Build the grafana config files and the list of dependencies.

        Returns a tuple ``(config, deps)`` where ``config['files']`` holds
        the rendered ``grafana.ini``, the datasource provisioning file and
        the TLS certificate/key, and ``deps`` is the sorted list of the
        names of all known prometheus daemons.
        """
        dep_names = []  # type: List[str]
        prometheus_hosts = []  # type: List[str]

        # Every prometheus daemon is both a datasource host and a dependency.
        for daemon in self.mgr.cache.get_daemons_by_service('prometheus'):
            prometheus_hosts.append(daemon.hostname)
            dep_names.append(daemon.name())

        datasources_yml = self.mgr.template.render(
            'services/grafana/ceph-dashboard.yml.j2',
            {'hosts': prometheus_hosts},
        )

        # Prefer a stored certificate/key pair, provided it validates.
        cert = self.mgr.get_store('grafana_crt')
        pkey = self.mgr.get_store('grafana_key')
        usable_pair = bool(cert and pkey)
        if usable_pair:
            try:
                verify_tls(cert, pkey)
            except ServerConfigException as e:
                logger.warning('Provided grafana TLS certificates invalid: %s', str(e))
                usable_pair = False

        if not usable_pair:
            # Nothing usable is stored: generate a self-signed pair, persist
            # it, and set the dashboard's grafana-api-ssl-verify to 'false'.
            cert, pkey = create_self_signed_cert('Ceph', 'cephadm')
            self.mgr.set_store('grafana_crt', cert)
            self.mgr.set_store('grafana_key', pkey)
            ssl_verify_cmd = {
                'prefix': 'dashboard set-grafana-api-ssl-verify',
                'value': 'false',
            }
            self.mgr.check_mon_command(ssl_verify_cmd)

        ini_context = {'http_port': self.DEFAULT_SERVICE_PORT}
        grafana_ini = self.mgr.template.render(
            'services/grafana/grafana.ini.j2', ini_context)

        files = {}  # type: Dict[str, Any]
        files["grafana.ini"] = grafana_ini
        files['provisioning/datasources/ceph-dashboard.yml'] = datasources_yml
        files['certs/cert_file'] = '# generated by cephadm\n%s' % cert
        files['certs/cert_key'] = '# generated by cephadm\n%s' % pkey

        return {'files': files}, sorted(dep_names)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the daemon treated as active: the last entry of the list."""
        return daemon_descrs[-1]

    def daemon_check_post(self, daemon_descrs: List[DaemonDescription]):
        """Publish the active grafana daemon's HTTPS URL to the dashboard.

        The URL is built from the inventory address of the active daemon's
        host and ``DEFAULT_SERVICE_PORT``, then handed to
        ``_set_service_url_on_dashboard`` together with the dashboard
        get/set command names.
        """
        # TODO: signed cert
        active = self.get_active_daemon(daemon_descrs)
        host_addr = self._inventory_get_addr(active.hostname)
        service_url = 'https://{}:{}'.format(host_addr, self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'Grafana',
            'dashboard get-grafana-api-url',
            'dashboard set-grafana-api-url',
            service_url,
        )
74
class AlertmanagerService(CephadmService):
    """cephadm service handler for alertmanager daemons."""

    DEFAULT_SERVICE_PORT = 9093

    def create(self, daemon_id, host) -> str:
        """Create an alertmanager daemon named ``daemon_id`` on ``host``.

        Delegates to ``self.mgr._create_daemon`` and returns its result.
        """
        return self.mgr._create_daemon('alertmanager', daemon_id, host)

    def generate_config(self):
        # type: () -> Tuple[Dict[str, Any], List[str]]
        """Build the alertmanager config and the list of dependencies.

        Returns a tuple ``(config, deps)``. ``config['files']`` contains the
        rendered ``alertmanager.yml`` (fed with the dashboard URLs of the
        active and standby mgrs) and ``config['peers']`` lists one
        ``host:9094`` entry per alertmanager daemon. ``deps`` is the sorted
        list of the names of all mgr and alertmanager daemons.
        """
        deps = []  # type: List[str]

        # dashboard(s)
        dashboard_urls = []
        mgr_map = self.mgr.get('mgr_map')
        port = None
        proto = None  # http: or https:
        url = mgr_map.get('services', {}).get('dashboard', None)
        if url:
            dashboard_urls.append(url)
            # assumes the URL has the form '<scheme>://<host>[:<port>]/...'
            url_parts = url.split('/')
            proto = url_parts[0]
            # Only take what follows the last ':' as the port when it is
            # numeric. A URL without an explicit port must not raise, and a
            # bracketed IPv6 host must not be mistaken for a port.
            _, sep, maybe_port = url_parts[2].rpartition(':')
            if sep and maybe_port.isdigit():
                port = maybe_port

        # scan all mgrs to generate deps and to get standbys too.
        # assume that they are all on the same port as the active mgr.
        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
            # we consider mgr a dep even if the dashboard is disabled
            # in order to be consistent with _calc_daemon_deps().
            deps.append(dd.name())
            if not url:
                # no dashboard URL published: nothing to derive standbys from
                continue
            if dd.daemon_id == self.mgr.get_mgr_id():
                # the active mgr's URL was already added above
                continue
            addr = self.mgr.inventory.get_addr(dd.hostname)
            host = addr.split(':')[0]
            if port:
                dashboard_urls.append('%s//%s:%s/' % (proto, host, port))
            else:
                # the active URL carries no explicit port, so neither do
                # the standby URLs
                dashboard_urls.append('%s//%s/' % (proto, host))

        context = {
            'dashboard_urls': dashboard_urls
        }
        yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context)

        # Peer list: every alertmanager daemon's host paired with port 9094
        # (presumably the alertmanager cluster/peer port -- TODO confirm).
        peers = []
        peer_port = '9094'
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            deps.append(dd.name())
            addr = self.mgr.inventory.get_addr(dd.hostname)
            peers.append(addr.split(':')[0] + ':' + peer_port)
        return {
            "files": {
                "alertmanager.yml": yml
            },
            "peers": peers
        }, sorted(deps)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the daemon treated as active: the first entry of the list."""
        # TODO: if there are multiple daemons, who is the active one?
        return daemon_descrs[0]

    def daemon_check_post(self, daemon_descrs: List[DaemonDescription]):
        """Publish the active alertmanager daemon's HTTP URL to the dashboard.

        The URL is built from the inventory address of the active daemon's
        host and ``DEFAULT_SERVICE_PORT``, then handed to
        ``_set_service_url_on_dashboard`` together with the dashboard
        get/set command names.
        """
        dd = self.get_active_daemon(daemon_descrs)
        service_url = 'http://{}:{}'.format(self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'AlertManager',
            'dashboard get-alertmanager-api-host',
            'dashboard set-alertmanager-api-host',
            service_url
        )
140
141
class PrometheusService(CephadmService):
    """cephadm service handler for prometheus daemons."""

    DEFAULT_SERVICE_PORT = 9095

    def create(self, daemon_id, host) -> str:
        """Create a prometheus daemon named ``daemon_id`` on ``host``.

        Delegates to ``self.mgr._create_daemon`` and returns its result.
        """
        daemon_type = 'prometheus'
        return self.mgr._create_daemon(daemon_type, daemon_id, host)

    def generate_config(self):
        # type: () -> Tuple[Dict[str, Any], List[str]]
        """Build the prometheus config files and the list of dependencies.

        Returns a tuple ``(config, deps)``. ``config['files']`` contains the
        rendered ``prometheus.yml`` and, when the alerts file exists at
        ``self.mgr.prometheus_alerts_path``, its content as well. ``deps``
        is the sorted list of the names of all mgr, node-exporter and
        alertmanager daemons.
        """
        dep_names = []  # type: List[str]

        # --- mgr scrape targets -------------------------------------------
        mgr_targets = []
        mgr_map = self.mgr.get('mgr_map')
        mgr_port = None
        prometheus_url = mgr_map.get('services', {}).get('prometheus', None)
        if prometheus_url:
            # third '/'-separated piece of the URL, i.e. 'host[:port]'
            active_target = prometheus_url.split('/')[2]
            mgr_targets.append(active_target)
            if ':' in active_target:
                mgr_port = active_target.split(':')[1]
            else:
                mgr_port = '9283'

        # Walk all mgrs: each one is a dependency (even when the prometheus
        # module is disabled, for consistency with _calc_daemon_deps()), and
        # every non-active one is scraped on the active mgr's port.
        for mgr_dd in self.mgr.cache.get_daemons_by_service('mgr'):
            dep_names.append(mgr_dd.name())
            is_active = mgr_dd.daemon_id == self.mgr.get_mgr_id() if mgr_port else None
            if not mgr_port or is_active:
                continue
            mgr_addr = self.mgr.inventory.get_addr(mgr_dd.hostname)
            mgr_targets.append(mgr_addr.split(':')[0] + ':' + mgr_port)

        # --- node exporter scrape targets ---------------------------------
        node_targets = []
        for exporter_dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
            dep_names.append(exporter_dd.name())
            exporter_addr = self.mgr.inventory.get_addr(exporter_dd.hostname)
            node_entry = {
                'hostname': exporter_dd.hostname,
                'url': exporter_addr.split(':')[0] + ':9100'
            }
            node_targets.append(node_entry)

        # --- alertmanager targets -----------------------------------------
        alertmanager_targets = []
        for am_dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            dep_names.append(am_dd.name())
            am_addr = self.mgr.inventory.get_addr(am_dd.hostname)
            alertmanager_targets.append("'{}:9093'".format(am_addr.split(':')[0]))

        # --- render the prometheus configuration --------------------------
        template_context = {
            'alertmgr_targets': alertmanager_targets,
            'mgr_scrape_list': mgr_targets,
            'nodes': node_targets,
        }
        prometheus_yml = self.mgr.template.render(
            'services/prometheus/prometheus.yml.j2', template_context)
        config = {'files': {'prometheus.yml': prometheus_yml}}

        # Ship the alert rules too, if the file is present in the container.
        if os.path.exists(self.mgr.prometheus_alerts_path):
            with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as alerts_file:
                alert_rules = alerts_file.read()
            config['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alert_rules

        return config, sorted(dep_names)

    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
        """Return the daemon treated as active: the first entry of the list."""
        # TODO: if there are multiple daemons, who is the active one?
        return daemon_descrs[0]

    def daemon_check_post(self, daemon_descrs: List[DaemonDescription]):
        """Publish the active prometheus daemon's HTTP URL to the dashboard.

        The URL is built from the inventory address of the active daemon's
        host and ``DEFAULT_SERVICE_PORT``, then handed to
        ``_set_service_url_on_dashboard`` together with the dashboard
        get/set command names.
        """
        active = self.get_active_daemon(daemon_descrs)
        host_addr = self._inventory_get_addr(active.hostname)
        service_url = 'http://{}:{}'.format(host_addr, self.DEFAULT_SERVICE_PORT)
        self._set_service_url_on_dashboard(
            'Prometheus',
            'dashboard get-prometheus-api-host',
            'dashboard set-prometheus-api-host',
            service_url,
        )
229
class NodeExporterService(CephadmService):
    """cephadm service handler for node-exporter daemons."""

    def create(self, daemon_id, host) -> str:
        """Create a node-exporter daemon named ``daemon_id`` on ``host``.

        Delegates to ``self.mgr._create_daemon`` and returns its result.
        """
        daemon_type = 'node-exporter'
        return self.mgr._create_daemon(daemon_type, daemon_id, host)

    def generate_config(self) -> Tuple[Dict[str, Any], List[str]]:
        """Return an empty config dict and an empty dependency list."""
        config = {}  # type: Dict[str, Any]
        deps = []  # type: List[str]
        return config, deps