git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/service_discovery.py
update ceph source to reef 18.2.0
[ceph.git] / ceph / src / pybind / mgr / cephadm / service_discovery.py
1 try:
2 import cherrypy
3 from cherrypy._cpserver import Server
4 except ImportError:
5 # to avoid sphinx build crash
6 class Server: # type: ignore
7 pass
8
9 import logging
10 import socket
11
12 import orchestrator # noqa
13 from mgr_module import ServiceInfoT
14 from mgr_util import build_url
15 from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional
16 from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService
17 import secrets
18
19 from cephadm.services.ingress import IngressSpec
20 from cephadm.ssl_cert_utils import SSLCerts
21 from cephadm.services.cephadmservice import CephExporterService
22
23 if TYPE_CHECKING:
24 from cephadm.module import CephadmOrchestrator
25
26
def cherrypy_filter(record: logging.LogRecord) -> bool:
    """Logging filter that drops records whose message contains blocked text.

    Args:
        record: The log record about to be emitted.

    Returns:
        ``False`` (drop the record) when its formatted message contains any
        of the blocked substrings, ``True`` (keep it) otherwise.
    """
    blocked = (
        'TLSV1_ALERT_DECRYPT_ERROR',
    )
    msg = record.getMessage()
    # A generator short-circuits on the first hit instead of materializing a
    # list only to test it for emptiness.
    return not any(m in msg for m in blocked)
33
34
# Keep cherrypy access-log records from propagating to ancestor loggers.
cherrypy.log.access_log.propagate = False
# Run every record on the 'cherrypy.error' logger through cherrypy_filter so
# that messages containing the blocked TLS alert text are dropped.
logging.getLogger('cherrypy.error').addFilter(cherrypy_filter)
37
38
class Route(NamedTuple):
    """One entry of the dispatcher routing table.

    The field names double as the keyword arguments handed to the
    dispatcher's ``connect()`` call via ``_asdict()``.
    """

    # Identifier of the route.
    name: str
    # URL path the route is reachable under.
    route: str
    # Callable invoked to serve requests for this route.
    controller: Callable
43
44
class ServiceDiscovery:
    """Set up the service-discovery endpoint that is mounted under ``/sd``.

    The class builds a ``Root`` server, optionally protects it with HTTP
    basic auth and TLS, and registers the routes with cherrypy. Credentials
    and the root certificate/key are persisted through the mgr key/value
    store (``mgr.get_store`` / ``mgr.set_store``).
    """

    KV_STORE_SD_ROOT_CERT = 'service_discovery/root/cert'
    KV_STORE_SD_ROOT_KEY = 'service_discovery/root/key'
    KV_STORE_SD_ROOT_USERNAME = 'service_discovery/root/username'
    KV_STORE_SD_ROOT_PASSWORD = 'service_discovery/root/password'

    def __init__(self, mgr: "CephadmOrchestrator") -> None:
        self.mgr = mgr
        self.ssl_certs = SSLCerts()
        # Both stay None until enable_auth() has been called.
        self.username: Optional[str] = None
        self.password: Optional[str] = None

    def validate_password(self, realm: str, username: str, password: str) -> bool:
        """Check basic-auth credentials (used as the ``checkpassword`` callback).

        Args:
            realm: Authentication realm; not used for the decision.
            username: User name supplied by the client.
            password: Password supplied by the client.

        Returns:
            ``True`` only when both values match the configured credentials.
            Always ``False`` while no credentials are configured.
        """
        if self.username is None or self.password is None:
            return False
        # Constant-time comparison so response timing does not reveal how many
        # leading characters matched. Compare bytes because compare_digest()
        # rejects non-ASCII str arguments.
        user_ok = secrets.compare_digest(
            username.encode('utf-8'), self.username.encode('utf-8'))
        password_ok = secrets.compare_digest(
            password.encode('utf-8'), self.password.encode('utf-8'))
        # Both comparisons are evaluated before combining them on purpose.
        return user_ok and password_ok

    def configure_routes(self, server: "Server", enable_auth: bool) -> None:
        """Register the endpoint routes and mount them at ``/sd``.

        Args:
            server: Object providing the ``index``, ``get_sd_config`` and
                ``get_prometheus_rules`` handlers.
            enable_auth: When true, protect the routes with HTTP basic auth
                validated by :meth:`validate_password`.
        """
        routes = [
            Route('index', '/', server.index),
            Route('sd-config', '/prometheus/sd-config', server.get_sd_config),
            Route('rules', '/prometheus/rules', server.get_prometheus_rules),
        ]
        d = cherrypy.dispatch.RoutesDispatcher()
        for route in routes:
            d.connect(**route._asdict())
        if enable_auth:
            conf = {
                '/': {
                    'request.dispatch': d,
                    'tools.auth_basic.on': True,
                    'tools.auth_basic.realm': 'localhost',
                    'tools.auth_basic.checkpassword': self.validate_password
                }
            }
        else:
            conf = {'/': {'request.dispatch': d}}
        cherrypy.tree.mount(None, '/sd', config=conf)

    def enable_auth(self) -> None:
        """Load the stored credentials, creating and persisting them if absent."""
        self.username = self.mgr.get_store(self.KV_STORE_SD_ROOT_USERNAME)
        self.password = self.mgr.get_store(self.KV_STORE_SD_ROOT_PASSWORD)
        if not self.password or not self.username:
            self.username = 'admin'  # TODO(redo): what should be the default username
            self.password = secrets.token_urlsafe(20)
            self.mgr.set_store(self.KV_STORE_SD_ROOT_PASSWORD, self.password)
            self.mgr.set_store(self.KV_STORE_SD_ROOT_USERNAME, self.username)

    def configure_tls(self, server: "Server") -> None:
        """Attach a certificate and private key to *server*.

        A stored root certificate/key pair is reused when both are present;
        otherwise a new root certificate is generated for the mgr IP and
        persisted. The server certificate is then generated for the mgr IP
        and the FQDN resolved from it.
        """
        addr = self.mgr.get_mgr_ip()
        old_cert = self.mgr.get_store(self.KV_STORE_SD_ROOT_CERT)
        old_key = self.mgr.get_store(self.KV_STORE_SD_ROOT_KEY)
        if old_key and old_cert:
            self.ssl_certs.load_root_credentials(old_cert, old_key)
        else:
            self.ssl_certs.generate_root_cert(addr)
            self.mgr.set_store(self.KV_STORE_SD_ROOT_CERT, self.ssl_certs.get_root_cert())
            self.mgr.set_store(self.KV_STORE_SD_ROOT_KEY, self.ssl_certs.get_root_key())
        host_fqdn = socket.getfqdn(addr)
        server.ssl_certificate, server.ssl_private_key = self.ssl_certs.generate_cert_files(
            host_fqdn, addr)

    def configure(self, port: int, addr: str, enable_security: bool) -> None:
        """Create the server and configure auth, TLS and routes.

        Args:
            port: Port the server is bound to.
            addr: Address the server is bound to.
            enable_security: When true, enable both basic auth and TLS.
        """
        # we create a new server to enforce TLS/SSL config refresh
        self.root_server = Root(self.mgr, port, addr)
        self.root_server.ssl_certificate = None
        self.root_server.ssl_private_key = None
        if enable_security:
            self.enable_auth()
            self.configure_tls(self.root_server)
        self.configure_routes(self.root_server, enable_security)
113
114
class Root(Server):
    """Server object providing the service-discovery request handlers.

    It serves an HTML index, ``<http_sd_config>`` compatible target lists
    for several services, and the configured prometheus rules file.
    """

    # collapse everything to '/'
    def _cp_dispatch(self, vpath: str) -> 'Root':
        cherrypy.request.path = ''
        return self

    def stop(self) -> None:
        # we must call unsubscribe before stopping the server,
        # otherwise the port is not released and we will get
        # an exception when trying to restart it
        self.unsubscribe()
        super().stop()

    def __init__(self, mgr: "CephadmOrchestrator", port: int = 0, host: str = ''):
        """Bind the server to *host*:*port* and subscribe it.

        Args:
            mgr: Orchestrator module used to look up daemons and settings.
            port: Port assigned to ``socket_port``.
            host: Address assigned to ``socket_host``.
        """
        self.mgr = mgr
        super().__init__()
        self.socket_port = port
        self.socket_host = host
        self.subscribe()

    @cherrypy.expose
    def index(self) -> str:
        """Return a static HTML page linking to the available endpoints."""
        return '''<!DOCTYPE html>
<html>
<head><title>Cephadm HTTP Endpoint</title></head>
<body>
<h2>Cephadm Service Discovery Endpoints</h2>
<p><a href='prometheus/sd-config?service=mgr-prometheus'>mgr/Prometheus http sd-config</a></p>
<p><a href='prometheus/sd-config?service=alertmanager'>Alertmanager http sd-config</a></p>
<p><a href='prometheus/sd-config?service=node-exporter'>Node exporter http sd-config</a></p>
<p><a href='prometheus/sd-config?service=haproxy'>HAProxy http sd-config</a></p>
<p><a href='prometheus/sd-config?service=ceph-exporter'>Ceph exporter http sd-config</a></p>
<p><a href='prometheus/rules'>Prometheus rules</a></p>
</body>
</html>'''

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def get_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]:
        """Return <http_sd_config> compatible prometheus config for the specified service.

        Unknown service names yield an empty list.
        """
        handlers: Dict[str, Callable[[], List[Dict[str, Collection[str]]]]] = {
            'mgr-prometheus': self.prometheus_sd_config,
            'alertmanager': self.alertmgr_sd_config,
            'node-exporter': self.node_exporter_sd_config,
            'haproxy': self.haproxy_sd_config,
            'ceph-exporter': self.ceph_exporter_sd_config,
        }
        handler = handlers.get(service)
        return handler() if handler is not None else []

    def _daemon_endpoint(self, dd: "orchestrator.DaemonDescription", default_port: int) -> str:
        """Return ``host:port`` for *dd*.

        The daemon's own IP and first port are preferred; the inventory
        address of its host and *default_port* are the fallbacks.
        """
        assert dd.hostname is not None
        addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
        port = dd.ports[0] if dd.ports else default_port
        # build_url() is called without a scheme here; strip the leading
        # slashes from its result to leave a bare host:port target.
        return build_url(host=addr, port=port).lstrip('/')

    def _per_daemon_sd_config(self, service_name: str,
                              default_port: int) -> List[Dict[str, Collection[str]]]:
        """Return one target entry per daemon of *service_name*, labelled by host."""
        srv_entries = []
        for dd in self.mgr.cache.get_daemons_by_service(service_name):
            assert dd.hostname is not None
            srv_entries.append({
                'targets': [self._daemon_endpoint(dd, default_port)],
                'labels': {'instance': dd.hostname}
            })
        return srv_entries

    def prometheus_sd_config(self) -> List[Dict[str, Collection[str]]]:
        """Return <http_sd_config> compatible prometheus config for prometheus service."""
        # Neither value depends on the server being inspected, so look them
        # up once instead of on every loop iteration.
        mgr_id = self.mgr.get_mgr_id()
        port = self.mgr.get_module_option_ex(
            'prometheus', 'server_port', PrometheusService.DEFAULT_MGR_PROMETHEUS_PORT)
        targets = []
        for server in self.mgr.list_servers():
            hostname = server.get('hostname', '')
            for service in cast(List[ServiceInfoT], server.get('services', [])):
                # Only the mgr daemon whose id matches get_mgr_id() is a target.
                if service['type'] == 'mgr' and service['id'] == mgr_id:
                    targets.append(f'{hostname}:{port}')
        return [{"targets": targets, "labels": {}}]

    def alertmgr_sd_config(self) -> List[Dict[str, Collection[str]]]:
        """Return <http_sd_config> compatible prometheus config for mgr alertmanager service."""
        srv_entries = [
            self._daemon_endpoint(dd, AlertmanagerService.DEFAULT_SERVICE_PORT)
            for dd in self.mgr.cache.get_daemons_by_service('alertmanager')
        ]
        return [{"targets": srv_entries, "labels": {}}]

    def node_exporter_sd_config(self) -> List[Dict[str, Collection[str]]]:
        """Return <http_sd_config> compatible prometheus config for node-exporter service."""
        return self._per_daemon_sd_config(
            'node-exporter', NodeExporterService.DEFAULT_SERVICE_PORT)

    def haproxy_sd_config(self) -> List[Dict[str, Collection[str]]]:
        """Return <http_sd_config> compatible prometheus config for haproxy service."""
        srv_entries = []
        for dd in self.mgr.cache.get_daemons_by_type('ingress'):
            service_name = dd.service_name()
            # Daemons whose service has no stored spec are skipped, so the
            # spec is never read for a service that is not in the store.
            if service_name not in self.mgr.spec_store:
                continue
            assert dd.hostname is not None
            if dd.daemon_type != 'haproxy':
                continue
            spec = cast(IngressSpec, self.mgr.spec_store[service_name].spec)
            addr = self.mgr.inventory.get_addr(dd.hostname)
            srv_entries.append({
                'targets': [build_url(host=addr, port=spec.monitor_port).lstrip('/')],
                'labels': {'instance': service_name}
            })
        return srv_entries

    def ceph_exporter_sd_config(self) -> List[Dict[str, Collection[str]]]:
        """Return <http_sd_config> compatible prometheus config for ceph-exporter service."""
        return self._per_daemon_sd_config(
            'ceph-exporter', CephExporterService.DEFAULT_SERVICE_PORT)

    @cherrypy.expose(alias='prometheus/rules')
    def get_prometheus_rules(self) -> str:
        """Return currently configured prometheus rules as Yaml."""
        cherrypy.response.headers['Content-Type'] = 'text/plain'
        with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
            return f.read()