]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/pybind/mgr/cephadm/services/monitoring.py
import ceph quincy 17.2.1
[ceph.git] / ceph / src / pybind / mgr / cephadm / services / monitoring.py
index d67d3057bb80765d3ac74fe9c58b57781b2c4e41..0bd388f461fc2b174adbc9aeb766a37ec41718d4 100644 (file)
@@ -31,13 +31,22 @@ class GrafanaService(CephadmService):
         prom_services = []  # type: List[str]
         for dd in self.mgr.cache.get_daemons_by_service('prometheus'):
             assert dd.hostname is not None
-            addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+            addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
             port = dd.ports[0] if dd.ports else 9095
             prom_services.append(build_url(scheme='http', host=addr, port=port))
 
             deps.append(dd.name())
+
+        daemons = self.mgr.cache.get_daemons_by_service('mgr')
+        loki_host = ''
+        assert daemons is not None
+        if daemons != []:
+            assert daemons[0].hostname is not None
+            addr = daemons[0].ip if daemons[0].ip else self._inventory_get_fqdn(daemons[0].hostname)
+            loki_host = build_url(scheme='http', host=addr, port=3100)
+
         grafana_data_sources = self.mgr.template.render(
-            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services})
+            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services, 'loki_host': loki_host})
 
         cert = self.mgr.get_store('grafana_crt')
         pkey = self.mgr.get_store('grafana_key')
@@ -87,7 +96,7 @@ class GrafanaService(CephadmService):
         # TODO: signed cert
         dd = self.get_active_daemon(daemon_descrs)
         assert dd.hostname is not None
-        addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+        addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
         service_url = build_url(scheme='https', host=addr, port=port)
         self._set_service_url_on_dashboard(
@@ -122,6 +131,10 @@ class AlertmanagerService(CephadmService):
         default_webhook_urls: List[str] = []
 
         spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+        try:
+            secure = spec.secure
+        except AttributeError:
+            secure = False
         user_data = spec.user_data
         if 'default_webhook_urls' in user_data and isinstance(
                 user_data['default_webhook_urls'], list):
@@ -135,7 +148,7 @@ class AlertmanagerService(CephadmService):
         proto = None  # http: or https:
         url = mgr_map.get('services', {}).get('dashboard', None)
         if url:
-            dashboard_urls.append(url)
+            dashboard_urls.append(url.rstrip('/'))
             p_result = urlparse(url)
             proto = p_result.scheme
             port = p_result.port
@@ -150,13 +163,13 @@ class AlertmanagerService(CephadmService):
             if dd.daemon_id == self.mgr.get_mgr_id():
                 continue
             assert dd.hostname is not None
-            addr = self.mgr.inventory.get_addr(dd.hostname)
-            dashboard_urls.append(build_url(scheme=proto, host=addr, port=port))
+            addr = self._inventory_get_fqdn(dd.hostname)
+            dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/'))
 
         for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'):
             assert dd.hostname is not None
             assert dd.ports
-            addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+            addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
             deps.append(dd.name())
 
             snmp_gateway_urls.append(build_url(scheme='http', host=addr,
@@ -166,6 +179,7 @@ class AlertmanagerService(CephadmService):
             'dashboard_urls': dashboard_urls,
             'default_webhook_urls': default_webhook_urls,
             'snmp_gateway_urls': snmp_gateway_urls,
+            'secure': secure,
         }
         yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context)
 
@@ -174,7 +188,7 @@ class AlertmanagerService(CephadmService):
         for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
             assert dd.hostname is not None
             deps.append(dd.name())
-            addr = self.mgr.inventory.get_addr(dd.hostname)
+            addr = self._inventory_get_fqdn(dd.hostname)
             peers.append(build_url(host=addr, port=port).lstrip('/'))
 
         return {
@@ -194,7 +208,7 @@ class AlertmanagerService(CephadmService):
     def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
         dd = self.get_active_daemon(daemon_descrs)
         assert dd.hostname is not None
-        addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+        addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
         service_url = build_url(scheme='http', host=addr, port=port)
         self._set_service_url_on_dashboard(
@@ -217,6 +231,7 @@ class AlertmanagerService(CephadmService):
 class PrometheusService(CephadmService):
     TYPE = 'prometheus'
     DEFAULT_SERVICE_PORT = 9095
+    DEFAULT_MGR_PROMETHEUS_PORT = 9283
 
     def config(self, spec: ServiceSpec) -> None:
         # make sure module is enabled
@@ -247,13 +262,19 @@ class PrometheusService(CephadmService):
         # scrape mgrs
         mgr_scrape_list = []
         mgr_map = self.mgr.get('mgr_map')
-        port = None
+        port = cast(int, self.mgr.get_module_option_ex(
+            'prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT))
+        deps.append(str(port))
         t = mgr_map.get('services', {}).get('prometheus', None)
         if t:
             p_result = urlparse(t)
-            t = t.split('/')[2]
-            mgr_scrape_list.append(t)
-            port = p_result.port or 9283
+            # urlparse .hostname removes '[]' from the hostname in case
+            # of ipv6 addresses so if this is the case then we just
+            # append the brackets when building the final scrape endpoint
+            if '[' in p_result.netloc and ']' in p_result.netloc:
+                mgr_scrape_list.append(f"[{p_result.hostname}]:{port}")
+            else:
+                mgr_scrape_list.append(f"{p_result.hostname}:{port}")
         # scan all mgrs to generate deps and to get standbys too.
         # assume that they are all on the same port as the active mgr.
         for dd in self.mgr.cache.get_daemons_by_service('mgr'):
@@ -265,7 +286,7 @@ class PrometheusService(CephadmService):
             if dd.daemon_id == self.mgr.get_mgr_id():
                 continue
             assert dd.hostname is not None
-            addr = self.mgr.inventory.get_addr(dd.hostname)
+            addr = self._inventory_get_fqdn(dd.hostname)
             mgr_scrape_list.append(build_url(host=addr, port=port).lstrip('/'))
 
         # scrape node exporters
@@ -273,7 +294,7 @@ class PrometheusService(CephadmService):
         for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
             assert dd.hostname is not None
             deps.append(dd.name())
-            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
             port = dd.ports[0] if dd.ports else 9100
             nodes.append({
                 'hostname': dd.hostname,
@@ -285,7 +306,7 @@ class PrometheusService(CephadmService):
         for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
             assert dd.hostname is not None
             deps.append(dd.name())
-            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
             port = dd.ports[0] if dd.ports else 9093
             alertmgr_targets.append("'{}'".format(build_url(host=addr, port=port).lstrip('/')))
 
@@ -297,7 +318,7 @@ class PrometheusService(CephadmService):
                 assert dd.hostname is not None
                 deps.append(dd.name())
                 if dd.daemon_type == 'haproxy':
-                    addr = self.mgr.inventory.get_addr(dd.hostname)
+                    addr = self._inventory_get_fqdn(dd.hostname)
                     haproxy_targets.append({
                         "url": f"'{build_url(host=addr, port=spec.monitor_port).lstrip('/')}'",
                         "service": dd.service_name(),
@@ -336,7 +357,7 @@ class PrometheusService(CephadmService):
     def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
         dd = self.get_active_daemon(daemon_descrs)
         assert dd.hostname is not None
-        addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+        addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
         service_url = build_url(scheme='http', host=addr, port=port)
         self._set_service_url_on_dashboard(
@@ -378,6 +399,57 @@ class NodeExporterService(CephadmService):
         return HandleCommandResult(0, out, '')
 
 
+class LokiService(CephadmService):
+    TYPE = 'loki'
+    DEFAULT_SERVICE_PORT = 3100
+
+    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+        assert self.TYPE == daemon_spec.daemon_type
+        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+        return daemon_spec
+
+    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+        assert self.TYPE == daemon_spec.daemon_type
+        deps: List[str] = []
+
+        yml = self.mgr.template.render('services/loki.yml.j2')
+        return {
+            "files": {
+                "loki.yml": yml
+            }
+        }, sorted(deps)
+
+
+class PromtailService(CephadmService):
+    TYPE = 'promtail'
+    DEFAULT_SERVICE_PORT = 9080
+
+    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+        assert self.TYPE == daemon_spec.daemon_type
+        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+        return daemon_spec
+
+    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+        assert self.TYPE == daemon_spec.daemon_type
+        deps: List[str] = []
+        hostnames: List[str] = []
+        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
+            assert dd.hostname is not None
+            addr = self.mgr.inventory.get_addr(dd.hostname)
+            hostnames.append(addr)
+        context = {
+            'hostnames': hostnames,
+            'client_hostname': hostnames[0],
+        }
+
+        yml = self.mgr.template.render('services/promtail.yml.j2', context)
+        return {
+            "files": {
+                "promtail.yml": yml
+            }
+        }, sorted(deps)
+
+
 class SNMPGatewayService(CephadmService):
     TYPE = 'snmp-gateway'