X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=ceph%2Fsrc%2Fpybind%2Fmgr%2Fprometheus%2Fmodule.py;h=c215181540965bb4d6f0cc141a625552e037bced;hb=494da23a05e25ed98f5539f3b89e6af3cafe3fec;hp=ca2e8d907f0972e4d91897cf3a11f1054153e01f;hpb=931c18d5142274109ebfaecd012da8e5a3ebc67e;p=ceph.git diff --git a/ceph/src/pybind/mgr/prometheus/module.py b/ceph/src/pybind/mgr/prometheus/module.py index ca2e8d907..c21518154 100644 --- a/ceph/src/pybind/mgr/prometheus/module.py +++ b/ceph/src/pybind/mgr/prometheus/module.py @@ -9,13 +9,13 @@ import socket import threading import time from mgr_module import MgrModule, MgrStandbyModule, CommandResult, PG_STATES +from mgr_util import get_default_addr from rbd import RBD # Defaults for the Prometheus HTTP server. Can also set in config-key # see https://github.com/prometheus/prometheus/wiki/Default-port-allocations # for Prometheus exporter port registry -DEFAULT_ADDR = '::' DEFAULT_PORT = 9283 # When the CherryPy server in 3.2.2 (and later) starts it attempts to verify @@ -79,6 +79,14 @@ MDS_METADATA = ('ceph_daemon', 'fs_id', 'hostname', 'public_addr', 'rank', MON_METADATA = ('ceph_daemon', 'hostname', 'public_addr', 'rank', 'ceph_version') +MGR_METADATA = ('ceph_daemon', 'hostname', 'ceph_version') + +MGR_STATUS = ('ceph_daemon',) + +MGR_MODULE_STATUS = ('name',) + +MGR_MODULE_CAN_RUN = ('name',) + OSD_METADATA = ('back_iface', 'ceph_daemon', 'cluster_addr', 'device_class', 'front_iface', 'hostname', 'objectstore', 'public_addr', 'ceph_version') @@ -244,6 +252,30 @@ class Module(MgrModule): 'MON Metadata', MON_METADATA ) + metrics['mgr_metadata'] = Metric( + 'gauge', + 'mgr_metadata', + 'MGR metadata', + MGR_METADATA + ) + metrics['mgr_status'] = Metric( + 'gauge', + 'mgr_status', + 'MGR status (0=standby, 1=active)', + MGR_STATUS + ) + metrics['mgr_module_status'] = Metric( + 'gauge', + 'mgr_module_status', + 'MGR module status (0=disabled, 1=enabled, 2=auto-enabled)', + MGR_MODULE_STATUS + ) + metrics['mgr_module_can_run'] = 
def get_mgr_status(self):
    """Publish per-daemon and per-module manager metrics.

    Reads the manager map via ``self.get('mgr_map')`` and the service list
    via ``self.get_service_list()``, then sets four metrics:

    * ``mgr_metadata`` -- always 1, labelled with the daemon name plus the
      two values stored for that daemon in the service list (hostname and
      version string; empty strings when the daemon is not listed).
    * ``mgr_status`` -- 1 for the active manager, 0 for every standby.
    * ``mgr_module_status`` -- 2 when the module is listed as always-on for
      the active manager's release, 1 when it is listed in
      ``mgr_map['modules']``, 0 otherwise.
    * ``mgr_module_can_run`` -- 1 when the module's ``can_run`` flag is
      truthy, 0 otherwise.

    The release name is taken from the second-to-last word of the active
    manager's version string. When that string is missing or too short, or
    the release has no always-on entry, no module is reported as always-on
    instead of raising and aborting the whole collection pass.
    """
    mgr_map = self.get('mgr_map')
    servers = self.get_service_list()

    active = mgr_map['active_name']
    # Standbys first, active last: keeps the original emission order.
    all_mgrs = [standby.get('name') for standby in mgr_map['standbys']]
    all_mgrs.append(active)

    # Later entries win on duplicate module names, as with the original dict.
    all_modules = {module.get('name'): module.get('can_run')
                   for module in mgr_map['available_modules']}

    ceph_release = None
    for mgr in all_mgrs:
        host_version = servers.get((mgr, 'mgr'), ('', ''))
        daemon = 'mgr.{}'.format(mgr)
        is_active = mgr == active

        if is_active:
            # e.g. "... nautilus (stable)" -> "nautilus". The default
            # ('', '') yields no tokens, so guard before indexing.
            version_tokens = host_version[1].split()
            if len(version_tokens) >= 2:
                ceph_release = version_tokens[-2]

        self.metrics['mgr_metadata'].set(1, (
            daemon, host_version[0], host_version[1]
        ))
        self.metrics['mgr_status'].set(1 if is_active else 0, (daemon,))

    # Unknown release (or a map without the key) means "nothing is always-on".
    always_on_modules = set(
        mgr_map.get('always_on_modules', {}).get(ceph_release, ()))
    enabled_modules = set(mgr_map['modules'])

    for mod_name, can_run in all_modules.items():
        if mod_name in always_on_modules:
            _state = 2
        elif mod_name in enabled_modules:
            _state = 1
        else:
            _state = 0

        self.metrics['mgr_module_status'].set(_state, (mod_name,))
        self.metrics['mgr_module_can_run'].set(1 if can_run else 0,
                                               (mod_name,))
pg_status = self.get('pg_status') @@ -839,6 +915,7 @@ class Module(MgrModule): self.get_fs() self.get_osd_stats() self.get_quorum_status() + self.get_mgr_status() self.get_metadata_and_osd_status() self.get_pg_status() self.get_num_objects() @@ -999,7 +1076,7 @@ class Module(MgrModule): 'scrape_interval', 5.0) server_addr = self.get_localized_module_option( - 'server_addr', DEFAULT_ADDR) + 'server_addr', get_default_addr()) server_port = self.get_localized_module_option( 'server_port', DEFAULT_PORT) self.log.info( @@ -1041,7 +1118,8 @@ class StandbyModule(MgrStandbyModule): self.shutdown_event = threading.Event() def serve(self): - server_addr = self.get_localized_module_option('server_addr', '::') + server_addr = self.get_localized_module_option( + 'server_addr', get_default_addr()) server_port = self.get_localized_module_option( 'server_port', DEFAULT_PORT) self.log.info("server_addr: %s server_port: %s" %