]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/zabbix/module.py
"""
Zabbix module for ceph-mgr

Collect statistics from the Ceph cluster and, every X seconds, send the data
to a Zabbix server using the zabbix_sender executable.
"""
9 from subprocess
import Popen
, PIPE
10 from threading
import Event
11 from mgr_module
import MgrModule
def avg(data):
    """Return the arithmetic mean of *data* as a float.

    :param data: a sized collection of numbers (e.g. a list of per-OSD
        fill percentages or latencies).
    :returns: ``sum(data) / len(data)`` as a float, or ``0.0`` when *data*
        is empty.
    """
    # An empty collection (for instance a cluster that reports no OSD
    # statistics yet) must not abort the caller with ZeroDivisionError.
    if not data:
        return 0.0
    return sum(data) / float(len(data))
class ZabbixSender(object):
    """Small wrapper that pushes key/value items through ``zabbix_sender``."""

    def __init__(self, sender, host, port, log):
        """Remember how to reach the Zabbix server.

        :param sender: path of the ``zabbix_sender`` executable to run.
        :param host: value passed to the executable's ``-z`` option.
        :param port: value passed to the executable's ``-p`` option.
        :param log: logger-like object; only ``debug()`` is called on it.
        """
        self.sender = sender
        self.host = host
        self.port = port
        self.log = log

    def send(self, hostname, data):
        """Send every item of *data* for *hostname* in one sender invocation.

        Each item is written to the executable's standard input as a line
        of the form ``<hostname> ceph.<key> <value>``.

        :param hostname: value passed to ``-s`` and used as the first field
            of every input line.
        :param data: mapping of item key to value; nothing is executed when
            it is empty or ``None``.
        :raises RuntimeError: if the executable exits with a non-zero status.
        """
        if not data:
            # Nothing to report: do not spawn a process for an empty batch.
            return

        cmd = [self.sender, '-z', self.host, '-p', str(self.port), '-s',
               hostname, '-vv', '-i', '-']

        payload = ''.join(
            '{0} ceph.{1} {2}\n'.format(hostname, key, value)
            for key, value in data.items()
        )

        proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)

        # Hand the whole input to communicate() instead of writing to
        # proc.stdin first: communicate() feeds stdin while draining
        # stdout/stderr, so a chatty child ('-vv') cannot fill a pipe and
        # deadlock us. The pipes are binary, hence the explicit encode.
        stdout, stderr = proc.communicate(payload.encode('utf-8'))
        if proc.returncode != 0:
            raise RuntimeError('%s exited non-zero: %s' % (
                self.sender, stderr.decode('utf-8', 'replace')))

        self.log.debug('Zabbix Sender: %s',
                       stdout.decode('utf-8', 'replace').rstrip())
48 class Module(MgrModule
):
51 ceph_health_mapping
= {'HEALTH_OK': 0, 'HEALTH_WARN': 1, 'HEALTH_ERR': 2}
54 'zabbix_sender': '/usr/bin/zabbix_sender',
57 'identifier': None, 'interval': 60
62 "cmd": "zabbix config-set name=key,type=CephString "
63 "name=value,type=CephString",
64 "desc": "Set a configuration value",
68 "cmd": "zabbix config-show",
69 "desc": "Show current configuration",
74 "desc": "Force sending data to Zabbux",
78 "cmd": "zabbix self-test",
79 "desc": "Run a self-test on the Zabbix module",
84 def __init__(self
, *args
, **kwargs
):
85 super(Module
, self
).__init
__(*args
, **kwargs
)
88 def init_module_config(self
):
89 for key
, default
in self
.config_keys
.items():
90 value
= self
.get_localized_config(key
, default
)
92 raise RuntimeError('Configuration key {0} not set; "ceph '
93 'config-key set mgr/zabbix/{0} '
94 '<value>"'.format(key
))
96 self
.set_config_option(key
, value
)
98 def set_config_option(self
, option
, value
):
99 if option
not in self
.config_keys
.keys():
100 raise RuntimeError('{0} is a unknown configuration '
101 'option'.format(option
))
103 if option
in ['zabbix_port', 'interval']:
106 except (ValueError, TypeError):
107 raise RuntimeError('invalid {0} configured. Please specify '
108 'a valid integer'.format(option
))
110 if option
== 'interval' and value
< 10:
111 raise RuntimeError('interval should be set to at least 10 seconds')
113 self
.config
[option
] = value
118 health
= json
.loads(self
.get('health')['json'])
119 # 'status' is luminous+, 'overall_status' is legacy mode.
120 data
['overall_status'] = health
.get('status',
121 health
.get('overall_status'))
122 data
['overall_status_int'] = \
123 self
.ceph_health_mapping
.get(data
['overall_status'])
125 mon_status
= json
.loads(self
.get('mon_status')['json'])
126 data
['num_mon'] = len(mon_status
['monmap']['mons'])
129 data
['num_pools'] = len(df
['pools'])
130 data
['total_objects'] = df
['stats']['total_objects']
131 data
['total_used_bytes'] = df
['stats']['total_used_bytes']
132 data
['total_bytes'] = df
['stats']['total_bytes']
133 data
['total_avail_bytes'] = df
['stats']['total_avail_bytes']
140 for pool
in df
['pools']:
141 wr_ops
+= pool
['stats']['wr']
142 rd_ops
+= pool
['stats']['rd']
143 wr_bytes
+= pool
['stats']['wr_bytes']
144 rd_bytes
+= pool
['stats']['rd_bytes']
146 data
['wr_ops'] = wr_ops
147 data
['rd_ops'] = rd_ops
148 data
['wr_bytes'] = wr_bytes
149 data
['rd_bytes'] = rd_bytes
151 osd_map
= self
.get('osd_map')
152 data
['num_osd'] = len(osd_map
['osds'])
153 data
['osd_nearfull_ratio'] = osd_map
['nearfull_ratio']
154 data
['osd_full_ratio'] = osd_map
['full_ratio']
155 data
['osd_backfillfull_ratio'] = osd_map
['backfillfull_ratio']
157 data
['num_pg_temp'] = len(osd_map
['pg_temp'])
161 for osd
in osd_map
['osds']:
168 data
['num_osd_up'] = num_up
169 data
['num_osd_in'] = num_in
172 osd_apply_latency
= list()
173 osd_commit_latency
= list()
175 osd_stats
= self
.get('osd_stats')
176 for osd
in osd_stats
['osd_stats']:
179 osd_fill
.append((float(osd
['kb_used']) / float(osd
['kb'])) * 100)
180 osd_apply_latency
.append(osd
['perf_stat']['apply_latency_ms'])
181 osd_commit_latency
.append(osd
['perf_stat']['commit_latency_ms'])
184 data
['osd_max_fill'] = max(osd_fill
)
185 data
['osd_min_fill'] = min(osd_fill
)
186 data
['osd_avg_fill'] = avg(osd_fill
)
191 data
['osd_latency_apply_max'] = max(osd_apply_latency
)
192 data
['osd_latency_apply_min'] = min(osd_apply_latency
)
193 data
['osd_latency_apply_avg'] = avg(osd_apply_latency
)
195 data
['osd_latency_commit_max'] = max(osd_commit_latency
)
196 data
['osd_latency_commit_min'] = min(osd_commit_latency
)
197 data
['osd_latency_commit_avg'] = avg(osd_commit_latency
)
201 pg_summary
= self
.get('pg_summary')
203 for state
, num
in pg_summary
['all'].items():
206 data
['num_pg'] = num_pg
211 data
= self
.get_data()
213 self
.log
.debug('Sending data to Zabbix server %s',
214 self
.config
['zabbix_host'])
218 zabbix
= ZabbixSender(self
.config
['zabbix_sender'],
219 self
.config
['zabbix_host'],
220 self
.config
['zabbix_port'], self
.log
)
221 zabbix
.send(self
.config
['identifier'], data
)
222 except Exception as exc
:
223 self
.log
.error('Exception when sending: %s', exc
)
225 def handle_command(self
, command
):
226 if command
['prefix'] == 'zabbix config-show':
227 return 0, json
.dumps(self
.config
), ''
228 elif command
['prefix'] == 'zabbix config-set':
230 value
= command
['value']
232 return -errno
.EINVAL
, '', 'Value should not be empty or None'
234 self
.log
.debug('Setting configuration option %s to %s', key
, value
)
235 self
.set_config_option(key
, value
)
236 self
.set_localized_config(key
, value
)
237 return 0, 'Configuration option {0} updated'.format(key
), ''
238 elif command
['prefix'] == 'zabbix send':
240 return 0, 'Sending data to Zabbix', ''
241 elif command
['prefix'] == 'zabbix self-test':
243 return 0, 'Self-test succeeded', ''
245 return (-errno
.EINVAL
, '',
246 "Command not found '{0}'".format(command
['prefix']))
249 self
.log
.info('Stopping zabbix')
254 self
.log
.debug('Zabbix module starting up')
257 self
.init_module_config()
259 for key
, value
in self
.config
.items():
260 self
.log
.debug('%s: %s', key
, value
)
263 self
.log
.debug('Waking up for new iteration')
267 except Exception as exc
:
268 # Shouldn't happen, but let's log it and retry next interval,
269 # rather than dying completely.
270 self
.log
.exception("Unexpected error during send():")
272 interval
= self
.config
['interval']
273 self
.log
.debug('Sleeping for %d seconds', interval
)
274 self
.event
.wait(interval
)
277 data
= self
.get_data()
279 if data
['overall_status'] not in self
.ceph_health_mapping
:
280 raise RuntimeError('No valid overall_status found in data')
282 int(data
['overall_status_int'])
284 if data
['num_mon'] < 1:
285 raise RuntimeError('num_mon is smaller than 1')