]> git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/alerts/module.py
d6cec4b64baccd56727f2179094ea7c65adedd2b
[ceph.git] / ceph / src / pybind / mgr / alerts / module.py
1
2 """
3 A simple cluster health alerting module.
4 """
5
6 from mgr_module import MgrModule, HandleCommandResult
7 from threading import Event
8 import errno
9 import json
10 import smtplib
11
class Alerts(MgrModule):
    """
    Manager module that watches cluster health and e-mails changes.

    serve() polls the 'health' report every `interval` seconds, works out
    which checks are new, updated or cleared (see _diff()) and, when
    `smtp_host` is set, mails a summary through that SMTP server.  A
    failed delivery is reported through the ALERTS_SMTP_ERROR health
    check.
    """

    COMMANDS = [
        {
            "cmd": "alerts send",
            "desc": "(re)send alerts immediately",
            "perm": "r"
        },
    ]

    MODULE_OPTIONS = [
        {
            'name': 'interval',
            'type': 'secs',
            'default': 60,
            'desc': 'How frequently to reexamine health status',
            'runtime': True,
        },
        # smtp
        {
            'name': 'smtp_host',
            'default': '',
            'desc': 'SMTP server',
            'runtime': True,
        },
        {
            'name': 'smtp_destination',
            'default': '',
            'desc': 'Email address to send alerts to',
            'runtime': True,
        },
        {
            'name': 'smtp_port',
            'type': 'int',
            'default': 465,
            'desc': 'SMTP port',
            'runtime': True,
        },
        {
            'name': 'smtp_ssl',
            'type': 'bool',
            'default': True,
            'desc': 'Use SSL to connect to SMTP server',
            'runtime': True,
        },
        {
            'name': 'smtp_user',
            'default': '',
            'desc': 'User to authenticate as',
            'runtime': True,
        },
        {
            'name': 'smtp_password',
            'default': '',
            'desc': 'Password to authenticate with',
            'runtime': True,
        },
        {
            'name': 'smtp_sender',
            'default': '',
            'desc': 'SMTP envelope sender',
            'runtime': True,
        },
        {
            'name': 'smtp_from_name',
            'default': 'Ceph',
            'desc': 'Email From: name',
            'runtime': True,
        },
    ]

    # These are "native" Ceph options that this module cares about.
    NATIVE_OPTIONS = [
    ]

    def __init__(self, *args, **kwargs):
        """Initialise the base module and the serve()/shutdown() state."""
        super(Alerts, self).__init__(*args, **kwargs)

        # set up some members to enable the serve() method and shutdown()
        self.run = True
        self.event = Event()

        # ensure config options members are initialized; see config_notify()
        self.config_notify()

        self.log.info("Init")

    def config_notify(self):
        """
        This method is called whenever one of our config options is changed.
        """
        # This is some boilerplate that stores MODULE_OPTIONS in a class
        # member, so that, for instance, the 'interval' option is always
        # available as 'self.interval'.
        for opt in self.MODULE_OPTIONS:
            setattr(self,
                    opt['name'],
                    self.get_module_option(opt['name']))
            self.log.debug(' mgr option %s = %s',
                           opt['name'], getattr(self, opt['name']))
        # Do the same for the native options.
        for opt in self.NATIVE_OPTIONS:
            setattr(self,
                    opt,
                    self.get_ceph_option(opt))
            self.log.debug(' native option %s = %s', opt, getattr(self, opt))

    def handle_command(self, inbuf, cmd):
        """
        Run one of the commands declared in COMMANDS.

        :param inbuf: input buffer passed along with the command (unused)
        :param cmd: command dict; only cmd['prefix'] is consulted
        :return: HandleCommandResult with retval 0 for 'alerts send', or
                 -errno.EINVAL and an explanation on stderr for any other
                 prefix
        """
        if cmd['prefix'] != 'alerts send':
            # Report the problem instead of pretending the command worked.
            return HandleCommandResult(
                retval=-errno.EINVAL,
                stdout='',
                stderr="Unknown command '{0}'".format(cmd['prefix']))

        status = json.loads(self.get('health')['json'])
        # An empty diff makes the mail carry only the full health status.
        self._send_alert(status, {})
        return HandleCommandResult(
            retval=0,   # exit code
            stdout='',  # stdout
            stderr='')

    def _diff(self, last, new):
        """
        Compare two decoded health reports.

        :param last: previously seen health status (may be an empty dict)
        :param new: current health status
        :return: dict with any of the keys 'new', 'updated' and 'cleared',
                 each mapping check code -> alert; empty when nothing
                 relevant changed.  A check counts as 'updated' only when
                 its summary 'count' has grown.
        """
        d = {}
        last_checks = last.get('checks', {})
        new_checks = new.get('checks', {})
        for code, alert in new_checks.items():
            self.log.debug('new code %s alert %s', code, alert)
            if code not in last_checks:
                d.setdefault('new', {})[code] = alert
            elif (alert['summary'].get('count', 0) >
                  last_checks[code]['summary'].get('count', 0)):
                d.setdefault('updated', {})[code] = alert
        for code, alert in last_checks.items():
            self.log.debug('old code %s alert %s', code, alert)
            if code not in new_checks:
                d.setdefault('cleared', {})[code] = alert
        return d

    def _send_alert(self, status, diff):
        """
        Deliver an alert and record the delivery outcome.

        :param status: decoded health status to report
        :param diff: output of _diff(), or {} to send only the full status

        The health checks returned by the delivery attempt (none on
        success, or when smtp_host is unset) are handed to
        set_health_checks().
        """
        checks = {}
        if self.smtp_host:
            r = self._send_alert_smtp(status, diff)
            if r:
                checks.update(r)
        else:
            # Logger.warn() is a deprecated alias that newer Python
            # releases no longer provide; warning() works everywhere.
            self.log.warning(
                'Alert is not sent because smtp_host is not configured')
        self.set_health_checks(checks)

    def serve(self):
        """
        This method is called by the mgr when the module starts and can be
        used for any background activity.

        Loops until shutdown() clears self.run: fetch the health report,
        send an alert if _diff() finds a change, then wait `interval`
        seconds or until the event is set.
        """
        self.log.info("Starting")
        last_status = {}
        while self.run:
            new_status = json.loads(self.get('health')['json'])
            if new_status != last_status:
                self.log.debug('last_status %s', last_status)
                self.log.debug('new_status %s', new_status)
                diff = self._diff(last_status, new_status)
                self.log.debug('diff %s', diff)
                if diff:
                    self._send_alert(new_status, diff)
                # Remember the report even when it produced no diff, so
                # the next comparison is against the latest state.
                last_status = new_status

            self.log.debug('Sleeping for %d seconds', self.interval)
            self.event.wait(self.interval)
            self.event.clear()

    def shutdown(self):
        """
        This method is called by the mgr when the module needs to shut
        down (i.e., when the serve() function needs to exit).
        """
        self.log.info('Stopping')
        self.run = False
        # Wake serve() out of its wait so it notices self.run immediately.
        self.event.set()

    # SMTP
    def _smtp_format_alert(self, code, alert):
        """
        Render one health check as text for the mail body.

        :param code: health check code
        :param alert: check dict with 'severity', 'summary' (containing
                      'message') and optionally a 'detail' list of dicts
                      that each have a 'message'
        :return: a '[SEV] code: summary' line followed by one line per
                 detail message
        """
        # The severity is shortened to the part after the first '_'
        # (presumably values look like 'HEALTH_WARN' -- not shown here).
        # Fall back to the raw value rather than raising IndexError when
        # there is no underscore.
        sev_parts = alert['severity'].split('_')
        sev = sev_parts[1] if len(sev_parts) > 1 else sev_parts[0]
        lines = ['[{sev}] {code}: {summary}\n'.format(
            code=code,
            sev=sev,
            summary=alert['summary']['message'])]
        for detail in alert.get('detail', []):
            lines.append(' {message}\n'.format(
                message=detail['message']))
        return ''.join(lines)

    def _smtp_close(self, server):
        """Best-effort shutdown of an SMTP session created by this module."""
        try:
            server.quit()
        except Exception:
            # QUIT cannot be sent over an already broken session; just
            # release the socket.
            server.close()

    def _send_alert_smtp(self, status, diff):
        """
        Compose the alert mail and send it via SMTP.

        :param status: decoded health status; 'status' is used for the
                       subject and 'checks' for the full listing
        :param diff: output of _diff(); its 'new', 'updated' and 'cleared'
                     sections are listed first when present
        :return: None on success, otherwise a dict holding the
                 ALERTS_SMTP_ERROR health check that describes the failure
        """
        # message
        self.log.debug('_send_alert_smtp')
        parts = ['From: {from_name} <{sender}>\n'
                 'Subject: {status}\n'
                 'To: {target}\n'
                 '\n'
                 '{status}\n'.format(
                     sender=self.smtp_sender,
                     from_name=self.smtp_from_name,
                     status=status['status'],
                     target=self.smtp_destination)]

        for key, title in (('new', 'New'),
                           ('updated', 'Updated'),
                           ('cleared', 'Cleared')):
            if key in diff:
                parts.append('\n--- {0} ---\n'.format(title))
                for code, alert in diff[key].items():
                    parts.append(self._smtp_format_alert(code, alert))

        parts.append('\n\n=== Full health status ===\n')
        for code, alert in status.get('checks', {}).items():
            parts.append(self._smtp_format_alert(code, alert))

        message = ''.join(parts)
        self.log.debug('message: %s', message)

        # send
        server = None
        try:
            if self.smtp_ssl:
                server = smtplib.SMTP_SSL(self.smtp_host, self.smtp_port)
            else:
                server = smtplib.SMTP(self.smtp_host, self.smtp_port)
            if self.smtp_password:
                server.login(self.smtp_user, self.smtp_password)
            server.sendmail(self.smtp_sender, self.smtp_destination, message)
        except Exception as e:
            return {
                'ALERTS_SMTP_ERROR': {
                    'severity': 'warning',
                    'summary': 'unable to send alert email',
                    'count': 1,
                    'detail': [str(e)]
                }
            }
        finally:
            # Always release the connection, including when login or
            # sendmail failed after the socket was opened.
            if server is not None:
                self._smtp_close(server)
        self.log.debug('Sent email to %s', self.smtp_destination)
        return None