]> git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/alerts/module.py
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / pybind / mgr / alerts / module.py
1
2 """
3 A simple cluster health alerting module.
4 """
5
6 from mgr_module import MgrModule, HandleCommandResult
7 from threading import Event
8 import errno
9 import json
10 import smtplib
11
class Alerts(MgrModule):
    """
    Manager module that watches cluster health and emails changes via SMTP.

    The serve() loop periodically fetches the 'health' report, compares it
    with the previously seen report and, when checks were raised, grew or
    were cleared, sends an email describing the change.  A failure to send
    is surfaced as an ALERTS_SMTP_ERROR health check raised by this module.
    """

    COMMANDS = [
        {
            "cmd": "alerts send",
            "desc": "(re)send alerts immediately",
            "perm": "r"
        },
    ]

    MODULE_OPTIONS = [
        {
            'name': 'interval',
            'type': 'seconds',
            'default': 60,
            'desc': 'How frequently to reexamine health status',
            'runtime': True,
        },
        # smtp
        {
            'name': 'smtp_host',
            # An empty host means "SMTP alerting disabled"; see _send_alert().
            'default': '',
            'desc': 'SMTP server',
            'runtime': True,
        },
        {
            'name': 'smtp_destination',
            'default': '',
            'desc': 'Email address to send alerts to',
            'runtime': True,
        },
        {
            'name': 'smtp_port',
            'type': 'int',
            'default': 465,
            'desc': 'SMTP port',
            'runtime': True,
        },
        {
            'name': 'smtp_ssl',
            'type': 'bool',
            'default': True,
            'desc': 'Use SSL to connect to SMTP server',
            'runtime': True,
        },
        {
            'name': 'smtp_user',
            'default': '',
            'desc': 'User to authenticate as',
            'runtime': True,
        },
        {
            'name': 'smtp_password',
            'default': '',
            'desc': 'Password to authenticate with',
            'runtime': True,
        },
        {
            'name': 'smtp_sender',
            'default': '',
            'desc': 'SMTP envelope sender',
            'runtime': True,
        },
        {
            'name': 'smtp_from_name',
            'default': 'Ceph',
            'desc': 'Email From: name',
            'runtime': True,
        },
    ]

    # These are "native" Ceph options that this module cares about.
    NATIVE_OPTIONS = [
    ]

    def __init__(self, *args, **kwargs):
        super(Alerts, self).__init__(*args, **kwargs)

        # set up some members to enable the serve() method and shutdown()
        self.run = True
        self.event = Event()

        # ensure config options members are initialized; see config_notify()
        self.config_notify()

        self.log.info("Init")

    def config_notify(self):
        """
        This method is called whenever one of our config options is changed.

        Every entry of MODULE_OPTIONS and NATIVE_OPTIONS is mirrored into an
        instance attribute of the same name, so that, for instance, the
        'interval' option is always available as 'self.interval'.
        """
        for opt in self.MODULE_OPTIONS:
            name = opt['name']
            value = self.get_module_option(name)
            # Fall back to the declared default only when nothing is set.
            # Testing for None (rather than truthiness) keeps explicitly
            # configured falsy values such as smtp_ssl=False, and .get()
            # tolerates options declared without a 'default' key.
            if value is None:
                value = opt.get('default')
            setattr(self, name, value)
            self.log.debug(' mgr option %s = %s', name, value)
        # Do the same for the native options.
        for opt in self.NATIVE_OPTIONS:
            setattr(self, opt, self.get_ceph_option(opt))
            self.log.debug(' native option %s = %s', opt, getattr(self, opt))

    def handle_command(self, inbuf, cmd):
        """
        Handle a command declared in COMMANDS.

        'alerts send' re-sends the current health status immediately, with an
        empty diff.  Any other prefix is rejected with -EINVAL.

        :param inbuf: input buffer supplied with the command (unused)
        :param cmd: dict describing the command; 'prefix' selects the action
        :return: HandleCommandResult(retval, stdout, stderr)
        """
        if cmd['prefix'] != 'alerts send':
            return HandleCommandResult(
                retval=-errno.EINVAL,
                stdout='',
                stderr='unknown command %s' % cmd['prefix'])

        status = json.loads(self.get('health')['json'])
        self._send_alert(status, {})
        return HandleCommandResult(
            retval=0,   # exit code
            stdout='',  # stdout
            stderr='')

    def _diff(self, last, new):
        """
        Compare two health reports and classify the checks that changed.

        :param last: previously seen health report (may be an empty dict)
        :param new: current health report
        :return: dict with any of the keys 'new' (codes absent from last),
                 'updated' (codes whose summary 'count' increased) and
                 'cleared' (codes absent from new), each mapping check code
                 to alert.  Keys with no entries are omitted, so an empty
                 dict means nothing worth reporting changed.
        """
        d = {}
        last_checks = last.get('checks', {})
        new_checks = new.get('checks', {})

        for code, alert in new_checks.items():
            self.log.debug('new code %s alert %s', code, alert)
            if code not in last_checks:
                d.setdefault('new', {})[code] = alert
            elif alert['summary'].get('count', 0) > \
                    last_checks[code]['summary'].get('count', 0):
                d.setdefault('updated', {})[code] = alert

        for code, alert in last_checks.items():
            self.log.debug('old code %s alert %s', code, alert)
            if code not in new_checks:
                d.setdefault('cleared', {})[code] = alert
        return d

    def _send_alert(self, status, diff):
        """
        Deliver an alert through every configured transport.

        Health checks returned by a transport (i.e. delivery errors) are
        published with set_health_checks(); publishing an empty dict when
        everything succeeded replaces whatever this module set before.
        """
        checks = {}
        if self.smtp_host:
            r = self._send_alert_smtp(status, diff)
            if r:
                checks.update(r)
        self.set_health_checks(checks)

    def serve(self):
        """
        This method is called by the mgr when the module starts and can be
        used for any background activity.

        Polls the health report every 'interval' seconds until shutdown()
        clears self.run, emailing whenever _diff() reports a change.
        """
        self.log.info("Starting")
        last_status = {}
        while self.run:
            new_status = json.loads(self.get('health')['json'])
            if new_status != last_status:
                self.log.debug('last_status %s', last_status)
                self.log.debug('new_status %s', new_status)
                diff = self._diff(last_status, new_status)
                self.log.debug('diff %s', diff)
                if diff:
                    self._send_alert(new_status, diff)
                last_status = new_status

            self.log.debug('Sleeping for %d seconds', self.interval)
            # shutdown() sets the event, which ends the wait early.
            self.event.wait(self.interval)
            self.event.clear()

    def shutdown(self):
        """
        This method is called by the mgr when the module needs to shut
        down (i.e., when the serve() function needs to exit).
        """
        self.log.info('Stopping')
        self.run = False
        self.event.set()

    # SMTP
    def _smtp_format_alert(self, code, alert):
        """
        Render one health check as text for the email body.

        :param code: health check code
        :param alert: dict with 'severity', 'summary' (holding 'message')
                      and optionally 'detail' (list of dicts with 'message')
        :return: a '[SEV] CODE: summary' line followed by one line per
                 detail message
        """
        severity = alert['severity']
        # Use the part after the first underscore (e.g. the WARN of a
        # PREFIX_WARN style value); keep the raw value if there is none.
        pieces = severity.split('_')
        sev = pieces[1] if len(pieces) > 1 else severity

        lines = ['[{sev}] {code}: {summary}\n'.format(
            code=code,
            sev=sev,
            summary=alert['summary']['message'])]
        for detail in alert.get('detail', []):
            lines.append(' {message}\n'.format(message=detail['message']))
        return ''.join(lines)

    def _smtp_build_message(self, status, diff):
        """
        Build the complete email text (headers and body) for an alert.

        The body lists the new/updated/cleared checks found in diff, then
        every check present in the full health status.
        """
        parts = ['From: {from_name} <{sender}>\n'
                 'Subject: {status}\n'
                 'To: {target}\n'
                 '\n'
                 '{status}\n'.format(
                     sender=self.smtp_sender,
                     from_name=self.smtp_from_name,
                     status=status['status'],
                     target=self.smtp_destination)]

        sections = (
            ('new', '\n--- New ---\n'),
            ('updated', '\n--- Updated ---\n'),
            ('cleared', '\n--- Cleared ---\n'),
        )
        for key, header in sections:
            if key in diff:
                parts.append(header)
                for code, alert in diff[key].items():
                    parts.append(self._smtp_format_alert(code, alert))

        parts.append('\n\n=== Full health status ===\n')
        for code, alert in status.get('checks', {}).items():
            parts.append(self._smtp_format_alert(code, alert))
        return ''.join(parts)

    def _send_alert_smtp(self, status, diff):
        """
        Email the alert using the smtp_* module options.

        :param status: full health report; 'status' becomes the subject
        :param diff: result of _diff(), or {} for an unconditional resend
        :return: None on success, otherwise a dict holding an
                 ALERTS_SMTP_ERROR health check that describes the failure
        """
        self.log.debug('_send_alert_smtp')
        message = self._smtp_build_message(status, diff)
        self.log.debug('message: %s', message)

        # send
        try:
            if self.smtp_ssl:
                server = smtplib.SMTP_SSL(self.smtp_host, self.smtp_port)
            else:
                server = smtplib.SMTP(self.smtp_host, self.smtp_port)
            try:
                if self.smtp_password:
                    server.login(self.smtp_user, self.smtp_password)
                server.sendmail(self.smtp_sender, self.smtp_destination,
                                message)
                server.quit()
            finally:
                # Make sure the socket is released even when login() or
                # sendmail() raised; a no-op after a successful quit().
                server.close()
        except Exception as e:
            self.log.error('unable to send alert email: %s', e)
            return {
                'ALERTS_SMTP_ERROR': {
                    'severity': 'warning',
                    'summary': 'unable to send alert email',
                    'count': 1,
                    'detail': [str(e)]
                }
            }
        self.log.debug('Sent email to %s', self.smtp_destination)
        return None