]>
Commit | Line | Data |
---|---|---|
92f5a8d4 TL |
1 | |
2 | """ | |
3 | A simple cluster health alerting module. | |
4 | """ | |
5 | ||
6 | from mgr_module import MgrModule, HandleCommandResult | |
7 | from threading import Event | |
8 | import errno | |
9 | import json | |
10 | import smtplib | |
11 | ||
class Alerts(MgrModule):
    """
    Cluster health alerting module.

    A background loop (see serve()) periodically fetches the cluster health
    report, compares it with the previously seen report and, when health
    checks were added, grew, or were cleared, sends a summary email over
    SMTP.  Delivery failures are reported back through the module's own
    ALERTS_SMTP_ERROR health check.
    """

    COMMANDS = [
        {
            "cmd": "alerts send",
            "desc": "(re)send alerts immediately",
            "perm": "r"
        },
    ]

    MODULE_OPTIONS = [
        {
            'name': 'interval',
            'type': 'seconds',
            'default': 60,
            'desc': 'How frequently to reexamine health status',
            'runtime': True,
        },
        # smtp
        {
            'name': 'smtp_host',
            # An empty host means "SMTP delivery disabled"; see _send_alert().
            'default': '',
            'desc': 'SMTP server',
            'runtime': True,
        },
        {
            'name': 'smtp_destination',
            'default': '',
            'desc': 'Email address to send alerts to',
            'runtime': True,
        },
        {
            'name': 'smtp_port',
            'type': 'int',
            'default': 465,
            'desc': 'SMTP port',
            'runtime': True,
        },
        {
            'name': 'smtp_ssl',
            'type': 'bool',
            'default': True,
            'desc': 'Use SSL to connect to SMTP server',
            'runtime': True,
        },
        {
            'name': 'smtp_user',
            'default': '',
            'desc': 'User to authenticate as',
            'runtime': True,
        },
        {
            'name': 'smtp_password',
            'default': '',
            'desc': 'Password to authenticate with',
            'runtime': True,
        },
        {
            'name': 'smtp_sender',
            'default': '',
            'desc': 'SMTP envelope sender',
            'runtime': True,
        },
        {
            'name': 'smtp_from_name',
            'default': 'Ceph',
            'desc': 'Email From: name',
            'runtime': True,
        },
    ]

    # These are "native" Ceph options that this module cares about.
    NATIVE_OPTIONS = [
    ]

    def __init__(self, *args, **kwargs):
        """Initialize loop-control state and load the configuration."""
        super(Alerts, self).__init__(*args, **kwargs)

        # set up some members to enable the serve() method and shutdown()
        self.run = True
        self.event = Event()

        # ensure config options members are initialized; see config_notify()
        self.config_notify()

        self.log.info("Init")

    def config_notify(self):
        """
        This method is called whenever one of our config options is changed.

        Every entry of MODULE_OPTIONS and NATIVE_OPTIONS is mirrored into an
        instance attribute of the same name, so that, for instance, the
        'interval' option is always available as 'self.interval'.
        """
        for opt in self.MODULE_OPTIONS:
            name = opt['name']
            value = self.get_module_option(name)
            # Fall back to the declared default only when nothing was
            # returned.  Using `or` here would discard explicit falsy
            # settings (e.g. smtp_ssl = False would silently become True),
            # and indexing opt['default'] would raise KeyError for an option
            # that declares no default.
            if value is None:
                value = opt.get('default')
            setattr(self, name, value)
            # Never write the SMTP password to the log.
            shown = '***' if (name == 'smtp_password' and value) else value
            self.log.debug(' mgr option %s = %s', name, shown)
        # Do the same for the native options.
        for opt in self.NATIVE_OPTIONS:
            setattr(self, opt, self.get_ceph_option(opt))
            self.log.debug(' native option %s = %s', opt, getattr(self, opt))

    def handle_command(self, inbuf, cmd):
        """
        Handle a command declared in COMMANDS.

        :param inbuf: input buffer supplied with the command (unused)
        :param cmd: command dict; cmd['prefix'] selects the command
        :return: HandleCommandResult; retval is 0 for 'alerts send' and
                 -EINVAL for a prefix this module does not implement
        """
        if cmd['prefix'] == 'alerts send':
            status = json.loads(self.get('health')['json'])
            # An empty diff means the mail carries only the full status.
            self._send_alert(status, {})
            return HandleCommandResult(
                retval=0,   # exit code
                stdout='',  # stdout
                stderr='')
        # Do not report success for something we did not do.
        return HandleCommandResult(
            retval=-errno.EINVAL,
            stdout='',
            stderr='unknown command: %s' % cmd['prefix'])

    def _diff(self, last, new):
        """
        Compare two health reports.

        :param last: previously seen health report (may be an empty dict)
        :param new: current health report
        :return: dict with any of the keys 'new' (checks absent from `last`),
                 'updated' (checks whose summary count increased) and
                 'cleared' (checks absent from `new`), each mapping a check
                 code to its alert; empty when nothing of interest changed
        """
        d = {}
        last_checks = last.get('checks', {})
        new_checks = new.get('checks', {})
        for code, alert in new_checks.items():
            self.log.debug('new code %s alert %s', code, alert)
            if code not in last_checks:
                d.setdefault('new', {})[code] = alert
            elif alert['summary'].get('count', 0) > \
                    last_checks[code]['summary'].get('count', 0):
                # Only a growing count is reported; a shrinking one is not.
                d.setdefault('updated', {})[code] = alert
        for code, alert in last_checks.items():
            self.log.debug('old code %s alert %s', code, alert)
            if code not in new_checks:
                d.setdefault('cleared', {})[code] = alert
        return d

    def _send_alert(self, status, diff):
        """
        Deliver an alert and publish the delivery result as health checks.

        :param status: current health report
        :param diff: result of _diff(); may be empty

        When smtp_host is not set nothing is sent.  In every case the
        module's health checks are replaced, so an earlier delivery error is
        cleared once delivery succeeds or SMTP is disabled.
        """
        checks = {}
        if self.smtp_host:
            r = self._send_alert_smtp(status, diff)
            if r:
                checks.update(r)
        self.set_health_checks(checks)

    def serve(self):
        """
        This method is called by the mgr when the module starts and can be
        used for any background activity.

        Polls the health report every `interval` seconds until shutdown()
        is called, sending an alert whenever _diff() reports a change.
        """
        self.log.info("Starting")
        last_status = {}
        while self.run:
            new_status = json.loads(self.get('health')['json'])
            if new_status != last_status:
                self.log.debug('last_status %s', last_status)
                self.log.debug('new_status %s', new_status)
                diff = self._diff(last_status, new_status)
                self.log.debug('diff %s', diff)
                if diff:
                    self._send_alert(new_status, diff)
                last_status = new_status

            self.log.debug('Sleeping for %d seconds', self.interval)
            # wait() returns early when shutdown() sets the event.
            self.event.wait(self.interval)
            self.event.clear()

    def shutdown(self):
        """
        This method is called by the mgr when the module needs to shut
        down (i.e., when the serve() function needs to exit).
        """
        self.log.info('Stopping')
        self.run = False
        self.event.set()

    # SMTP
    def _smtp_format_alert(self, code, alert):
        """
        Render one health check as plain text.

        :param code: health check code
        :param alert: dict with 'severity', 'summary' (containing 'message')
                      and optionally 'detail' (list of dicts with 'message')
        :return: a '[SEV] CODE: summary' line followed by one line per
                 detail entry
        """
        # 'HEALTH_WARN' -> 'WARN'; a severity without an underscore is used
        # as-is instead of raising IndexError.
        sev = alert['severity'].split('_', 1)[-1]
        lines = ['[{sev}] {code}: {summary}\n'.format(
            code=code,
            sev=sev,
            summary=alert['summary']['message'])]
        for detail in alert.get('detail', []):
            lines.append(' {message}\n'.format(message=detail['message']))
        return ''.join(lines)

    def _send_alert_smtp(self, status, diff):
        """
        Build the alert email and send it through the configured SMTP server.

        :param status: current health report; status['status'] is used as
                       the subject and first body line
        :param diff: result of _diff(); its 'new', 'updated' and 'cleared'
                     sections are listed ahead of the full status
        :return: None on success, otherwise a dict holding an
                 ALERTS_SMTP_ERROR health check describing the failure
        """
        # message
        self.log.debug('_send_alert_smtp')
        parts = ['From: {from_name} <{sender}>\n'
                 'Subject: {status}\n'
                 'To: {target}\n'
                 '\n'
                 '{status}\n'.format(
                     sender=self.smtp_sender,
                     from_name=self.smtp_from_name,
                     status=status['status'],
                     target=self.smtp_destination)]

        for key, title in (('new', 'New'),
                           ('updated', 'Updated'),
                           ('cleared', 'Cleared')):
            if key in diff:
                parts.append('\n--- {title} ---\n'.format(title=title))
                for code, alert in diff[key].items():
                    parts.append(self._smtp_format_alert(code, alert))

        parts.append('\n\n=== Full health status ===\n')
        for code, alert in status.get('checks', {}).items():
            parts.append(self._smtp_format_alert(code, alert))

        message = ''.join(parts)
        self.log.debug('message: %s', message)

        # send
        server = None
        try:
            if self.smtp_ssl:
                server = smtplib.SMTP_SSL(self.smtp_host, self.smtp_port)
            else:
                server = smtplib.SMTP(self.smtp_host, self.smtp_port)
            if self.smtp_password:
                server.login(self.smtp_user, self.smtp_password)
            server.sendmail(self.smtp_sender, self.smtp_destination, message)
            server.quit()
        except Exception as e:
            # Deliberately broad: any failure here is turned into a health
            # warning rather than allowed to kill the serve() loop.
            self.log.error('unable to send alert email: %s', e)
            return {
                'ALERTS_SMTP_ERROR': {
                    'severity': 'warning',
                    'summary': 'unable to send alert email',
                    'count': 1,
                    'detail': [str(e)]
                }
            }
        finally:
            # Release the socket when login/sendmail failed before quit();
            # closing an already-closed connection is harmless.
            if server is not None:
                server.close()
        self.log.debug('Sent email to %s', self.smtp_destination)
        return None