]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/alerts/module.py
d6cec4b64baccd56727f2179094ea7c65adedd2b
3 A simple cluster health alerting module.
6 from mgr_module
import MgrModule
, HandleCommandResult
7 from threading
import Event
12 class Alerts(MgrModule
):
16 "desc": "(re)send alerts immediately",
26 'desc': 'How frequently to reexamine health status',
33 'desc': 'SMTP server',
37 'name': 'smtp_destination',
39 'desc': 'Email address to send alerts to',
53 'desc': 'Use SSL to connect to SMTP server',
59 'desc': 'User to authenticate as',
63 'name': 'smtp_password',
65 'desc': 'Password to authenticate with',
69 'name': 'smtp_sender',
71 'desc': 'SMTP envelope sender',
75 'name': 'smtp_from_name',
77 'desc': 'Email From: name',
82 # These are "native" Ceph options that this module cares about.
86 def __init__(self
, *args
, **kwargs
):
87 super(Alerts
, self
).__init
__(*args
, **kwargs
)
89 # set up some members to enable the serve() method and shutdown()
93 # ensure config options members are initialized; see config_notify()
99 def config_notify(self
):
101 This method is called whenever one of our config options is changed.
103 # This is some boilerplate that stores MODULE_OPTIONS in a class
104 # member, so that, for instance, the 'emphatic' option is always
105 # available as 'self.emphatic'.
106 for opt
in self
.MODULE_OPTIONS
:
109 self
.get_module_option(opt
['name']))
110 self
.log
.debug(' mgr option %s = %s',
111 opt
['name'], getattr(self
, opt
['name']))
112 # Do the same for the native options.
113 for opt
in self
.NATIVE_OPTIONS
:
116 self
.get_ceph_option(opt
))
117 self
.log
.debug(' native option %s = %s', opt
, getattr(self
, opt
))
119 def handle_command(self
, inbuf
, cmd
):
123 if cmd
['prefix'] == 'alerts send':
124 status
= json
.loads(self
.get('health')['json'])
125 self
._send
_alert
(status
, {})
126 return HandleCommandResult(
127 retval
=ret
, # exit code
131 def _diff(self
, last
, new
):
133 for code
, alert
in new
.get('checks', {}).items():
134 self
.log
.debug('new code %s alert %s' % (code
, alert
))
135 if code
not in last
.get('checks', {}):
138 d
['new'][code
] = alert
139 elif alert
['summary'].get('count', 0) > \
140 last
['checks'][code
]['summary'].get('count', 0):
141 if 'updated' not in d
:
143 d
['updated'][code
] = alert
144 for code
, alert
in last
.get('checks', {}).items():
145 self
.log
.debug('old code %s alert %s' % (code
, alert
))
146 if code
not in new
.get('checks', {}):
147 if 'cleared' not in d
:
149 d
['cleared'][code
] = alert
152 def _send_alert(self
, status
, diff
):
155 r
= self
._send
_alert
_smtp
(status
, diff
)
157 for code
, alert
in r
.items():
160 self
.log
.warn('Alert is not sent because smtp_host is not configured')
161 self
.set_health_checks(checks
)
165 This method is called by the mgr when the module starts and can be
166 used for any background activity.
168 self
.log
.info("Starting")
171 # Do some useful background work here.
172 new_status
= json
.loads(self
.get('health')['json'])
173 if new_status
!= last_status
:
174 self
.log
.debug('last_status %s' % last_status
)
175 self
.log
.debug('new_status %s' % new_status
)
176 diff
= self
._diff
(last_status
,
178 self
.log
.debug('diff %s' % diff
)
180 self
._send
_alert
(new_status
, diff
)
181 last_status
= new_status
183 self
.log
.debug('Sleeping for %d seconds', self
.interval
)
184 ret
= self
.event
.wait(self
.interval
)
189 This method is called by the mgr when the module needs to shut
190 down (i.e., when the serve() function needs to exit).
192 self
.log
.info('Stopping')
197 def _smtp_format_alert(self
, code
, alert
):
198 r
= '[{sev}] {code}: {summary}\n'.format(
200 sev
=alert
['severity'].split('_')[1],
201 summary
=alert
['summary']['message'])
202 for detail
in alert
['detail']:
203 r
+= ' {message}\n'.format(
204 message
=detail
['message'])
207 def _send_alert_smtp(self
, status
, diff
):
209 self
.log
.debug('_send_alert_smtp')
210 message
= ('From: {from_name} <{sender}>\n'
211 'Subject: {status}\n'
215 sender
=self
.smtp_sender
,
216 from_name
=self
.smtp_from_name
,
217 status
=status
['status'],
218 target
=self
.smtp_destination
))
221 message
+= ('\n--- New ---\n')
222 for code
, alert
in diff
['new'].items():
223 message
+= self
._smtp
_format
_alert
(code
, alert
)
224 if 'updated' in diff
:
225 message
+= ('\n--- Updated ---\n')
226 for code
, alert
in diff
['updated'].items():
227 message
+= self
._smtp
_format
_alert
(code
, alert
)
228 if 'cleared' in diff
:
229 message
+= ('\n--- Cleared ---\n')
230 for code
, alert
in diff
['cleared'].items():
231 message
+= self
._smtp
_format
_alert
(code
, alert
)
233 message
+= ('\n\n=== Full health status ===\n')
234 for code
, alert
in status
['checks'].items():
235 message
+= self
._smtp
_format
_alert
(code
, alert
)
237 self
.log
.debug('message: %s' % message
)
242 server
= smtplib
.SMTP_SSL(self
.smtp_host
, self
.smtp_port
)
244 server
= smtplib
.SMTP(self
.smtp_host
, self
.smtp_port
)
245 if self
.smtp_password
:
246 server
.login(self
.smtp_user
, self
.smtp_password
)
247 server
.sendmail(self
.smtp_sender
, self
.smtp_destination
, message
)
249 except Exception as e
:
251 'ALERTS_SMTP_ERROR': {
252 'severity': 'warning',
253 'summary': 'unable to send alert email',
258 self
.log
.debug('Sent email to %s' % self
.smtp_destination
)