]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/alerts/module.py
3 A simple cluster health alerting module.
6 from mgr_module
import MgrModule
, HandleCommandResult
7 from threading
import Event
12 class Alerts(MgrModule
):
16 "desc": "(re)send alerts immediately",
26 'desc': 'How frequently to reexamine health status',
32 'desc': 'SMTP server',
36 'name': 'smtp_destination',
38 'desc': 'Email address to send alerts to',
52 'desc': 'Use SSL to connect to SMTP server',
58 'desc': 'User to authenticate as',
62 'name': 'smtp_password',
64 'desc': 'Password to authenticate with',
68 'name': 'smtp_sender',
70 'desc': 'SMTP envelope sender',
74 'name': 'smtp_from_name',
76 'desc': 'Email From: name',
81 # These are "native" Ceph options that this module cares about.
85 def __init__(self
, *args
, **kwargs
):
86 super(Alerts
, self
).__init
__(*args
, **kwargs
)
88 # set up some members to enable the serve() method and shutdown()
92 # ensure config options members are initialized; see config_notify()
98 def config_notify(self
):
100 This method is called whenever one of our config options is changed.
102 # This is some boilerplate that stores MODULE_OPTIONS in a class
103 # member, so that, for instance, the 'emphatic' option is always
104 # available as 'self.emphatic'.
105 for opt
in self
.MODULE_OPTIONS
:
108 self
.get_module_option(opt
['name']) or opt
['default'])
109 self
.log
.debug(' mgr option %s = %s',
110 opt
['name'], getattr(self
, opt
['name']))
111 # Do the same for the native options.
112 for opt
in self
.NATIVE_OPTIONS
:
115 self
.get_ceph_option(opt
))
116 self
.log
.debug(' native option %s = %s', opt
, getattr(self
, opt
))
118 def handle_command(self
, inbuf
, cmd
):
122 if cmd
['prefix'] == 'alerts send':
123 status
= json
.loads(self
.get('health')['json'])
124 self
._send
_alert
(status
, {})
125 return HandleCommandResult(
126 retval
=ret
, # exit code
130 def _diff(self
, last
, new
):
132 for code
, alert
in new
.get('checks', {}).items():
133 self
.log
.debug('new code %s alert %s' % (code
, alert
))
134 if code
not in last
.get('checks', {}):
137 d
['new'][code
] = alert
138 elif alert
['summary'].get('count', 0) > \
139 last
['checks'][code
]['summary'].get('count', 0):
140 if 'updated' not in d
:
142 d
['updated'][code
] = alert
143 for code
, alert
in last
.get('checks', {}).items():
144 self
.log
.debug('old code %s alert %s' % (code
, alert
))
145 if code
not in new
.get('checks', {}):
146 if 'cleared' not in d
:
148 d
['cleared'][code
] = alert
151 def _send_alert(self
, status
, diff
):
154 r
= self
._send
_alert
_smtp
(status
, diff
)
156 for code
, alert
in r
.items():
158 self
.set_health_checks(checks
)
162 This method is called by the mgr when the module starts and can be
163 used for any background activity.
165 self
.log
.info("Starting")
168 # Do some useful background work here.
169 new_status
= json
.loads(self
.get('health')['json'])
170 if new_status
!= last_status
:
171 self
.log
.debug('last_status %s' % last_status
)
172 self
.log
.debug('new_status %s' % new_status
)
173 diff
= self
._diff
(last_status
,
175 self
.log
.debug('diff %s' % diff
)
177 self
._send
_alert
(new_status
, diff
)
178 last_status
= new_status
180 self
.log
.debug('Sleeping for %d seconds', self
.interval
)
181 ret
= self
.event
.wait(self
.interval
)
186 This method is called by the mgr when the module needs to shut
187 down (i.e., when the serve() function needs to exit).
189 self
.log
.info('Stopping')
194 def _smtp_format_alert(self
, code
, alert
):
195 r
= '[{sev}] {code}: {summary}\n'.format(
197 sev
=alert
['severity'].split('_')[1],
198 summary
=alert
['summary']['message'])
199 for detail
in alert
['detail']:
200 r
+= ' {message}\n'.format(
201 message
=detail
['message'])
204 def _send_alert_smtp(self
, status
, diff
):
206 self
.log
.debug('_send_alert_smtp')
207 message
= ('From: {from_name} <{sender}>\n'
208 'Subject: {status}\n'
212 sender
=self
.smtp_sender
,
213 from_name
=self
.smtp_from_name
,
214 status
=status
['status'],
215 target
=self
.smtp_destination
))
218 message
+= ('\n--- New ---\n')
219 for code
, alert
in diff
['new'].items():
220 message
+= self
._smtp
_format
_alert
(code
, alert
)
221 if 'updated' in diff
:
222 message
+= ('\n--- Updated ---\n')
223 for code
, alert
in diff
['updated'].items():
224 message
+= self
._smtp
_format
_alert
(code
, alert
)
225 if 'cleared' in diff
:
226 message
+= ('\n--- Cleared ---\n')
227 for code
, alert
in diff
['cleared'].items():
228 message
+= self
._smtp
_format
_alert
(code
, alert
)
230 message
+= ('\n\n=== Full health status ===\n')
231 for code
, alert
in status
['checks'].items():
232 message
+= self
._smtp
_format
_alert
(code
, alert
)
234 self
.log
.debug('message: %s' % message
)
239 server
= smtplib
.SMTP_SSL(self
.smtp_host
, self
.smtp_port
)
241 server
= smtplib
.SMTP(self
.smtp_host
, self
.smtp_port
)
242 if self
.smtp_password
:
243 server
.login(self
.smtp_user
, self
.smtp_password
)
244 server
.sendmail(self
.smtp_sender
, self
.smtp_destination
, message
)
246 except Exception as e
:
248 'ALERTS_SMTP_ERROR': {
249 'severity': 'warning',
250 'summary': 'unable to send alert email',
255 self
.log
.debug('Sent email to %s' % self
.smtp_destination
)