]> git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/crash/module.py
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / pybind / mgr / crash / module.py
1 from mgr_module import MgrModule
2 import datetime
3 import errno
4 import json
5 import six
6 from collections import defaultdict
7
8
# Timestamp format of the 'timestamp' field in stored crash metadata
# (naive UTC; the trailing 'Z' is stripped before parsing).
DATEFMT = '%Y-%m-%d %H:%M:%S.%f'
10
11
class Module(MgrModule):
    """Store and report daemon crash dumps.

    Crash metadata (JSON text) is persisted in the mgr key/value store
    under keys of the form 'crash/<crash_id>' and is queried, summarized
    and pruned through the 'ceph crash ...' commands declared in COMMANDS.
    """

    def __init__(self, *args, **kwargs):
        super(Module, self).__init__(*args, **kwargs)

    def handle_command(self, inbuf, command):
        """Dispatch an incoming command to its registered handler.

        :param inbuf: raw input buffer (JSON payload for 'crash post')
        :param command: command dict; 'prefix' selects the handler
        :returns: (retval, outbuf, outs) tuple
        """
        # BUG FIX: initialize handler so an unknown prefix returns
        # EINVAL; previously 'handler' was never assigned when no
        # COMMANDS entry matched, raising UnboundLocalError below.
        handler = None
        for cmd in self.COMMANDS:
            if cmd['cmd'].startswith(command['prefix']):
                handler = cmd['handler']
                break
        if handler is None:
            return errno.EINVAL, '', 'unknown command %s' % command['prefix']

        return handler(self, command, inbuf)

    @staticmethod
    def validate_crash_metadata(inbuf):
        """Parse crash metadata JSON and require a 'crash_id' field.

        :param inbuf: JSON text describing one crash
        :returns: the parsed metadata dict
        :raises ValueError: if inbuf is not valid JSON
        :raises AttributeError: if the 'crash_id' field is missing
        """
        # raise any exceptions to caller
        metadata = json.loads(inbuf)
        if 'crash_id' not in metadata:
            raise AttributeError("missing 'crash_id' field")
        return metadata

    @staticmethod
    def time_from_string(timestr):
        """Parse a stored timestamp string into a naive UTC datetime."""
        # drop the 'Z' timezone indication, it's always UTC
        timestr = timestr.rstrip('Z')
        return datetime.datetime.strptime(timestr, DATEFMT)

    def timestamp_filter(self, f):
        """
        Filter crash reports by timestamp.

        :param f: f(time) return true to keep crash report
        :returns: list of (key, metadata dict) pairs for which f(time)
                  returns true
        """
        # Parse each stored value exactly once; the previous version
        # json.loads()-ed every kept entry a second time when building
        # the result list.
        matches = []
        for key, meta in six.iteritems(self.get_store_prefix('crash/')):
            meta = json.loads(meta)
            if f(self.time_from_string(meta['timestamp'])):
                matches.append((key, meta))
        return matches

    # command handlers

    def do_info(self, cmd, inbuf):
        """Show the raw stored metadata for a single crash id."""
        crashid = cmd['id']
        key = 'crash/%s' % crashid
        val = self.get_store(key)
        if not val:
            return errno.EINVAL, '', 'crash info: %s not found' % crashid
        return 0, val, ''

    def do_post(self, cmd, inbuf):
        """Store a new crash dump; inbuf is the JSON metadata."""
        try:
            metadata = self.validate_crash_metadata(inbuf)
        except Exception as e:
            return errno.EINVAL, '', 'malformed crash metadata: %s' % e

        crashid = metadata['crash_id']
        key = 'crash/%s' % crashid
        # repeated stores of same item are ignored silently
        if not self.get_store(key):
            self.set_store(key, inbuf)
        return 0, '', ''

    def do_ls(self, cmd, inbuf):
        """List stored crash ids with their entity names, sorted."""
        keys = []
        for k, meta in self.timestamp_filter(lambda ts: True):
            entity_name = meta.get('entity_name', 'unknown')
            keys.append("%s %s" % (k.replace('crash/', ''), entity_name))
        keys.sort()
        return 0, '\n'.join(keys), ''

    def do_rm(self, cmd, inbuf):
        """Remove one stored crash dump by id."""
        crashid = cmd['id']
        key = 'crash/%s' % crashid
        self.set_store(key, None)       # removes key
        return 0, '', ''

    def do_prune(self, cmd, inbuf):
        """Remove crash dumps older than <keep> days."""
        now = datetime.datetime.utcnow()

        keep = cmd['keep']
        try:
            keep = int(keep)
        except ValueError:
            return errno.EINVAL, '', 'keep argument must be integer'

        cutoff = now - datetime.timedelta(days=keep)

        for key, _ in self.timestamp_filter(lambda ts: ts <= cutoff):
            self.set_store(key, None)

        return 0, '', ''

    def do_stat(self, cmd, inbuf):
        """Summarize stored crashes by age (cumulative 1/3/7-day bins)."""
        # age in days for reporting, ordered smallest first
        bins = [1, 3, 7]
        retlines = list()

        def binstr(bindict):
            # render one age bin: header line plus one crash id per line;
            # an empty bin renders as an empty string
            binlines = list()
            count = len(bindict['idlist'])
            if count:
                binlines.append(
                    '%d older than %s days old:' % (count, bindict['age'])
                )
                for crashid in bindict['idlist']:
                    binlines.append(crashid)
            return '\n'.join(binlines)

        total = 0
        now = datetime.datetime.utcnow()
        for i, age in enumerate(bins):
            agelimit = now - datetime.timedelta(days=age)
            bins[i] = {
                'age': age,
                'agelimit': agelimit,
                'idlist': list()
            }

        for key, meta in six.iteritems(self.get_store_prefix('crash/')):
            total += 1
            meta = json.loads(meta)
            stamp = self.time_from_string(meta['timestamp'])
            crashid = meta['crash_id']
            # Bins are cumulative: a crash older than 7 days also appears
            # in the 1- and 3-day bins.  (Removed a dead trailing
            # 'continue' whose comment falsely claimed it prevented
            # multiple counting — it never did.)
            for bindict in bins:
                if stamp <= bindict['agelimit']:
                    bindict['idlist'].append(crashid)

        retlines.append('%d crashes recorded' % total)

        for bindict in bins:
            retlines.append(binstr(bindict))
        return 0, '\n'.join(retlines), ''

    def do_json_report(self, cmd, inbuf):
        """
        Return a machine readable summary of recent crashes.
        """
        try:
            hours = int(cmd['hours'])
        except ValueError:
            return errno.EINVAL, '', '<hours> argument must be integer'

        # per-process-name crash counts within the window
        report = defaultdict(lambda: 0)
        cutoff = datetime.datetime.utcnow() - datetime.timedelta(hours=hours)
        for _, meta in self.timestamp_filter(lambda ts: ts >= cutoff):
            pname = meta.get("process_name", "unknown")
            if not pname:
                pname = "unknown"
            report[pname] += 1

        # NOTE: the JSON is deliberately placed in the 'outs' slot of the
        # (retval, outbuf, outs) tuple, matching existing consumers.
        return 0, '', json.dumps(report)

    def self_test(self):
        """Module self-test hook: verify timestamp round-tripping."""
        # test time conversion
        timestr = '2018-06-22 20:35:38.058818Z'
        dt = self.time_from_string(timestr)
        if dt != datetime.datetime(2018, 6, 22, 20, 35, 38, 58818):
            raise RuntimeError('time_from_string() failed')

    COMMANDS = [
        {
            'cmd': 'crash info name=id,type=CephString',
            'desc': 'show crash dump metadata',
            'perm': 'r',
            'handler': do_info,
        },
        {
            'cmd': 'crash ls',
            'desc': 'Show saved crash dumps',
            'perm': 'r',
            'handler': do_ls,
        },
        {
            'cmd': 'crash post',
            'desc': 'Add a crash dump (use -i <jsonfile>)',
            'perm': 'rw',
            'handler': do_post,
        },
        {
            'cmd': 'crash prune name=keep,type=CephString',
            'desc': 'Remove crashes older than <keep> days',
            'perm': 'rw',
            'handler': do_prune,
        },
        {
            'cmd': 'crash rm name=id,type=CephString',
            'desc': 'Remove a saved crash <id>',
            'perm': 'rw',
            'handler': do_rm,
        },
        {
            'cmd': 'crash stat',
            'desc': 'Summarize recorded crashes',
            'perm': 'r',
            'handler': do_stat,
        },
        {
            'cmd': 'crash json_report name=hours,type=CephString',
            'desc': 'Crashes in the last <hours> hours',
            'perm': 'r',
            'handler': do_json_report,
        },
    ]