# Source: ceph/src/pybind/mgr/crash/module.py (ceph.git, as published on git.proxmox.com)
import datetime
import errno
import json
from collections import defaultdict

import six

from mgr_module import MgrModule
# strptime() layout of the "timestamp" field in stored crash metadata
# (date, time and microseconds); used by time_from_string().
DATEFMT = '%Y-%m-%d %H:%M:%S.%f'
12 class Module(MgrModule
):
def __init__(self, *args, **kwargs):
    """
    Initialise the module by delegating to the parent class.

    All positional and keyword arguments are forwarded unchanged.
    """
    parent = super(Module, self)
    parent.__init__(*args, **kwargs)
def handle_command(self, inbuf, command):
    """
    Dispatch a command to the matching handler listed in ``COMMANDS``.

    The first ``COMMANDS`` entry whose ``'cmd'`` string starts with
    ``command['prefix']`` wins.

    :param inbuf: input buffer, passed through to the handler
    :param command: command dict; must contain a ``'prefix'`` key
    :returns: whatever the handler returns, or
              ``(errno.EINVAL, '', message)`` when no entry matches
    """
    prefix = command['prefix']

    # Start from None so an unmatched prefix reaches the EINVAL return
    # below instead of failing on an unbound local.
    handler = None
    for cmd in self.COMMANDS:
        if cmd['cmd'].startswith(prefix):
            handler = cmd['handler']
            break

    if handler is None:
        return errno.EINVAL, '', 'unknown command %s' % prefix

    # Handlers are stored as plain functions, so self is passed explicitly.
    return handler(self, command, inbuf)
def validate_crash_metadata(inbuf):
    """
    Parse crash metadata and check that it names a crash.

    Takes no ``self`` although do_post() calls it through the instance.
    NOTE(review): presumably decorated with ``@staticmethod`` on a line not
    visible here -- confirm.

    :param inbuf: JSON text describing one crash
    :returns: the decoded metadata
    :raises AttributeError: if the metadata has no ``'crash_id'`` field
    """
    # Decoding errors are deliberately not caught here; the caller
    # reports them.
    metadata = json.loads(inbuf)
    if 'crash_id' not in metadata:
        raise AttributeError("missing 'crash_id' field")
    # do_post() indexes the result, so the decoded metadata must be returned.
    return metadata
def time_from_string(timestr):
    """
    Convert a crash timestamp string into a ``datetime``.

    :param timestr: timestamp laid out as ``DATEFMT``, optionally followed
                    by ``'Z'``
    :returns: the parsed ``datetime.datetime``
    """
    # The trailing 'Z' only says "UTC", which is always the case, so it is
    # stripped before parsing.
    without_zone = timestr.rstrip('Z')
    parsed = datetime.datetime.strptime(without_zone, DATEFMT)
    return parsed
def timestamp_filter(self, f):
    """
    Filter crash reports by timestamp.

    Reads every entry under the ``"crash/"`` store prefix, decodes it as
    JSON and keeps those whose ``"timestamp"`` field satisfies ``f``.

    :param f: f(time) return true to keep crash report
    :returns: list of ``(key, metadata)`` pairs for which f(time) returns
              true, with ``metadata`` already decoded from JSON
    """
    matches = []
    for key, raw in self.get_store_prefix("crash/").items():
        # Decode once and reuse the result for both the predicate and the
        # returned pair.
        meta = json.loads(raw)
        stamp = self.time_from_string(meta["timestamp"])
        if f(stamp):
            matches.append((key, meta))
    return matches
def do_info(self, cmd, inbuf):
    """
    Handler for ``crash info``: return the stored metadata of one crash.

    :param cmd: command dict; the crash id is read from ``cmd['id']``
    :param inbuf: unused
    :returns: ``(0, stored_value, '')`` when the crash exists, otherwise
              ``(errno.EINVAL, '', message)``
    """
    crashid = cmd['id']
    key = 'crash/%s' % crashid
    val = self.get_store(key)
    if not val:
        return errno.EINVAL, '', 'crash info: %s not found' % crashid
    # NOTE(review): the success return is not visible in the extracted
    # source; this mirrors do_ls(), which puts its output in the second
    # slot -- confirm.
    return 0, val, ''
def do_post(self, cmd, inbuf):
    """
    Handler for ``crash post``: store a new crash report.

    :param cmd: command dict (unused)
    :param inbuf: JSON crash metadata; must contain ``'crash_id'``
    :returns: ``(0, '', '')`` on success, or
              ``(errno.EINVAL, '', message)`` if the metadata is rejected
    """
    # Any validation failure is turned into an EINVAL reply rather than
    # propagating out of the handler.
    try:
        metadata = self.validate_crash_metadata(inbuf)
    except Exception as e:
        return errno.EINVAL, '', 'malformed crash metadata: %s' % e

    crashid = metadata['crash_id']
    key = 'crash/%s' % crashid
    # repeated stores of same item are ignored silently
    if not self.get_store(key):
        # The raw input text is stored, not the decoded metadata.
        self.set_store(key, inbuf)
    return 0, '', ''
def do_ls(self, cmd, inbuf):
    """
    Handler for ``crash ls``: list every saved crash.

    Each output line is ``"<crash id> <entity name>"``; the entity name
    falls back to ``'unknown'`` when the metadata has none.

    :param cmd: command dict (unused)
    :param inbuf: unused
    :returns: ``(0, listing, '')`` with one crash per line
    """
    prefix = 'crash/'
    lines = []
    for key, meta in self.timestamp_filter(lambda ts: True):
        # Strip only the leading store prefix, so an id that itself
        # contains "crash/" is shown intact.
        crashid = key[len(prefix):] if key.startswith(prefix) else key
        entity_name = meta.get('entity_name', 'unknown')
        lines.append("%s %s" % (crashid, entity_name))
    # Sort so the listing does not depend on store iteration order.
    lines.sort()
    return 0, '\n'.join(lines), ''
def do_rm(self, cmd, inbuf):
    """
    Handler for ``crash rm``: remove one saved crash.

    :param cmd: command dict; the crash id is read from ``cmd['id']``
    :param inbuf: unused
    :returns: ``(0, '', '')``
    """
    crashid = cmd['id']
    key = 'crash/%s' % crashid
    self.set_store(key, None)  # removes key
    return 0, '', ''
def do_prune(self, cmd, inbuf):
    """
    Handler for ``crash prune``: remove crashes older than ``keep`` days.

    :param cmd: command dict; ``cmd['keep']`` is the age limit in days and
                must be convertible to ``int``
    :param inbuf: unused
    :returns: ``(0, '', '')`` on success, or
              ``(errno.EINVAL, '', message)`` if ``keep`` is not an integer
    """
    # Naive UTC "now", to compare with the naive datetimes that
    # timestamp_filter() hands to the predicate.
    now = datetime.datetime.utcnow()

    try:
        keep = int(cmd['keep'])
    except (TypeError, ValueError):
        return errno.EINVAL, '', 'keep argument must be integer'

    cutoff = now - datetime.timedelta(days=keep)

    for key, _ in self.timestamp_filter(lambda ts: ts <= cutoff):
        self.set_store(key, None)  # removes key

    return 0, '', ''
110 def do_stat(self
, cmd
, inbuf
):
111 # age in days for reporting, ordered smallest first
117 count
= len(bindict
['idlist'])
120 '%d older than %s days old:' % (count
, bindict
['age'])
122 for crashid
in bindict
['idlist']:
123 binlines
.append(crashid
)
124 return '\n'.join(binlines
)
127 now
= datetime
.datetime
.utcnow()
128 for i
, age
in enumerate(bins
):
129 agelimit
= now
- datetime
.timedelta(days
=age
)
132 'agelimit': agelimit
,
136 for key
, meta
in six
.iteritems(self
.get_store_prefix('crash/')):
138 meta
= json
.loads(meta
)
139 stamp
= self
.time_from_string(meta
['timestamp'])
140 crashid
= meta
['crash_id']
141 for i
, bindict
in enumerate(bins
):
142 if stamp
<= bindict
['agelimit']:
143 bindict
['idlist'].append(crashid
)
144 # don't count this one again
147 retlines
.append('%d crashes recorded' % total
)
150 retlines
.append(binstr(bindict
))
151 return 0, '\n'.join(retlines
), ''
def do_json_report(self, cmd, inbuf):
    """
    Return a machine readable summary of recent crashes.

    Counts the crashes from the last ``hours`` hours per process name;
    crashes with a missing or empty ``"process_name"`` are counted under
    ``"unknown"``.

    :param cmd: command dict; ``cmd['hours']`` must be convertible to
                ``int``
    :param inbuf: unused
    :returns: ``(0, '', json_text)`` where ``json_text`` maps process name
              to crash count, or ``(errno.EINVAL, '', message)`` if
              ``hours`` is not an integer
    """
    try:
        hours = int(cmd['hours'])
    except (TypeError, ValueError):
        return errno.EINVAL, '', '<hours> argument must be integer'

    report = defaultdict(int)
    # Naive UTC cutoff, to compare with the naive datetimes that
    # timestamp_filter() hands to the predicate.
    cutoff = datetime.datetime.utcnow() - datetime.timedelta(hours=hours)
    for _, meta in self.timestamp_filter(lambda ts: ts >= cutoff):
        pname = meta.get("process_name") or "unknown"
        report[pname] += 1

    return 0, '', json.dumps(report)
173 # test time conversion
174 timestr
= '2018-06-22 20:35:38.058818Z'
175 dt
= self
.time_from_string(timestr
)
176 if dt
!= datetime
.datetime(2018, 6, 22, 20, 35, 38, 58818):
177 raise RuntimeError('time_from_string() failed')
181 'cmd': 'crash info name=id,type=CephString',
182 'desc': 'show crash dump metadata',
188 'desc': 'Show saved crash dumps',
194 'desc': 'Add a crash dump (use -i <jsonfile>)',
199 'cmd': 'crash prune name=keep,type=CephString',
200 'desc': 'Remove crashes older than <keep> days',
205 'cmd': 'crash rm name=id,type=CephString',
206 'desc': 'Remove a saved crash <id>',
212 'desc': 'Summarize recorded crashes',
217 'cmd': 'crash json_report name=hours,type=CephString',
218 'desc': 'Crashes in the last <hours> hours',
220 'handler': do_json_report
,