]>
Commit | Line | Data |
---|---|---|
1 | #!@Python3_EXECUTABLE@ | |
2 | # -*- mode:python -*- | |
3 | # vim: ts=4 sw=4 smarttab expandtab | |
4 | ||
5 | import argparse | |
6 | import logging | |
7 | import os | |
8 | import signal | |
9 | import socket | |
10 | import subprocess | |
11 | import sys | |
12 | import time | |
13 | ||
# Configure the root logger once at import time; everything in this script
# logs through the 'ceph-crash' logger at INFO level and above.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger('ceph-crash')

# Ceph entity names used with `ceph -n <name>` when posting a crash.
# post_crash() walks this list in order and stops at the first name for
# which the command succeeds: the host-specific crash client first, then
# the generic crash client, then the admin client.
auth_names = ['client.crash.%s' % socket.gethostname(),
              'client.crash',
              'client.admin']
20 | ||
def parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns an ``argparse.Namespace`` with three attributes:

    * ``path``  -- base directory scanned for crash dumps
      (default ``/var/lib/ceph/crash``).
    * ``delay`` -- float, minutes between scans (default ``10.0``).
    * ``name``  -- entity name to authenticate as, or ``None`` when the
      option is not given.
    """
    cli = argparse.ArgumentParser()

    # (flags, keyword options) for each supported switch, in display order.
    option_table = (
        (('-p', '--path'),
         dict(default='/var/lib/ceph/crash',
              help='base path to monitor for crash dumps')),
        (('-d', '--delay'),
         dict(default=10.0, type=float,
              help='minutes to delay between scans (0 to exit after one)')),
        (('--name', '-n'),
         dict(help='ceph name to authenticate as '
                   '(default: try client.crash, client.admin)')),
    )
    for flags, options in option_table:
        cli.add_argument(*flags, **options)

    namespace = cli.parse_args()
    return namespace
34 | ||
35 | ||
def post_crash(path):
    """Post the crash report stored in directory *path* to the cluster.

    The contents of ``<path>/meta`` are piped to
    ``timeout 30 ceph -n <name> crash post -i -`` once for each entry of the
    module-level ``auth_names`` list, in order, stopping at the first entry
    for which the command exits with status 0.  Every failed attempt is
    logged as a warning together with the command's stderr.

    Returns the exit status of the last attempt (0 on success).

    Raises ``OSError`` if the meta file cannot be read or the command
    cannot be started.
    """
    # Read the report once, up front: the payload is identical for every
    # identity, the file handle is closed deterministically, and a missing
    # or unreadable meta file fails before any child process is spawned.
    with open(os.path.join(path, 'meta'), 'rb') as f:
        meta = f.read()

    rc = 0
    for n in auth_names:
        pr = subprocess.Popen(
            args=['timeout', '30', 'ceph',
                  '-n', n,
                  'crash', 'post', '-i', '-'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # communicate() feeds stdin, drains both pipes and waits for the
        # child to exit, so returncode is populated when it returns.
        _, stderr = pr.communicate(input=meta)
        rc = pr.returncode
        if rc == 0:
            break
        # stderr is bytes; decode it so the log shows text, not b'...'.
        log.warning('post %s as %s failed: %s' % (
            path, n, stderr.decode('utf-8', 'replace').strip()))
    return rc
56 | ||
57 | ||
def scrape_path(path):
    """Scan *path* once and post every completed crash dump found in it.

    Each entry of *path* that contains a ``meta`` file is treated as a crash
    directory.  A crash directory is considered complete once it also
    contains a ``done`` file; if that marker is missing the scan waits one
    second and checks again.  Completed dumps are handed to ``post_crash()``
    and, when it returns 0, the directory is moved to ``<path>/posted/``.

    Directories that are still incomplete after the grace period are skipped
    for this scan and looked at again on the next one.

    Raises ``OSError`` if *path* cannot be listed or the rename fails.
    """
    for p in os.listdir(path):
        crashpath = os.path.join(path, p)
        metapath = os.path.join(crashpath, 'meta')
        donepath = os.path.join(crashpath, 'done')

        # Not a crash directory (this also skips the 'posted' directory).
        if not os.path.isfile(metapath):
            continue

        if not os.path.isfile(donepath):
            # hang out just for a bit; either we interrupted the dump
            # or the daemon crashed before finishing it
            time.sleep(1)
            if not os.path.isfile(donepath):
                # Skip only this entry.  Returning here would let a single
                # never-finished dump block every remaining crash in the
                # listing from ever being posted.
                continue

        # ok, we can process this one
        rc = post_crash(crashpath)
        if rc == 0:
            os.rename(crashpath, os.path.join(path, 'posted/', p))
            log.debug(
                "posted %s and renamed %s -> %s " %
                (metapath, p, os.path.join('posted/', p))
            )
78 | ||
def handler(signum, frame):
    """Signal handler: announce the signal number and exit with status 0.

    *frame* is part of the ``signal.signal`` callback signature and is not
    used.
    """
    notice = '*** Interrupted with signal %d ***' % signum
    print(notice)
    # Same effect as sys.exit(0): unwinds via SystemExit with code 0.
    raise SystemExit(0)
82 | ||
def main():
    """Entry point: watch the crash directory and post new dumps forever.

    Installs handlers so SIGINT and SIGTERM end the process with exit code
    0, parses the command line, waits until ``<path>/posted`` exists, and
    then calls ``scrape_path()`` every ``--delay`` minutes.  With a delay of
    0 a single scan is performed and the process exits with status 0.
    """
    # Without this declaration the assignment below would only create a
    # function-local variable and post_crash() would keep using the default
    # list, silently ignoring -n/--name.
    global auth_names

    # exit code 0 on SIGINT, SIGTERM
    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    args = parse_args()
    postdir = os.path.join(args.path, 'posted')
    if args.name:
        auth_names = [args.name]

    # The destination for posted crashes must exist before anything can be
    # moved there; keep reminding the operator until it does.
    while not os.path.isdir(postdir):
        log.error("directory %s does not exist; please create" % postdir)
        time.sleep(30)

    # --delay is given in minutes; report and sleep in seconds.
    log.info("monitoring path %s, delay %ds" % (args.path, args.delay * 60.0))
    while True:
        scrape_path(args.path)
        if args.delay == 0:
            sys.exit(0)
        time.sleep(args.delay * 60)
103 | ||
104 | ||
# Run the daemon loop only when executed as a script, not when imported.
if __name__ == "__main__":
    main()