]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Raise exceptions on osd coredumps or test err directories | |
3 | """ | |
4 | import contextlib | |
5 | import logging | |
6 | import time | |
7 | from teuthology.orchestra import run | |
8 | ||
e306af50 | 9 | from tasks import ceph_manager |
7c673cae FG |
10 | from teuthology import misc as teuthology |
11 | ||
12 | log = logging.getLogger(__name__) | |
13 | ||
@contextlib.contextmanager
def task(ctx, config):
    """
    Die if {testdir}/err exists or if an OSD dumps core

    First waits until the number of OSDs reported as ``up`` reaches the
    number of OSD instances in the cluster.  After that, every OSD's remote
    is polled in an endless loop for two failure markers:

    * the file ``{testdir}/err`` exists, or
    * the last line of ``/var/log/ceph/osd.<id>.log`` contains ``end dump``.

    The first marker found is logged and turned into an ``Exception``.

    NOTE(review): the body contains no ``yield`` and the polling loop has no
    exit other than raising, so this function never hands control back
    normally -- presumably intentional for a watchdog task; confirm.

    :param ctx: teuthology run context (provides ``ctx.cluster``)
    :param config: task configuration; ``None`` is treated as an empty dict
    :raises Exception: when an err file or a core-dump marker is detected
    """
    config = {} if config is None else config

    def _exits_zero(remote, argv):
        # Run argv remotely without raising on failure; report whether the
        # command exited with status 0.
        proc = remote.run(
            args=argv,
            wait=True,
            check_status=False,
        )
        return proc.exitstatus == 0

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    # Do not start watching until every OSD has come up.
    while True:
        up_osds = manager.get_osd_status()['up']
        if len(up_osds) >= num_osds:
            break
        time.sleep(10)

    testdir = teuthology.get_testdir(ctx)
    err_marker = '{tdir}/err'.format(tdir=testdir)

    while True:
        for osd_id in range(num_osds):
            role = 'osd.%d' % osd_id
            (osd_remote,) = ctx.cluster.only(role).remotes.keys()

            # `test -e` exits 0 when the err marker is present.
            if _exits_zero(osd_remote, ['test', '-e', err_marker]):
                log.info("osd %d has an error" % osd_id)
                raise Exception("osd %d error" % osd_id)

            log_path = '/var/log/ceph/osd.%d.log' % (osd_id)

            # grep -q exits 0 when the final log line mentions 'end dump'.
            dump_probe = [
                'tail', '-1', log_path,
                run.Raw('|'),
                'grep', '-q', 'end dump'
            ]
            if _exits_zero(osd_remote, dump_probe):
                log.info("osd %d dumped core" % osd_id)
                raise Exception("osd %d dumped core" % osd_id)

        # NOTE(review): sleep is placed at sweep level (once per pass over
        # all OSDs) -- confirm against the upstream file's indentation.
        time.sleep(5)