]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/die_on_err.py
import 15.2.4
[ceph.git] / ceph / qa / tasks / die_on_err.py
CommitLineData
7c673cae
FG
1"""
Raise an exception when an OSD dumps core or a test err directory exists
3"""
4import contextlib
5import logging
6import time
7from teuthology.orchestra import run
8
e306af50 9from tasks import ceph_manager
7c673cae
FG
10from teuthology import misc as teuthology
11
12log = logging.getLogger(__name__)
13
@contextlib.contextmanager
def task(ctx, config):
    """
    Die if {testdir}/err exists or if an OSD dumps core.

    The function first waits until the manager reports as many 'up' OSDs
    as there are OSD instances in the cluster.  It then sweeps over every
    OSD remote, over and over, checking two things on each one:

    * whether ``{testdir}/err`` exists, and
    * whether the last line of ``/var/log/ceph/osd.<id>.log`` contains
      ``end dump``.

    :param ctx: run context; ``ctx.cluster`` is used to look up remotes
    :param config: task configuration; ``None`` is treated as ``{}``
    :raises Exception: as soon as either check succeeds for any OSD

    NOTE(review): although this is decorated as a context manager, no
    ``yield`` appears in the body, and the polling loop is only ever left
    by raising.
    """
    config = {} if config is None else config

    def probe(remote, command):
        # Run `command` on `remote` without failing on a non-zero exit,
        # and hand the exit status back to the caller.
        proc = remote.run(
            args=command,
            wait=True,
            check_status=False,
        )
        return proc.exitstatus

    mon_role = teuthology.get_first_mon(ctx, config)
    mon_remotes = ctx.cluster.only(mon_role).remotes.keys()
    (mon,) = mon_remotes

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    # Hold off until every OSD is reported as up.
    while True:
        up_osds = manager.get_osd_status()['up']
        if len(up_osds) >= num_osds:
            break
        time.sleep(10)

    testdir = teuthology.get_testdir(ctx)
    err_marker = '{tdir}/err'.format(tdir=testdir)

    while True:
        for osd_id in range(num_osds):
            osd_role = 'osd.%d' % osd_id
            (osd_remote,) = ctx.cluster.only(osd_role).remotes.keys()

            # `test -e` exits 0 when the err marker is present.
            if probe(osd_remote, ['test', '-e', err_marker]) == 0:
                log.info("osd %d has an error" % osd_id)
                raise Exception("osd %d error" % osd_id)

            log_path = '/var/log/ceph/osd.%d.log' % (osd_id)

            # `grep -q` exits 0 when the final log line mentions 'end dump'.
            # run.Raw('|') is presumably passed through unquoted so that it
            # acts as a shell pipe -- confirm against teuthology.
            tail_cmd = [
                'tail', '-1', log_path,
                run.Raw('|'),
                'grep', '-q', 'end dump'
            ]
            if probe(osd_remote, tail_cmd) == 0:
                log.info("osd %d dumped core" % osd_id)
                raise Exception("osd %d dumped core" % osd_id)

        # NOTE(review): pause placed once per full sweep of the OSDs;
        # confirm this nesting against the upstream file.
        time.sleep(5)