]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Monitor recovery | |
3 | """ | |
4 | import logging | |
e306af50 | 5 | from tasks import ceph_manager |
7c673cae FG |
6 | from teuthology import misc as teuthology |
7 | ||
8 | ||
9 | log = logging.getLogger(__name__) | |
10 | ||
def task(ctx, config):
    """
    Test monitor recovery.

    The task runs four phases against the cluster's monitors:

    1. Wait for a full quorum and check that every monitor reports itself
       as ``leader`` or ``peon`` with all monitors in the quorum.
    2. Kill and revive each monitor in turn, waiting for the quorum to
       shrink by one and then return to full size.
    3. Kill all monitors and bring them back one at a time, first in the
       listed order and then in reverse order, waiting on the quorum as
       soon as a majority has been revived.
    4. For the monitors at index 0 and 1 of the listed order, kill the
       monitor, issue some ``log`` cluster commands while it is down, and
       revive it.

    :param ctx: teuthology run context, used to locate the monitors and to
                build the ``CephManager``.
    :param config: optional task configuration dict; ``None`` is treated
                   as an empty dict.
    :raises AssertionError: if ``config`` is not a dict, or if a monitor
                            reports an unexpected state or quorum size.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    # Monitor names are split on '.' and the second field is kept as the id.
    mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)]
    log.info("mon ids = %s", mons)

    manager.wait_for_mon_quorum_size(len(mons))

    log.info('verifying all monitors are in the quorum')
    for m in mons:
        s = manager.get_mon_status(m)
        assert s['state'] == 'leader' or s['state'] == 'peon'
        assert len(s['quorum']) == len(mons)

    log.info('restarting each monitor in turn')
    for m in mons:
        # stop a monitor
        manager.kill_mon(m)
        manager.wait_for_mon_quorum_size(len(mons) - 1)

        # restart
        manager.revive_mon(m)
        manager.wait_for_mon_quorum_size(len(mons))

    # in forward and reverse order,
    # The reversed order must be an independent copy: reversing an alias of
    # ``mons`` in place would leave both names pointing at the same reversed
    # list, so the forward order would never run. A separate loop variable
    # also keeps ``mons`` in its original order for the phase below.
    rmons = list(reversed(mons))
    for order in (mons, rmons):
        log.info('stopping all monitors')
        for m in order:
            manager.kill_mon(m)

        log.info('forming a minimal quorum for %s, then adding monitors',
                 order)
        # Smallest number of monitors that constitutes a majority.
        qnum = (len(order) // 2) + 1
        for num, m in enumerate(order, start=1):
            manager.revive_mon(m)
            # Below a majority no quorum can be waited on; from the
            # majority onward, wait for each newly revived monitor to join.
            if num >= qnum:
                manager.wait_for_mon_quorum_size(num)

    # on both leader and non-leader ranks...
    # NOTE(review): presumably index 0 of the listed order is the leader and
    # index 1 a peon -- confirm against how the cluster assigns ranks. This
    # phase indexes mons[1], so it needs at least two monitors.
    for rank in [0, 1]:
        # take one out
        log.info('removing mon %s', mons[rank])
        manager.kill_mon(mons[rank])
        manager.wait_for_mon_quorum_size(len(mons) - 1)

        log.info('causing some monitor log activity')
        # range(1, total) issues total - 1 commands ("1 of 30".."29 of 30").
        total = 30
        for n in range(1, total):
            manager.raw_cluster_cmd('log', '%d of %d' % (n, total))

        log.info('adding mon %s back in', mons[rank])
        manager.revive_mon(mons[rank])
        manager.wait_for_mon_quorum_size(len(mons))