]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/mon_recovery.py
bump version to 18.2.4-pve3
[ceph.git] / ceph / qa / tasks / mon_recovery.py
CommitLineData
7c673cae
FG
1"""
2Monitor recovery
3"""
4import logging
e306af50 5from tasks import ceph_manager
7c673cae
FG
6from teuthology import misc as teuthology
7
8
9log = logging.getLogger(__name__)
10
def task(ctx, config):
    """
    Test monitor recovery.

    The task runs these phases in order:

    1. Wait for every monitor to join the quorum and check that each one
       reports itself as ``leader`` or ``peon`` with a full-size quorum.
    2. Kill and revive each monitor in turn, waiting for the quorum to
       shrink by one and then return to full size.
    3. Twice -- once in the original monitor order and once in reverse --
       kill all monitors, then revive them one by one, waiting on the
       quorum size as soon as a majority has been revived.
    4. For the monitors at index 0 and 1 of the monitor id list, kill the
       monitor, issue some ``log`` cluster commands while it is down, and
       revive it again.

    :param ctx: teuthology run context; used to find the first monitor's
                remote and the list of monitor names.
    :param config: task configuration; must be a dict or None (None is
                   treated as an empty dict).
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    # Exactly one remote is expected for the first monitor; the tuple
    # unpacking fails loudly otherwise.
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    # Second dot-separated component of each mon name is used as the id.
    mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)]
    log.info("mon ids = %s", mons)

    manager.wait_for_mon_quorum_size(len(mons))

    log.info('verifying all monitors are in the quorum')
    for m in mons:
        s = manager.get_mon_status(m)
        assert s['state'] == 'leader' or s['state'] == 'peon'
        assert len(s['quorum']) == len(mons)

    log.info('restarting each monitor in turn')
    for m in mons:
        # stop a monitor
        manager.kill_mon(m)
        manager.wait_for_mon_quorum_size(len(mons) - 1)

        # restart
        manager.revive_mon(m)
        manager.wait_for_mon_quorum_size(len(mons))

    # in forward and reverse order,
    # NOTE: the reversed order must be an independent copy.  Reversing an
    # alias of ``mons`` in place would make both passes use the same
    # (reversed) order and would also change ``mons`` for the code below.
    rmons = list(reversed(mons))
    for order in (mons, rmons):
        log.info('stopping all monitors')
        for m in order:
            manager.kill_mon(m)

        log.info('forming a minimal quorum for %s, then adding monitors',
                 order)
        # Smallest number of monitors that constitutes a majority.
        qnum = (len(order) // 2) + 1
        for num, m in enumerate(order, start=1):
            manager.revive_mon(m)
            # Below a majority there is no quorum to wait for.
            if num >= qnum:
                manager.wait_for_mon_quorum_size(num)

    # on both leader and non-leader ranks...
    # NOTE(review): this indexes the mon id list in its original order and
    # presumably relies on index 0 being the leader -- confirm against how
    # get_mon_names() orders its result.
    for rank in [0, 1]:
        # take one out
        log.info('removing mon %s', mons[rank])
        manager.kill_mon(mons[rank])
        manager.wait_for_mon_quorum_size(len(mons) - 1)

        log.info('causing some monitor log activity')
        total = 30
        # Issues the messages "1 of 30" through "29 of 30".
        for n in range(1, total):
            manager.raw_cluster_cmd('log', '%d of %d' % (n, total))

        log.info('adding mon %s back in', mons[rank])
        manager.revive_mon(mons[rank])
        manager.wait_for_mon_quorum_size(len(mons))