1 """
2 Peer test (Single test, not much configurable here)
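
Typically run from a teuthology job against a small test cluster; an
illustrative fragment (not taken verbatim from any suite) might look like::

    tasks:
    - install:
    - ceph:
    - peer: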
3 """
4 import logging
5 import json
6 import time
7
8 import ceph_manager
9 from teuthology import misc as teuthology
10 from util.rados import rados
11
12 log = logging.getLogger(__name__)
13
def task(ctx, config):
    """
    Test peering.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'peer task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

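    # wait until all three OSDs are up, flush their PG stats so the monitor
    # has a current view, and make sure the cluster starts out clean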
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()

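    # postpone recovery on every OSD (osd_recovery_delay_start) so the
    # peering states set up below can be observed before recovery kicks in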
    for i in range(3):
        manager.set_config(
            i,
            osd_recovery_delay_start=120)

    # take an osd down
    manager.kill_osd(2)
    manager.mark_down_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])

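    # flush stats from the surviving OSDs and wait until the cluster
    # reports that recovery has completed with osd.2 down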
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.wait_for_recovery()

    # kill another and revive 2, so that some pgs can't peer.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.revive_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')

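    # PGs whose most recent writes may only live on osd.1 cannot finish
    # peering; wait until every PG is either active or down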
    manager.wait_for_active_or_down()

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')

    # look for down pgs
    num_down_pgs = 0
    pgs = manager.get_pg_stats()
    for pg in pgs:
        out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
        log.debug("out string %s", out)
        j = json.loads(out)
        log.info("pg is %s, query json is %s", pg, j)

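        # a PG stuck in a 'down' state should show, in its recovery_state,
        # that peering is blocked waiting to probe the down osd.1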
        if pg['state'].count('down'):
            num_down_pgs += 1
            # verify that it is blocked on osd.1
            rs = j['recovery_state']
            assert len(rs) >= 2
            assert rs[0]['name'] == 'Started/Primary/Peering/Down'
            assert rs[1]['name'] == 'Started/Primary/Peering'
            assert rs[1]['blocked']
            assert rs[1]['down_osds_we_would_probe'] == [1]
            assert len(rs[1]['peering_blocked_by']) == 1
            assert rs[1]['peering_blocked_by'][0]['osd'] == 1

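    # the scenario is only meaningful if at least one PG actually went down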
    assert num_down_pgs > 0

    # bring it all back
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()