]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Peer test (Single test, not much configurable here) | |
3 | """ | |
4 | import logging | |
5 | import json | |
6 | import time | |
7 | ||
8 | import ceph_manager | |
9 | from teuthology import misc as teuthology | |
10 | from util.rados import rados | |
11 | ||
12 | log = logging.getLogger(__name__) | |
13 | ||
def task(ctx, config):
    """
    Test peering.

    Wait for three OSDs to come up, then take OSDs down in a pattern
    that leaves some PGs unable to peer, and verify via ``ceph pg
    <pgid> query`` that those PGs report peering as blocked on the
    down OSD (osd.1).  Finally revive everything and wait for the
    cluster to become clean again.

    :param ctx: teuthology run context
    :param config: optional dict; no options are currently honored
                   (single test, not much configurable here)
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'peer task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    # .keys() (not the Python 2-only .iterkeys()) so this runs on
    # Python 3; the tuple-unpack asserts exactly one matching remote.
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    # Wait until all three OSDs are up before touching anything.
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()

    # Delay recovery start so the PGs stay in their peering/down
    # states long enough for the checks below to observe them.
    for i in range(3):
        manager.set_config(
            i,
            osd_recovery_delay_start=120)

    # take one osd down
    manager.kill_osd(2)
    manager.mark_down_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # kill another and revive 2, so that some pgs can't peer.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.revive_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.flush_pg_stats([0, 2])

    manager.wait_for_active_or_down()

    manager.flush_pg_stats([0, 2])

    # look for down pgs
    num_down_pgs = 0
    pgs = manager.get_pg_stats()
    for pg in pgs:
        out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
        log.debug("out string %s", out)
        j = json.loads(out)
        log.info("pg is %s, query json is %s", pg, j)

        if pg['state'].count('down'):
            num_down_pgs += 1
            # verify that it is blocked on osd.1
            rs = j['recovery_state']
            assert len(rs) >= 2
            assert rs[0]['name'] == 'Started/Primary/Peering/Down'
            assert rs[1]['name'] == 'Started/Primary/Peering'
            assert rs[1]['blocked']
            assert rs[1]['down_osds_we_would_probe'] == [1]
            assert len(rs[1]['peering_blocked_by']) == 1
            assert rs[1]['peering_blocked_by'][0]['osd'] == 1

    # at least one PG must have gone down, or the scenario didn't
    # actually exercise blocked peering
    assert num_down_pgs > 0

    # bring it all back
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()