]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Special case divergence test | |
3 | """ | |
4 | import logging | |
5 | import time | |
6 | ||
7 | from teuthology import misc as teuthology | |
e306af50 | 8 | from tasks.util.rados import rados |
7c673cae FG |
9 | |
10 | ||
11 | log = logging.getLogger(__name__) | |
12 | ||
13 | ||
def task(ctx, config):
    """
    Check that divergent log entries whose prior_version is prior to
    log_tail are handled.

    Suggested overrides::

        overrides:
          ceph:
            conf:
              osd:
                debug osd: 5

    Requires 3 osds on a single test node.

    :param ctx: run context; ``ctx.managers['ceph']`` and ``ctx.cluster``
                are read from it.
    :param config: configuration dict for this task, or None (treated as
                   an empty dict).
    """
    config = {} if config is None else config
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'

    mgr = ctx.managers['ceph']

    def wait_for_three_up():
        # Poll every 10 seconds until at least three osds are reported up.
        while len(mgr.get_osd_status()['up']) < 3:
            time.sleep(10)

    def existing(index):
        # Name of the index-th pre-created object in pool 'foo'.
        return 'existing_%d' % index

    wait_for_three_up()
    mgr.flush_pg_stats([0, 1, 2])
    for flag in ('noout', 'noin', 'nodown'):
        mgr.raw_cluster_cmd('osd', 'set', flag)
    mgr.wait_for_clean()

    # files assumed to always be present on the node; used as object payloads
    initial_payload = '/etc/fstab'
    replacement_payload = '/etc/resolv.conf'

    # pool with a single pg
    log.info('creating foo')
    mgr.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')

    osd_ids = [0, 1, 2]
    # applied one option per set_config call, in this order, on every osd
    pg_log_settings = (
        {'osd_min_pg_log_entries': 10},
        {'osd_max_pg_log_entries': 10},
        {'osd_pg_log_trim_min': 5},
    )
    for osd in osd_ids:
        for setting in pg_log_settings:
            mgr.set_config(osd, **setting)

    # the current primary of the pg is the osd that will become divergent
    divergent_osd = mgr.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent_osd)
    other_osds = osd_ids[:]
    other_osds.remove(divergent_osd)

    log.info('writing initial objects')
    mon_name = teuthology.get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(mon_name).remotes.keys()
    # 100 objects: existing_0 .. existing_99
    for index in range(100):
        rados(ctx, mon_remote,
              ['-p', 'foo', 'put', existing(index), initial_payload])

    mgr.wait_for_clean()

    # turn on objectstore_blackhole for every osd except the primary
    log.info("blackholing osds %s", str(other_osds))
    for osd in other_osds:
        mgr.set_config(osd, objectstore_blackhole=1)

    num_divergent_writes = 5
    num_divergent_removes = 5

    # overwrite the first few objects without waiting for completion
    log.info('writing divergent objects')
    for index in range(num_divergent_writes):
        rados(ctx, mon_remote,
              ['-p', 'foo', 'put', existing(index), replacement_payload],
              wait=False)

    # remove the next few objects, again without waiting
    log.info('remove divergent objects')
    for index in range(num_divergent_removes):
        rados(ctx, mon_remote,
              ['-p', 'foo', 'rm', existing(index + num_divergent_writes)],
              wait=False)

    # give the background rados clients some time, then kill them all;
    # the exit status of killall is deliberately not checked
    time.sleep(10)
    mon_remote.run(
        args=['killall', '-9', 'rados'],
        wait=True,
        check_status=False)

    # stop every osd and mark it down; only the non-divergent ones are
    # also marked out
    log.info('killing all the osds')
    for osd in osd_ids:
        mgr.kill_osd(osd)
    for osd in osd_ids:
        mgr.mark_down_osd(osd)
    for osd in other_osds:
        mgr.mark_out_osd(osd)

    # restart the non-divergent osds, then mark them in
    log.info("bringing up non_divergent %s", str(other_osds))
    for osd in other_osds:
        mgr.revive_osd(osd)
    for osd in other_osds:
        mgr.mark_in_osd(osd)

    # one write while the old primary is away (ensure that the old
    # divergent one is divergent)
    objname = existing(num_divergent_writes + num_divergent_removes)
    log.info('writing non-divergent object ' + objname)
    rados(ctx, mon_remote, ['-p', 'foo', 'put', objname, replacement_payload])

    mgr.wait_for_recovery()

    # hold back recovery on the osds that are up
    log.info('delay recovery')
    for osd in other_osds:
        mgr.wait_run_admin_socket(
            'osd', osd, ['set_recovery_delay', '100000'])

    # start the divergent osd while 'noup' is set, delay its recovery as
    # well, then clear 'noup' and wait for all three osds to be up
    log.info("revive divergent %d", divergent_osd)
    mgr.raw_cluster_cmd('osd', 'set', 'noup')
    mgr.revive_osd(divergent_osd)

    log.info('delay recovery divergent')
    mgr.wait_run_admin_socket(
        'osd', divergent_osd, ['set_recovery_delay', '100000'])

    mgr.raw_cluster_cmd('osd', 'unset', 'noup')
    wait_for_three_up()

    log.info('wait for peering')
    rados(ctx, mon_remote, ['-p', 'foo', 'put', 'foo', initial_payload])

    # the divergent_priors should have been detected by now

    # bounce the divergent osd once more
    log.info("killing divergent %d", divergent_osd)
    mgr.kill_osd(divergent_osd)
    log.info("reviving divergent %d", divergent_osd)
    mgr.revive_osd(divergent_osd)

    time.sleep(20)

    log.info('allowing recovery')
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for osd in osd_ids:
        mgr.raw_cluster_cmd('tell', 'osd.%d' % osd, 'debug',
                            'kick_recovery_wq', ' 0')

    # every object touched by a divergent write or remove must be readable
    log.info('reading divergent objects')
    for index in range(num_divergent_writes + num_divergent_removes):
        exit_status = rados(
            ctx, mon_remote,
            ['-p', 'foo', 'get', existing(index), '/tmp/existing'])
        assert exit_status == 0

    log.info("success")