]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/reg11184.py
update sources to v12.1.1
[ceph.git] / ceph / qa / tasks / reg11184.py
CommitLineData
7c673cae
FG
1"""
2Special regression test for tracker #11184
3
4Synopsis: osd/SnapMapper.cc: 282: FAILED assert(check(oid))
5
6This is accomplished by moving a pg that wasn't part of a split and still includes
7divergent priors.
8"""
9import logging
10import time
11from cStringIO import StringIO
12
13from teuthology import misc as teuthology
14from util.rados import rados
15import os
16
17
18log = logging.getLogger(__name__)
19
20
def task(ctx, config):
    """
    Test handling of divergent entries during export / import
    to regression test tracker #11184

    overrides:
      ceph:
        conf:
          osd:
            debug osd: 5

    Requires 3 osds on a single test node.

    :param ctx: run context; this task reads ``ctx.managers['ceph']`` and
                ``ctx.cluster``.
    :param config: task configuration; must be a dict or None (None is
                   treated as an empty dict).
    :raises AssertionError: if config is not a dict, if one of the
                            ceph-objectstore-tool invocations exits non-zero,
                            or if reading back one of the divergent objects
                            does not return status 0.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'reg11184 task only accepts a dict for configuration'

    manager = ctx.managers['ceph']

    _wait_for_osds_up(manager, 3)
    osds = [0, 1, 2]
    manager.flush_pg_stats(osds)
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'
    testdir = teuthology.get_testdir(ctx)

    # create 1 pg pool
    log.info('creating foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')

    # Remove extra pool to simplify log output
    manager.raw_cluster_cmd('osd', 'pool', 'delete', 'rbd', 'rbd',
                            '--yes-i-really-really-mean-it')

    for i in osds:
        manager.set_config(i, osd_min_pg_log_entries=10)
        manager.set_config(i, osd_max_pg_log_entries=10)
        manager.set_config(i, osd_pg_log_trim_min=5)

    # determine primary
    divergent = manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = list(osds)
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    # write 100 objects
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # blackhole non_divergent
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        manager.set_config(i, objectstore_blackhole=1)

    DIVERGENT_WRITE = 5
    DIVERGENT_REMOVE = 5
    # Write some soon to be divergent
    log.info('writing divergent objects')
    for i in range(DIVERGENT_WRITE):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i,
                         dummyfile2], wait=False)
    # Remove some soon to be divergent
    log.info('remove divergent objects')
    for i in range(DIVERGENT_REMOVE):
        rados(ctx, mon, ['-p', 'foo', 'rm',
                         'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
    # The rados clients above were started with wait=False; give them time
    # to issue their requests, then kill whatever is still running.
    time.sleep(10)
    mon.run(
        args=['killall', '-9', 'rados'],
        wait=True,
        check_status=False)

    # kill all the osds but leave divergent in
    log.info('killing all the osds')
    for i in osds:
        manager.kill_osd(i)
    for i in osds:
        manager.mark_down_osd(i)
    for i in non_divergent:
        manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        manager.revive_osd(i)
    for i in non_divergent:
        manager.mark_in_osd(i)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
    log.info('writing non-divergent object %s', objname)
    rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])

    manager.wait_for_recovery()

    # ensure no recovery of up osds first
    log.info('delay recovery')
    for i in non_divergent:
        manager.wait_run_admin_socket(
            'osd', i, ['set_recovery_delay', '100000'])

    # bring in our divergent friend
    log.info("revive divergent %d", divergent)
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.revive_osd(divergent)

    log.info('delay recovery divergent')
    manager.wait_run_admin_socket(
        'osd', divergent, ['set_recovery_delay', '100000'])

    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    _wait_for_osds_up(manager, 3)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    # At this point the divergent_priors should have been detected

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)

    # Split pgs for pool foo
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2')
    time.sleep(5)

    manager.raw_cluster_cmd('pg', 'dump')

    # Export a pg
    (exp_remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys()
    FSPATH = manager.get_filepath()
    JPATH = os.path.join(FSPATH, "journal")
    # NOTE(review): the commands built from this prefix are formatted a
    # second time with id=<osd>; presumably FSPATH carries an "{id}"
    # placeholder that survives this first format() -- confirm against
    # manager.get_filepath() before dropping any id= argument below.
    prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
              "--data-path {fpath} --journal-path {jpath} "
              "--log-file="
              "/var/log/ceph/objectstore_tool.$$.log ".
              format(fpath=FSPATH, jpath=JPATH))
    pid = os.getpid()
    expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid))
    cmd = ((prefix + "--op export --pgid 2.0 --file {file}").
           format(id=divergent, file=expfile))
    _run_objectstore_tool(exp_remote, cmd)

    # Remove the same pg that was exported
    cmd = ((prefix + "--op remove --pgid 2.0").
           format(id=divergent))
    _run_objectstore_tool(exp_remote, cmd)

    # Kill one of non-divergent OSDs
    log.info('killing osd.%d', non_divergent[0])
    manager.kill_osd(non_divergent[0])
    manager.mark_down_osd(non_divergent[0])
    # manager.mark_out_osd(non_divergent[0])

    # An empty collection for pg 2.0 needs to be cleaned up
    cmd = ((prefix + "--op remove --pgid 2.0").
           format(id=non_divergent[0]))
    _run_objectstore_tool(exp_remote, cmd)

    cmd = ((prefix + "--op import --file {file}").
           format(id=non_divergent[0], file=expfile))
    _run_objectstore_tool(exp_remote, cmd)

    # bring in our divergent friend and other node
    log.info("revive divergent %d", divergent)
    manager.revive_osd(divergent)
    manager.mark_in_osd(divergent)
    log.info("revive %d", non_divergent[0])
    manager.revive_osd(non_divergent[0])

    _wait_for_osds_up(manager, 3)

    log.info('delay recovery divergent')
    manager.set_config(divergent, osd_recovery_delay_start=100000)
    log.info('mark divergent in')
    manager.mark_in_osd(divergent)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)
    log.info("reviving divergent %d", divergent)
    manager.revive_osd(divergent)
    time.sleep(3)

    log.info('allowing recovery')
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for i in osds:
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
                                'kick_recovery_wq', ' 0')

    log.info('reading divergent objects')
    for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
        exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i,
                                       '/tmp/existing'])
        # Compare by value: identity ("is") on an int only works by accident
        # of small-integer caching.
        assert exit_status == 0

    # Clean up the export file written on the divergent osd's host.
    (remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys()
    cmd = 'rm {file}'.format(file=expfile)
    remote.run(args=cmd, wait=True)
    log.info("success")


def _wait_for_osds_up(manager, count):
    """Poll every 10 seconds until at least ``count`` osds are reported up."""
    while len(manager.get_osd_status()['up']) < count:
        time.sleep(10)


def _run_objectstore_tool(remote, cmd):
    """Run ``cmd`` on ``remote`` and assert that it exited with status 0."""
    proc = remote.run(args=cmd, wait=True,
                      check_status=False, stdout=StringIO())
    assert proc.exitstatus == 0