]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | """Scrub testing""" |
7c673cae FG |
2 | |
3 | import contextlib | |
4 | import json | |
5 | import logging | |
6 | import os | |
7 | import time | |
8 | import tempfile | |
9 | ||
e306af50 | 10 | from tasks import ceph_manager |
7c673cae FG |
11 | from teuthology import misc as teuthology |
12 | ||
13 | log = logging.getLogger(__name__) | |
14 | ||
15 | ||
def wait_for_victim_pg(manager):
    """Block until some PG holds data, then return it.

    Polls ``manager.get_pg_stats()`` and returns a ``(pgid, acting)``
    tuple for the first PG whose ``stat_sum.num_bytes`` is positive.
    Sleeps 3 seconds between polls when no PG has data yet.
    """
    while True:
        for pg_stat in manager.get_pg_stats():
            if pg_stat['stat_sum']['num_bytes'] > 0:
                return pg_stat['pgid'], pg_stat['acting']
        # nothing to mess with yet; wait before asking again
        time.sleep(3)
29 | ||
30 | ||
def find_victim_object(ctx, pg, osd):
    """Return a file to be fuzzed.

    Lists the fuse view of PG *pg* on OSD *osd* and picks the first
    entry that is not the pg info object.

    :param ctx: task context; ``ctx.cluster`` is used to find the remote
                that hosts ``osd.<osd>``
    :param pg: pgid whose ``<pg>_head`` directory is listed
    :param osd: integer id of the OSD to look at
    :returns: ``(osd_remote, object_path, object_name)``
    :raises AssertionError: if the listing contains no usable object
    """
    (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.keys()
    data_path = os.path.join(
        '/var/lib/ceph/osd',
        'ceph-{id}'.format(id=osd),
        'fuse',
        '{pg}_head'.format(pg=pg),
        'all',
    )

    # fuzz time
    ls_out = osd_remote.sh('sudo ls %s' % data_path)

    # find an object file we can mess with (and not the pg info object).
    # Empty lines (e.g. the one after the trailing newline of the listing)
    # are skipped, and next() gets a default so that the assertion below
    # reports the problem instead of a bare StopIteration/IndexError.
    osdfilename = next(
        (line for line in ls_out.split('\n')
         if line and not line.endswith('::::head#')),
        None,
    )
    assert osdfilename is not None, \
        'no object file found in %s' % data_path

    # Get actual object name from osd stored filename
    objname = osdfilename.split(':')[4]
    return osd_remote, os.path.join(data_path, osdfilename), objname
53 | ||
54 | ||
def corrupt_file(osd_remote, path):
    """Overwrite the first byte of ``<path>/data`` with a single \\0.

    Runs ``dd`` under sudo on *osd_remote*; ``conv=notrunc`` keeps the
    rest of the file in place.
    """
    target = 'of=%s/data' % path
    dd_cmd = ['sudo', 'dd', 'if=/dev/zero', target,
              'bs=1', 'count=1', 'conv=notrunc']
    osd_remote.run(args=dd_cmd)
63 | ||
64 | ||
def get_pgnum(pgid):
    """Return everything after the first '.' in *pgid*.

    Asserts that *pgid* actually contains a '.'.
    """
    _prefix, dot, pgnum = pgid.partition('.')
    assert dot
    return pgnum
69 | ||
70 | ||
def deep_scrub(manager, victim, pool):
    """Deep-scrub the victim PG and verify it is reported inconsistent.

    Asks *manager* for a 'deep-scrub' of the PG, then asserts that the
    PG state returned by ``get_single_pg_stats`` contains
    '+inconsistent'.
    """
    manager.do_pg_scrub(pool, get_pgnum(victim), 'deep-scrub')

    pg_state = manager.get_single_pg_stats(victim)['state']
    assert '+inconsistent' in pg_state
79 | ||
80 | ||
def repair(manager, victim, pool):
    """Repair the victim PG and verify it is no longer inconsistent.

    Asks *manager* for a 'repair' of the PG, then asserts that the PG
    state returned by ``get_single_pg_stats`` does not contain
    '+inconsistent'.
    """
    manager.do_pg_scrub(pool, get_pgnum(victim), 'repair')

    pg_state = manager.get_single_pg_stats(victim)['state']
    assert '+inconsistent' not in pg_state
89 | ||
90 | ||
def test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, pool):
    """Corrupt an object's data, then check deep-scrub and repair.

    The object at *obj_path* is damaged on *osd_remote*; a deep-scrub
    must then flag *pg* as inconsistent and a repair must clear that
    state again. *ctx* is accepted but not used.
    """
    corrupt_file(osd_remote=osd_remote, path=obj_path)
    deep_scrub(manager=manager, victim=pg, pool=pool)
    repair(manager=manager, victim=pg, pool=pool)
95 | ||
96 | ||
def test_repair_bad_omap(ctx, manager, pg, osd, objname):
    """Test deep-scrub with various omap modifications.

    The omap of *objname* (pool 'rbd') is modified through the admin
    socket of *osd* only; a deep-scrub must then report *pg* as
    inconsistent. The modifications are reverted by hand before the
    final repair. *ctx* is accepted but not used.
    """
    log.info('fuzzing omap of %s' % objname)

    # Modify omap on the specific osd
    damage_cmds = (
        ['rmomapkey', 'rbd', objname, 'key'],
        ['setomapval', 'rbd', objname, 'badkey', 'badval'],
        ['setomapheader', 'rbd', objname, 'badhdr'],
    )
    for admin_cmd in damage_cmds:
        manager.osd_admin_socket(osd, admin_cmd)

    deep_scrub(manager, pg, 'rbd')

    # Please note that the repair here is erroneous: it rewrites the
    # correct omap digest and data digest on the replicas with the
    # corresponding digests from the primary osd, which is hosting the
    # victim object (see find_victim_object()).
    # So we need to either put this test at the end of this task, or undo
    # the mess-up manually before the "repair()" that just ensures the
    # cleanup is sane. Otherwise the succeeding tests will fail if they
    # try to set "badkey" in the hope of getting an "inconsistent" pg from
    # a deep-scrub.
    undo_cmds = (
        ['setomapheader', 'rbd', objname, 'hdr'],
        ['rmomapkey', 'rbd', objname, 'badkey'],
        ['setomapval', 'rbd', objname, 'key', 'val'],
    )
    for admin_cmd in undo_cmds:
        manager.osd_admin_socket(osd, admin_cmd)

    repair(manager, pg, 'rbd')
120 | ||
121 | ||
class MessUp:
    """Context managers that each damage one copy of an object.

    Every public method returns a context manager. Entering it applies
    one kind of damage (to the object's data file through the OSD's
    remote, or to its omap through the OSD admin socket); the value
    bound by ``with ... as`` is a tuple naming the checks the caller is
    expected to run. Leaving the ``with`` block undoes the damage, also
    when the body raised, so a failed check does not leave the object
    damaged.
    """

    def __init__(self, manager, osd_remote, pool, osd_id,
                 obj_name, obj_path, omap_key, omap_val):
        """Remember which object to damage and how to reach it.

        :param manager: object providing ``osd_admin_socket(osd_id, cmd)``
        :param osd_remote: remote providing ``run(args=...)``
        :param pool: pool name passed to the omap admin-socket commands
        :param osd_id: id of the OSD whose admin socket is used
        :param obj_name: object name passed to the omap commands
        :param obj_path: directory of the object; its ``data`` entry is
                         the file that gets damaged
        :param omap_key: existing omap key of the object
        :param omap_val: value to restore for *omap_key*
        """
        self.manager = manager
        self.osd = osd_remote
        self.pool = pool
        self.osd_id = osd_id
        self.obj = obj_name
        self.path = obj_path
        self.omap_key = omap_key
        self.omap_val = omap_val

    @contextlib.contextmanager
    def _test_with_file(self, messup_cmd, *checks):
        """Back up the data file, run *messup_cmd*, yield *checks*, restore.

        *messup_cmd* is a command line that is split on whitespace
        before being run on the OSD's remote.
        """
        # NOTE(review): tempfile.mktemp() only generates a name (on the
        # machine running this code) and is deprecated; the name is then
        # used as a path by the commands run through self.osd -- confirm
        # this is intended before replacing it.
        temp = tempfile.mktemp()
        data_file = os.path.join(self.path, 'data')
        self.osd.run(args=['sudo', 'cp', data_file, temp])
        try:
            self.osd.run(args=messup_cmd.split())
            yield checks
        finally:
            # Restore even if the caller's checks failed.  The directory
            # may still exist (only remove() deletes it), hence the mkdir
            # is allowed to fail.
            self.osd.run(args=['sudo', 'mkdir', self.path],
                         check_status=False)
            self.osd.run(args=['sudo', 'cp', temp, data_file])

    @contextlib.contextmanager
    def _test_with_omap(self, messup_cmd, restore_cmd):
        """Run *messup_cmd* on the admin socket, yield, run *restore_cmd*."""
        self.manager.osd_admin_socket(self.osd_id, messup_cmd)
        try:
            yield ('omap_digest_mismatch',)
        finally:
            # Undo the omap change even if the caller's checks failed.
            self.manager.osd_admin_socket(self.osd_id, restore_cmd)

    def remove(self):
        """Remove the object's directory; expects 'missing'."""
        cmd = 'sudo rmdir {path}'.format(path=self.path)
        return self._test_with_file(cmd, 'missing')

    def append(self):
        """Append one zero byte to the data file."""
        cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \
              'conv=notrunc oflag=append'.format(path=self.path)
        return self._test_with_file(cmd,
                                    'data_digest_mismatch',
                                    'size_mismatch')

    def truncate(self):
        """Truncate the data file by copying /dev/null over it."""
        cmd = 'sudo dd if=/dev/null of={path}/data'.format(path=self.path)
        return self._test_with_file(cmd,
                                    'data_digest_mismatch',
                                    'size_mismatch')

    def change_obj(self):
        """Overwrite the first byte of the data file with a zero byte."""
        cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \
              'conv=notrunc'.format(path=self.path)
        return self._test_with_file(cmd,
                                    'data_digest_mismatch')

    def rm_omap(self):
        """Remove the known omap key; put it back on exit."""
        return self._test_with_omap(
            ['rmomapkey', self.pool, self.obj, self.omap_key],
            ['setomapval', self.pool, self.obj,
             self.omap_key, self.omap_val])

    def add_omap(self):
        """Add an extra 'badkey' omap entry; remove it on exit."""
        return self._test_with_omap(
            ['setomapval', self.pool, self.obj, 'badkey', 'badval'],
            ['rmomapkey', self.pool, self.obj, 'badkey'])

    def change_omap(self):
        """Set the known omap key to 'badval'; restore its value on exit."""
        return self._test_with_omap(
            ['setomapval', self.pool, self.obj, self.omap_key, 'badval'],
            ['setomapval', self.pool, self.obj,
             self.omap_key, self.omap_val])
193 | ||
194 | ||
class InconsistentObjChecker:
    """Check the returned inconsistents/inconsistent info"""

    def __init__(self, osd, acting, obj_name):
        """Remember the damaged OSD, the acting set and the object name.

        :param osd: id of the OSD whose copy was damaged
        :param acting: acting set of the PG; must contain *osd*
        :param obj_name: expected name of the inconsistent object
        """
        self.osd = osd
        self.acting = acting
        self.obj = obj_name
        assert self.osd in self.acting

    def basic_checks(self, inc):
        """Check object name, snap and shard count of entry *inc*."""
        assert inc['object']['name'] == self.obj
        assert inc['object']['snap'] == "head"
        assert len(inc['shards']) == len(self.acting), \
            "the number of returned shard does not match with the acting set"

    def run(self, check, inc):
        """Run the check method named *check* against entry *inc*."""
        func = getattr(self, check)
        func(inc)

    def _check_errors(self, inc, err_name):
        """Require *err_name* on the damaged OSD's shard and on no other.

        Exactly one shard must belong to ``self.osd`` and list
        *err_name* in its errors; at least one other shard must exist,
        every other shard must be in the acting set and must not list
        *err_name*.
        """
        bad_found = False
        good_found = False
        for shard in inc['shards']:
            log.info('shard = %r', shard)
            log.info('err = %s', err_name)
            assert 'osd' in shard
            osd = shard['osd']
            err = err_name in shard['errors']
            if osd == self.osd:
                assert bad_found is False, \
                    "multiple entries found for the given OSD"
                assert err is True, \
                    "Didn't find '{err}' in errors".format(err=err_name)
                bad_found = True
            else:
                assert osd in self.acting, "shard not in acting set"
                # this fires when a healthy shard reports the error, so
                # the message must say the error was *not* expected here
                assert err is False, \
                    "Unexpected '{err}' in errors".format(err=err_name)
                good_found = True
        assert bad_found is True, \
            "Shard for osd.{osd} not found".format(osd=self.osd)
        assert good_found is True, \
            "No other acting shards found"

    def _check_attrs(self, inc, attr_name):
        """Require *attr_name* to differ only on the damaged OSD's shard.

        All shards other than the one of ``self.osd`` must agree on the
        value of *attr_name*, and that value must differ from the one of
        the damaged shard. A shard lacking the attribute counts as
        having the value ``False``.
        """
        bad_attr = None
        good_attr = None
        for shard in inc['shards']:
            log.info('shard = %r', shard)
            log.info('attr = %s', attr_name)
            assert 'osd' in shard
            osd = shard['osd']
            attr = shard.get(attr_name, False)
            if osd == self.osd:
                assert bad_attr is None, \
                    "multiple entries found for the given OSD"
                bad_attr = attr
            else:
                assert osd in self.acting, "shard not in acting set"
                assert good_attr is None or good_attr == attr, \
                    "multiple good attrs found"
                good_attr = attr
        assert bad_attr is not None, \
            "bad {attr} not found".format(attr=attr_name)
        assert good_attr is not None, \
            "good {attr} not found".format(attr=attr_name)
        assert good_attr != bad_attr, \
            "bad attr is identical to the good ones: " \
            "{0} == {1}".format(good_attr, bad_attr)

    def data_digest_mismatch(self, inc):
        """Check a 'data_digest_mismatch' entry."""
        assert 'data_digest_mismatch' in inc['errors']
        self._check_attrs(inc, 'data_digest')

    def missing(self, inc):
        """Check a 'missing' entry."""
        assert 'missing' in inc['union_shard_errors']
        self._check_errors(inc, 'missing')

    def size_mismatch(self, inc):
        """Check a 'size_mismatch' entry."""
        assert 'size_mismatch' in inc['errors']
        self._check_attrs(inc, 'size')

    def omap_digest_mismatch(self, inc):
        """Check an 'omap_digest_mismatch' entry."""
        assert 'omap_digest_mismatch' in inc['errors']
        self._check_attrs(inc, 'omap_digest')
280 | ||
281 | ||
def test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd_id,
                               obj_name, obj_path):
    """Damage an object in several ways and check what rados reports.

    For every kind of damage offered by MessUp, the PG is deep-scrubbed,
    then ``rados list-inconsistent-pg`` must list exactly *pg* and
    ``rados list-inconsistent-obj`` must list exactly one object, which
    is validated with InconsistentObjChecker. *ctx* is accepted but not
    used.
    """
    mon = manager.controller
    pool = 'rbd'
    omap_key, omap_val = 'key', 'val'
    manager.do_rados(mon, ['-p', pool, 'setomapval', obj_name,
                           omap_key, omap_val])
    # Update missing digests, requires "osd deep scrub update digest min age: 0"
    manager.do_pg_scrub(pool, get_pgnum(pg), 'deep-scrub')

    # the two listing commands are the same for every kind of damage
    list_pg_cmd = 'rados list-inconsistent-pg {0} --format=json'.format(pool)
    list_obj_cmd = 'rados list-inconsistent-obj {0} --format=json'.format(pg)

    messup = MessUp(manager, osd_remote, pool, osd_id, obj_name, obj_path,
                    omap_key, omap_val)
    damage_tests = (
        messup.rm_omap, messup.add_omap, messup.change_omap,
        messup.append, messup.truncate, messup.change_obj,
        messup.remove,
    )
    for damage in damage_tests:
        with damage() as checks:
            deep_scrub(manager, pg, pool)
            inconsistent_pgs = json.loads(mon.sh(list_pg_cmd))
            assert inconsistent_pgs == [pg]

            inconsistents = json.loads(mon.sh(list_obj_cmd))['inconsistents']
            assert len(inconsistents) == 1

            checker = InconsistentObjChecker(osd_id, acting, obj_name)
            inc_obj = inconsistents[0]
            log.info('inc = %r', inc_obj)
            checker.basic_checks(inc_obj)
            for check in checks:
                checker.run(check, inc_obj)
317 | ||
318 | ||
def task(ctx, config):
    """
    Test [deep] scrub

    Enables the objectstore fuse view on every OSD, writes some data to
    the 'rbd' pool, picks a PG holding data and an object in it on the
    first OSD of its acting set, then runs the corruption/repair tests
    against that object. The fuse view is switched off again afterwards.

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist:
        - '!= data_digest'
        - '!= omap_digest'
        - '!= size'
        - deep-scrub 0 missing, 1 inconsistent objects
        - deep-scrub [0-9]+ errors
        - repair 0 missing, 1 inconsistent objects
        - repair [0-9]+ errors, [0-9]+ fixed
        - shard [0-9]+ .* : missing
        - deep-scrub 1 missing, 1 inconsistent objects
        - does not match object info size
        - attr name mistmatch
        - deep-scrub 1 missing, 0 inconsistent objects
        - failed to pick suitable auth object
        - candidate size [0-9]+ info size [0-9]+ mismatch
        conf:
          osd:
            osd deep scrub update digest min age: 0
    - scrub_test:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub_test task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    # wait until every OSD is up
    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    # the tests below reach objects through /var/lib/ceph/osd/ceph-N/fuse
    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs',
                                '--', '--osd-objectstore-fuse')
    manager.flush_pg_stats(range(num_osds))
    manager.wait_for_clean()

    # write some data
    p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1',
                               'write', '-b', '4096'])
    log.info('err is %d' % p.exitstatus)

    # wait for some PG to have data that we can mess with
    pg, acting = wait_for_victim_pg(manager)
    osd = acting[0]

    osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd)
    # capture each result so the exit status that gets logged belongs to
    # the command that was just run, not to the earlier bench
    p = manager.do_rados(mon, ['-p', 'rbd', 'setomapval', obj_name,
                               'key', 'val'])
    log.info('err is %d' % p.exitstatus)
    p = manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', obj_name,
                               'hdr'])
    log.info('err is %d' % p.exitstatus)

    # Update missing digests, requires "osd deep scrub update digest min age: 0"
    pgnum = get_pgnum(pg)
    manager.do_pg_scrub('rbd', pgnum, 'deep-scrub')

    log.info('messing with PG %s on osd %d' % (pg, osd))
    test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, 'rbd')
    test_repair_bad_omap(ctx, manager, pg, osd, obj_name)
    test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd,
                               obj_name, obj_path)
    log.info('test successful!')

    # shut down fuse mount
    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs',
                                '--', '--no-osd-objectstore-fuse')
    time.sleep(5)
    log.info('done')