]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | """Scrub testing""" |
7c673cae FG |
2 | |
3 | import contextlib | |
4 | import json | |
5 | import logging | |
6 | import os | |
7 | import time | |
8 | import tempfile | |
9 | ||
e306af50 | 10 | from tasks import ceph_manager |
7c673cae FG |
11 | from teuthology import misc as teuthology |
12 | ||
13 | log = logging.getLogger(__name__) | |
14 | ||
15 | ||
def wait_for_victim_pg(manager, poolid):
    """Return a PG with some data and its acting set"""
    # poll pg stats until a PG belonging to the target pool reports
    # a non-zero byte count; that one becomes our victim
    while True:
        for pg_stat in manager.get_pg_stats():
            pgid = str(pg_stat['pgid'])
            if int(pgid.split('.')[0]) != poolid:
                continue
            if pg_stat['stat_sum']['num_bytes'] > 0:
                return pg_stat['pgid'], pg_stat['acting']
        time.sleep(3)
33 | ||
34 | ||
def find_victim_object(ctx, pg, osd):
    """Return a file to be fuzzed.

    Looks inside the objectstore FUSE mount of ``osd`` for the on-disk
    file backing some object in ``pg``.

    :returns: (osd_remote, full path of the object file, object name)
    """
    (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.keys()
    data_path = os.path.join(
        '/var/lib/ceph/osd',
        'ceph-{id}'.format(id=osd),
        'fuse',
        '{pg}_head'.format(pg=pg),
        'all',
    )

    # fuzz time
    ls_out = osd_remote.sh('sudo ls %s' % data_path)

    # find an object file we can mess with (and not the pg info object).
    # pass a default of None: without it next() raises StopIteration and
    # the assert below could never fire
    osdfilename = next((line for line in ls_out.split('\n')
                        if not line.endswith('::::head#')), None)
    assert osdfilename is not None

    # Get actual object name from osd stored filename: the fields are
    # colon-separated and field 4 is the object name
    objname = osdfilename.split(':')[4]
    return osd_remote, os.path.join(data_path, osdfilename), objname
57 | ||
58 | ||
def corrupt_file(osd_remote, path):
    """Overwrite the first byte of the object's data file with \\0."""
    dd_args = [
        'sudo', 'dd',
        'if=/dev/zero',
        'of=%s/data' % path,
        'bs=1', 'count=1', 'conv=notrunc',
    ]
    osd_remote.run(args=dd_args)
67 | ||
68 | ||
def get_pgnum(pgid):
    """Return the pg-number part of a "&lt;pool&gt;.&lt;pgnum&gt;" pgid string."""
    _, sep, pgnum = pgid.partition('.')
    # a pgid without a '.' is malformed
    assert sep
    return pgnum
73 | ||
74 | ||
def deep_scrub(manager, victim, pool):
    """Deep-scrub the victim PG and verify it is flagged inconsistent."""
    # pg number is everything after the first '.' in the pgid
    _, sep, pgnum = victim.partition('.')
    assert sep
    manager.do_pg_scrub(pool, pgnum, 'deep-scrub')
    stats = manager.get_single_pg_stats(victim)
    assert '+inconsistent' in stats['state']
83 | ||
84 | ||
def repair(manager, victim, pool):
    """Repair the victim PG and verify the inconsistent flag is cleared."""
    # pg number is everything after the first '.' in the pgid
    _, sep, pgnum = victim.partition('.')
    assert sep
    manager.do_pg_scrub(pool, pgnum, 'repair')
    stats = manager.get_single_pg_stats(victim)
    assert '+inconsistent' not in stats['state']
93 | ||
94 | ||
def test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, pool):
    """Corrupt an object's on-disk data, then verify a deep-scrub marks
    the PG inconsistent and a repair clears the flag.

    ``ctx`` is unused; kept for signature parity with the sibling
    test_* helpers.
    """
    corrupt_file(osd_remote, obj_path)
    deep_scrub(manager, pg, pool)
    repair(manager, pg, pool)
99 | ||
100 | ||
def test_repair_bad_omap(ctx, manager, pg, osd, objname):
    """Deep-scrub with various omap modifications, then clean up and repair."""
    log.info('fuzzing omap of %s' % objname)
    # corrupt the omap on one osd: drop a key, add a bogus key, and
    # replace the omap header
    for cmd in (['rmomapkey', 'rbd', objname, 'key'],
                ['setomapval', 'rbd', objname, 'badkey', 'badval'],
                ['setomapheader', 'rbd', objname, 'badhdr']):
        manager.osd_admin_socket(osd, cmd)

    deep_scrub(manager, pg, 'rbd')
    # please note, the repair here is erroneous: it rewrites the correct
    # omap digest and data digest on the replicas with the corresponding
    # digests from the primary osd hosting the victim object, see
    # find_victim_object().  so we need to either put this test at the
    # end of this task or undo the mess-up manually before the "repair()"
    # that just ensures the cleanup is sane; otherwise the succeeding
    # tests will fail if they set "badkey" in hope to get an
    # "inconsistent" pg with a deep-scrub.
    for cmd in (['setomapheader', 'rbd', objname, 'hdr'],
                ['rmomapkey', 'rbd', objname, 'badkey'],
                ['setomapval', 'rbd', objname, 'key', 'val']):
        manager.osd_admin_socket(osd, cmd)
    repair(manager, pg, 'rbd')
124 | ||
125 | ||
class MessUp:
    """Context managers that corrupt one object (file data or omap) on a
    single OSD and restore it afterwards.

    Each public method returns a context manager that yields a tuple of
    check names understood by InconsistentObjChecker.
    """

    def __init__(self, manager, osd_remote, pool, osd_id,
                 obj_name, obj_path, omap_key, omap_val):
        self.manager = manager      # ceph_manager, for admin-socket commands
        self.osd = osd_remote       # remote shell on the victim osd's host
        self.pool = pool
        self.osd_id = osd_id
        self.obj = obj_name
        self.path = obj_path        # object's path inside the fuse mount
        self.omap_key = omap_key
        self.omap_val = omap_val

    @contextlib.contextmanager
    def _test_with_file(self, messup_cmd, *checks):
        """Back up the object's data file, run ``messup_cmd``, yield the
        expected scrub checks, then restore the backup.

        NOTE: tempfile.mktemp() only *generates* a path here -- the file
        itself is created on the remote host by 'cp'; mkstemp() would
        create a useless local file instead.
        """
        temp = tempfile.mktemp()
        backup_cmd = ['sudo', 'cp', os.path.join(self.path, 'data'), temp]
        self.osd.run(args=backup_cmd)
        self.osd.run(args=messup_cmd.split())
        try:
            yield checks
        finally:
            # restore even if the caller's checks raise, so later tests
            # start from a sane object.  recreate the object dir in case
            # the mess-up removed it (ignore failure if it still exists)
            create_cmd = ['sudo', 'mkdir', self.path]
            self.osd.run(args=create_cmd, check_status=False)
            restore_cmd = ['sudo', 'cp', temp,
                           os.path.join(self.path, 'data')]
            self.osd.run(args=restore_cmd)

    def remove(self):
        """Remove the object's backing directory entirely."""
        cmd = 'sudo rmdir {path}'.format(path=self.path)
        return self._test_with_file(cmd, 'missing')

    def append(self):
        """Append one \\0 byte, changing both size and data digest."""
        cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \
              'conv=notrunc oflag=append'.format(path=self.path)
        return self._test_with_file(cmd,
                                    'data_digest_mismatch',
                                    'size_mismatch')

    def truncate(self):
        """Truncate the data file to zero length."""
        cmd = 'sudo dd if=/dev/null of={path}/data'.format(path=self.path)
        return self._test_with_file(cmd,
                                    'data_digest_mismatch',
                                    'size_mismatch')

    def change_obj(self):
        """Flip the first byte in place (size unchanged)."""
        cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \
              'conv=notrunc'.format(path=self.path)
        return self._test_with_file(cmd,
                                    'data_digest_mismatch')

    @contextlib.contextmanager
    def rm_omap(self):
        """Drop the known omap key; restore it on exit."""
        cmd = ['rmomapkey', self.pool, self.obj, self.omap_key]
        self.manager.osd_admin_socket(self.osd_id, cmd)
        try:
            yield ('omap_digest_mismatch',)
        finally:
            cmd = ['setomapval', self.pool, self.obj,
                   self.omap_key, self.omap_val]
            self.manager.osd_admin_socket(self.osd_id, cmd)

    @contextlib.contextmanager
    def add_omap(self):
        """Add a bogus omap key; remove it on exit."""
        cmd = ['setomapval', self.pool, self.obj, 'badkey', 'badval']
        self.manager.osd_admin_socket(self.osd_id, cmd)
        try:
            yield ('omap_digest_mismatch',)
        finally:
            cmd = ['rmomapkey', self.pool, self.obj, 'badkey']
            self.manager.osd_admin_socket(self.osd_id, cmd)

    @contextlib.contextmanager
    def change_omap(self):
        """Overwrite the known omap key's value; restore it on exit."""
        cmd = ['setomapval', self.pool, self.obj, self.omap_key, 'badval']
        self.manager.osd_admin_socket(self.osd_id, cmd)
        try:
            yield ('omap_digest_mismatch',)
        finally:
            cmd = ['setomapval', self.pool, self.obj,
                   self.omap_key, self.omap_val]
            self.manager.osd_admin_socket(self.osd_id, cmd)
197 | ||
198 | ||
class InconsistentObjChecker:
    """Check the returned inconsistents/inconsistent info"""

    def __init__(self, osd, acting, obj_name):
        """
        :param osd: id of the osd hosting the corrupted shard
        :param acting: acting set of the victim pg
        :param obj_name: name of the corrupted object
        """
        self.osd = osd
        self.acting = acting
        self.obj = obj_name
        assert self.osd in self.acting

    def basic_checks(self, inc):
        """Sanity-check the fields shared by all inconsistency reports."""
        assert inc['object']['name'] == self.obj
        assert inc['object']['snap'] == "head"
        assert len(inc['shards']) == len(self.acting), \
            "the number of returned shard does not match with the acting set"

    def run(self, check, inc):
        """Dispatch to the check method whose name is ``check``."""
        func = getattr(self, check)
        func(inc)

    def _check_errors(self, inc, err_name):
        """Verify ``err_name`` is reported only on the corrupted osd's shard."""
        bad_found = False
        good_found = False
        for shard in inc['shards']:
            log.info('shard = %r' % shard)
            log.info('err = %s' % err_name)
            assert 'osd' in shard
            osd = shard['osd']
            err = err_name in shard['errors']
            if osd == self.osd:
                assert bad_found is False, \
                    "multiple entries found for the given OSD"
                assert err is True, \
                    "Didn't find '{err}' in errors".format(err=err_name)
                bad_found = True
            else:
                assert osd in self.acting, "shard not in acting set"
                # healthy replicas must NOT report the error.  the old
                # message here ("Expected ...") said the opposite of what
                # was being asserted
                assert err is False, \
                    "Unexpected '{err}' in errors".format(err=err_name)
                good_found = True
        assert bad_found is True, \
            "Shard for osd.{osd} not found".format(osd=self.osd)
        assert good_found is True, \
            "No other acting shards found"

    def _check_attrs(self, inc, attr_name):
        """Verify ``attr_name`` differs between the corrupted shard and
        the (mutually identical) healthy shards."""
        bad_attr = None
        good_attr = None
        for shard in inc['shards']:
            log.info('shard = %r' % shard)
            log.info('attr = %s' % attr_name)
            assert 'osd' in shard
            osd = shard['osd']
            attr = shard.get(attr_name, False)
            if osd == self.osd:
                assert bad_attr is None, \
                    "multiple entries found for the given OSD"
                bad_attr = attr
            else:
                assert osd in self.acting, "shard not in acting set"
                assert good_attr is None or good_attr == attr, \
                    "multiple good attrs found"
                good_attr = attr
        assert bad_attr is not None, \
            "bad {attr} not found".format(attr=attr_name)
        assert good_attr is not None, \
            "good {attr} not found".format(attr=attr_name)
        assert good_attr != bad_attr, \
            "bad attr is identical to the good ones: " \
            "{0} == {1}".format(good_attr, bad_attr)

    def data_digest_mismatch(self, inc):
        assert 'data_digest_mismatch' in inc['errors']
        self._check_attrs(inc, 'data_digest')

    def missing(self, inc):
        assert 'missing' in inc['union_shard_errors']
        self._check_errors(inc, 'missing')

    def size_mismatch(self, inc):
        assert 'size_mismatch' in inc['errors']
        self._check_attrs(inc, 'size')

    def omap_digest_mismatch(self, inc):
        assert 'omap_digest_mismatch' in inc['errors']
        self._check_attrs(inc, 'omap_digest')
284 | ||
285 | ||
def test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd_id,
                               obj_name, obj_path):
    """Apply every MessUp corruption in turn and verify that
    list-inconsistent-pg/obj report exactly the victim with the
    expected per-corruption inconsistencies."""
    mon = manager.controller
    pool = 'rbd'
    omap_key = 'key'
    omap_val = 'val'
    manager.do_rados(['setomapval', obj_name, omap_key, omap_val], pool=pool)
    # Update missing digests, requires "osd deep scrub update digest min age: 0"
    manager.do_pg_scrub(pool, get_pgnum(pg), 'deep-scrub')

    messup = MessUp(manager, osd_remote, pool, osd_id, obj_name, obj_path,
                    omap_key, omap_val)
    corruptions = (messup.rm_omap, messup.add_omap, messup.change_omap,
                   messup.append, messup.truncate, messup.change_obj,
                   messup.remove)
    for test in corruptions:
        with test() as checks:
            deep_scrub(manager, pg, pool)

            # only the victim pg may be listed as inconsistent
            pg_cmd = 'rados list-inconsistent-pg {pool} ' \
                     '--format=json'.format(pool=pool)
            pgs = json.loads(mon.sh(pg_cmd))
            assert pgs == [pg]

            # and it holds exactly one inconsistent object
            obj_cmd = 'rados list-inconsistent-obj {pg} ' \
                      '--format=json'.format(pg=pg)
            objs = json.loads(mon.sh(obj_cmd))
            assert len(objs['inconsistents']) == 1

            checker = InconsistentObjChecker(osd_id, acting, obj_name)
            inc_obj = objs['inconsistents'][0]
            log.info('inc = %r', inc_obj)
            checker.basic_checks(inc_obj)
            for check in checks:
                checker.run(check, inc_obj)
320 | ||
321 | ||
def task(ctx, config):
    """
    Test [deep] scrub

    tasks:
    - chef:
    - install:
    - ceph:
        log-ignorelist:
        - '!= data_digest'
        - '!= omap_digest'
        - '!= size'
        - deep-scrub 0 missing, 1 inconsistent objects
        - deep-scrub [0-9]+ errors
        - repair 0 missing, 1 inconsistent objects
        - repair [0-9]+ errors, [0-9]+ fixed
        - shard [0-9]+ .* : missing
        - deep-scrub 1 missing, 1 inconsistent objects
        - does not match object info size
        - attr name mistmatch
        - deep-scrub 1 missing, 0 inconsistent objects
        - failed to pick suitable auth object
        - candidate size [0-9]+ info size [0-9]+ mismatch
      conf:
        osd:
          osd deep scrub update digest min age: 0
    - scrub_test:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub_test task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    # wait until all osds are up before poking at their objectstores
    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    # expose each osd's objectstore via fuse so we can fuzz on-disk state
    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs',
                                '--', '--osd-objectstore-fuse')
    manager.flush_pg_stats(range(num_osds))
    manager.wait_for_clean()

    # look up the numeric id of the 'rbd' pool
    osd_dump = manager.get_osd_dump_json()
    poolid = -1
    for p in osd_dump['pools']:
        if p['pool_name'] == 'rbd':
            poolid = p['pool']
            break
    assert poolid != -1

    # write some data
    p = manager.do_rados(['bench', '--no-cleanup', '1', 'write', '-b', '4096'],
                         pool='rbd')
    log.info('err is %d' % p.exitstatus)

    # wait for some PG to have data that we can mess with
    pg, acting = wait_for_victim_pg(manager, poolid)
    osd = acting[0]

    osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd)
    # capture each proc so the logged exit status belongs to the command
    # that just ran (previously the stale bench status was re-logged)
    p = manager.do_rados(['setomapval', obj_name, 'key', 'val'], pool='rbd')
    log.info('err is %d' % p.exitstatus)
    p = manager.do_rados(['setomapheader', obj_name, 'hdr'], pool='rbd')
    log.info('err is %d' % p.exitstatus)

    # Update missing digests, requires "osd deep scrub update digest min age: 0"
    pgnum = get_pgnum(pg)
    manager.do_pg_scrub('rbd', pgnum, 'deep-scrub')

    log.info('messing with PG %s on osd %d' % (pg, osd))
    test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, 'rbd')
    test_repair_bad_omap(ctx, manager, pg, osd, obj_name)
    test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd,
                               obj_name, obj_path)
    log.info('test successful!')

    # shut down fuse mount
    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs',
                                '--', '--no-osd-objectstore-fuse')
    time.sleep(5)
    log.info('done')