"""
Test that the forward scrub functionality can traverse metadata and apply
requested tags, on well formed metadata.

This is *not* the real testing for forward scrub, which will need to test
how the functionality responds to damaged metadata.

"""
import logging
import json
import errno

from collections import namedtuple
from io import BytesIO
from textwrap import dedent

from teuthology.exceptions import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase

import struct

log = logging.getLogger(__name__)


ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])


class TestForwardScrub(CephFSTestCase):
    MDSS_REQUIRED = 1

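    # Note: a tagging forward scrub writes a "scrub_tag" xattr whose value is
    # a ceph-encoded string, i.e. a 4-byte length header followed by the raw
    # bytes. The helper below fetches the xattr with `rados getxattr` and
    # decodes that framing by hand.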
    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr
        """
        output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool,
                                               stdout=BytesIO()).stdout.getvalue()
        strlen = struct.unpack('i', output[0:4])[0]
        return output[4:(4 + strlen)].decode(encoding='ascii')

    def _get_paths_to_ino(self):
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

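    # MDS_DAMAGE is the cluster health warning raised while the MDS damage
    # table has entries; the repair tests below wait for it to appear and
    # then to clear again once a repair scrub has run.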
    def _is_MDS_damage(self):
        return "MDS_DAMAGE" in self.mds_cluster.mon_manager.get_mon_health()['checks']

    def test_apply_tag(self):
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a structure mapping path to inode, as we will later want
        # to check object by object and objects are named after ino number
        inos = self._get_paths_to_ino()

        # Flush metadata: this is a friendly test of forward scrub so we're skipping
        # the part where it's meant to cope with dirty metadata
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"

        # Execute tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])
        # Wait for completion
        import time
        time.sleep(10)
        # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll
        # watch that instead

        # Check that dirs were tagged
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())

        # This guy wasn't in the tag path, shouldn't have been tagged
        self.assertUntagged(inos["./rfile"])

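    # The assertions below read the tag straight out of RADOS: an inode's
    # first object is named "<ino in hex>.00000000", which for a directory is
    # its dirfrag object in the metadata pool and for a regular file is its
    # first data object in the data pool.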
    def assertUntagged(self, ino):
        file_obj_name = "{0:x}.00000000".format(ino)
        with self.assertRaises(CommandFailedError):
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                open(os.path.join(flushed, f), 'w').write(f)
            """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Flush journal
        # Umount before flush to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.flush()

        # Create a new inode that's just in the log, i.e. would
        # look orphaned to backward scan if backward scan wisnae
        # respectin' tha scrub_tag xattr.
        self.mount_a.mount_wait()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry
        # Our victim will be.... bravo.
        self.mount_a.umount_wait()
        self.fs.fail()
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"])

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount_wait()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.rank_asok(["tag", "path", "/parent", tag])

        # See that the orphan wisnae tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # okay, now we are going to run cephfs-data-scan. It's necessary to
        # have a clean journal otherwise replay will blowup on mismatched
        # inotable versions (due to scan_links)
        self.fs.flush()
        self.fs.fail()
        self.fs.journal_tool(["journal", "reset", "--force"], 0)

        # Run cephfs-data-scan targeting only orphans
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])
        self.fs.data_scan(["scan_links"])

        # After in-place injection stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()
        self._validate_linkage(inos)

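    # Each MDS rank persists its inode allocation table in a metadata-pool
    # object named "mds<rank>_inotable"; stash copies of those objects so a
    # test can later roll the table back with a raw `rados put`.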
    def _stash_inotable(self):
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()

        inotable_dict = {}
        for rank in ranks:
            inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable"
            print("Trying to fetch inotable object: " + inotable_oid)

            #self.fs.get_metadata_object("InoTable", "mds0_inotable")
            inotable_raw = self.fs.radosmo(['get', inotable_oid, '-'])
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict

    def test_inotable_sync(self):
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        inotable_copy = self._stash_inotable()

        self.mount_a.mount_wait()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        self.mount_a.umount_wait()

        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)

        # Revert to old inotable.
        for key, value in inotable_copy.items():
            self.fs.radosm(["put", key, "-"], stdin=BytesIO(value))

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.fail()
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])

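    # An inode's backtrace (its ancestor dentries) is stored in the "parent"
    # xattr of its first data object, which is what read_backtrace() and
    # _write_data_xattr() manipulate below.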
    def test_backtrace_repair(self):
        """
        That the MDS can repair an inode's backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # That backtrace and layout are written after initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

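    # A dirfrag object's omap maps "<dentry name>_head" keys to encoded
    # dentries, so overwriting one of those values with junk is enough to
    # manufacture dentry damage for the scrub to find.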
    def test_health_status_after_dentry_repair(self):
        """
        Test that the damage health status is cleared
        after the damaged dentry is repaired
        """
        # Create a file for checks
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.fail()

        # Corrupt a dentry
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Start up and try to list it
        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        self.mount_a.mount_wait()
        dentries = self.mount_a.ls("subdir/")

        # The damaged guy should have disappeared
        self.assertEqual(dentries, ["file_undamaged"])

        # I should get ENOENT if I try and read it normally, because
        # the dir is considered complete
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")

        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")

        self.mount_a.umount_wait()

        out_json = self.fs.run_scrub(["start", "/subdir", "recursive"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that an entry for dentry damage is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "dentry")
        self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100)

        out_json = self.fs.run_scrub(["start", "/subdir", "repair,recursive"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that the entry is cleared from the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 0)
        self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100)

        self.mount_a.mount_wait()

        # Check that the file count is now correct
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")

        # Clean up the omap object
        self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

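    # Here the whole dirfrag object is removed instead of a single dentry.
    # The hardlink in testdir is a remote dentry pointing at an inode whose
    # primary dentry lived in that deleted dirfrag, so resolving it is
    # expected to fail with EIO until the damage is repaired.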
    def test_health_status_after_dirfrag_repair(self):
        """
        Test that the damage health status is cleared
        after the damaged dirfrag is repaired
        """
        self.mount_a.run_shell(["mkdir", "dir"])
        self.mount_a.run_shell(["touch", "dir/file"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir/file", "testdir/hardlink"])

        dir_ino = self.mount_a.path_to_ino("dir")

        # Ensure everything is written to backing store
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Drop everything from the MDS cache
        self.fs.fail()

        self.fs.radosm(["rm", "{0:x}.00000000".format(dir_ino)])

        self.fs.journal_tool(['journal', 'reset'], 0)
        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        out_json = self.fs.run_scrub(["start", "/dir", "recursive"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 3)
        damage_types = set()
        for i in range(0, 3):
            damage_types.add(damage[i]['damage_type'])
        self.assertIn("dir_frag", damage_types)
        self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100)

        out_json = self.fs.run_scrub(["start", "/dir", "recursive,repair"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that the entry is cleared from the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertNotEqual(damage[0]['damage_type'], "dir_frag")

        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])
        self.fs.fail()

        # Run cephfs-data-scan
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()])
        self.fs.data_scan(["scan_links"])

        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()

        out_json = self.fs.run_scrub(["start", "/dir", "recursive,repair"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 0)
        self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100)

    def test_health_status_after_backtrace_repair(self):
        """
        Test that the damage health status is cleared
        after the damaged backtrace is repaired
        """
        # Create a file for checks
        self.mount_a.run_shell(["mkdir", "dir_test"])
        self.mount_a.run_shell(["touch", "dir_test/file"])
        file_ino = self.mount_a.path_to_ino("dir_test/file")

        # That backtrace and layout are written after initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['file', 'dir_test'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "The backtrace is corrupted")

        out_json = self.fs.run_scrub(["start", "/", "recursive"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that an entry for backtrace damage is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100)

        out_json = self.fs.run_scrub(["start", "/", "repair,recursive,force"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that the entry is cleared from the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 0)
        self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100)