"""
Test that the forward scrub functionality can traverse metadata and apply
requested tags, on well-formed metadata.

This is *not* the real testing for forward scrub, which will need to test
how the functionality responds to damaged metadata.

"""
import logging
import json
import errno

from collections import namedtuple
from io import BytesIO
from textwrap import dedent

from teuthology.exceptions import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase

import struct

log = logging.getLogger(__name__)


ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])


class TestForwardScrub(CephFSTestCase):
    MDSS_REQUIRED = 1

    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr
        """
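        # A "ceph-encoded" string (as this helper assumes) is a 32-bit length
        # prefix followed by the raw bytes, e.g. b'\x05\x00\x00\x00mytag'
        # decodes to "mytag" on a little-endian host.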
        output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool,
                                              stdout=BytesIO()).stdout.getvalue()
        strlen = struct.unpack('i', output[0:4])[0]
        return output[4:(4 + strlen)].decode(encoding='ascii')

    def _get_paths_to_ino(self):
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

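    # The MDS_DAMAGE health check is raised while the MDS has entries in its
    # damage table; the repair tests below wait for it to appear and clear.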
    def _is_MDS_damage(self):
        return "MDS_DAMAGE" in self.mds_cluster.mon_manager.get_mon_health()['checks']

    def test_apply_tag(self):
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a structure mapping path to inode, as we will later want
        # to check object by object and objects are named after ino number
        inos = self._get_paths_to_ino()

        # Flush metadata: this is a friendly test of forward scrub so we're skipping
        # the part where it's meant to cope with dirty metadata
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"

        # Execute tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])
        # Wait for completion
        import time
        time.sleep(10)
        # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll
        # watch that instead

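        # Objects are named "<ino in hex>.00000000"; dirfrag objects live in
        # the metadata pool while file data objects live in the data pool,
        # which is why the two loops below look in different pools.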
        # Check that dirs were tagged
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())

        # This file wasn't in the tag path, so it shouldn't have been tagged
        self.assertUntagged(inos["./rfile"])

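    # assertTagged/assertUntagged read the "scrub_tag" xattr straight off the
    # inode's first RADOS object; a missing xattr makes the underlying
    # `rados getxattr` fail, which is what assertUntagged relies on.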
    def assertUntagged(self, ino):
        file_obj_name = "{0:x}.00000000".format(ino)
        with self.assertRaises(CommandFailedError):
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                open(os.path.join(flushed, f), 'w').write(f)
            """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Flush journal
        # Umount before flush to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.flush()

        # Create a new inode that's just in the log, i.e. would
        # look orphaned to backward scan if backward scan wisnae
        # respectin' tha scrub_tag xattr.
        self.mount_a.mount_wait()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry
        # Our victim will be.... bravo.
        self.mount_a.umount_wait()
        self.fs.fail()
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"])
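        # Dentries are stored as omap keys ("<name>_head") on the dirfrag
        # object, so removing "bravo_head" deletes bravo's dentry on disk and
        # leaves its inode and data objects orphaned.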

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount_wait()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.rank_asok(["tag", "path", "/parent", tag])

        # See that the orphan wisnae tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that the journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # OK, now we are going to run cephfs-data-scan. It's necessary to
        # have a clean journal, otherwise replay will blow up on mismatched
        # inotable versions (due to scan_links)
        self.fs.flush()
        self.fs.fail()
        self.fs.journal_tool(["journal", "reset", "--force"], 0)

        # Run cephfs-data-scan targeting only orphans
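        # (scan_inodes with --filter-tag skips objects that already carry the
        # scrub tag, so only the untagged orphan, bravo, should be reinjected.)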
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])
        self.fs.data_scan(["scan_links"])

        # After in-place injection stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()
        self._validate_linkage(inos)

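    # Stash the raw per-rank inode tables ("mds<rank>_inotable" objects in the
    # metadata pool) so test_inotable_sync can put stale copies back later.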
    def _stash_inotable(self):
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()

        inotable_dict = {}
        for rank in ranks:
            inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable"
            log.info("Trying to fetch inotable object: " + inotable_oid)

            inotable_raw = self.fs.radosmo(['get', inotable_oid, '-'])
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict

    def test_inotable_sync(self):
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        inotable_copy = self._stash_inotable()

        self.mount_a.mount_wait()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        self.mount_a.umount_wait()

        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)

        # Revert to old inotable.
        for key, value in inotable_copy.items():
            self.fs.radosm(["put", key, "-"], stdin=BytesIO(value))

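        # At this point the on-disk inotable predates file2/file3 even though
        # their dentries have been flushed, so the repair scrub below is
        # expected to log "inode table repaired" (the earlier scrub, run
        # before the revert, asserted that no such repair happened).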
        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.fail()
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])

    def test_backtrace_repair(self):
        """
        That the MDS can repair an inode's backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # Check that the backtrace and layout are written after the initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")
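        # The backtrace lives in the "parent" xattr of the inode's first data
        # object; overwriting it with junk is what the scrub below reports as
        # "bad backtrace on inode".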

        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

    def test_health_status_after_dentry_repair(self):
        """
        Test that the damage health status is cleared
        after the damaged dentry is repaired
        """
        # Create a file for checks
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.fail()

        # Corrupt a dentry
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
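        # Overwriting the "file_to_be_damaged_head" omap value makes that
        # dentry undecodable, so the MDS drops it when loading the dirfrag
        # and a "dentry" entry is expected in the damage table below.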

        # Start up and try to list it
        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        self.mount_a.mount_wait()
        dentries = self.mount_a.ls("subdir/")

        # The damaged dentry should have disappeared
        self.assertEqual(dentries, ["file_undamaged"])

        # I should get ENOENT if I try to read it normally, because
        # the dir is considered complete
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")

        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")

        self.mount_a.umount_wait()

        out_json = self.fs.run_scrub(["start", "/subdir", "recursive"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that an entry for dentry damage is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "dentry")
        self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100)

        out_json = self.fs.run_scrub(["start", "/subdir", "repair,recursive"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that the entry is cleared from the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 0)
        self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100)

        self.mount_a.mount_wait()

        # Check that the file count is now correct
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")

        # Clean up the omap object
        self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

    def test_health_status_after_dirfrag_repair(self):
        """
        Test that the damage health status is cleared
        after the damaged dirfrag is repaired
        """
        self.mount_a.run_shell(["mkdir", "dir"])
        self.mount_a.run_shell(["touch", "dir/file"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir/file", "testdir/hardlink"])

        dir_ino = self.mount_a.path_to_ino("dir")

        # Ensure everything is written to backing store
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Drop everything from the MDS cache
        self.fs.fail()

        self.fs.radosm(["rm", "{0:x}.00000000".format(dir_ino)])
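        # Deleting the dirfrag object wholesale loses every dentry in "dir",
        # so the recursive scrub below ends up with three damage-table
        # entries, one of which is of type "dir_frag".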

        self.fs.journal_tool(['journal', 'reset'], 0)
        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        out_json = self.fs.run_scrub(["start", "/dir", "recursive"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 3)
        damage_types = {entry['damage_type'] for entry in damage}
        self.assertIn("dir_frag", damage_types)
        self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100)

        out_json = self.fs.run_scrub(["start", "/dir", "recursive,repair"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that the entry is cleared from the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertNotEqual(damage[0]['damage_type'], "dir_frag")

        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])
        self.fs.fail()

        # Run cephfs-data-scan to rebuild the lost dirfrag
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()])
        self.fs.data_scan(["scan_links"])

        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()

        out_json = self.fs.run_scrub(["start", "/dir", "recursive,repair"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 0)
        self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100)

    def test_health_status_after_backtrace_repair(self):
        """
        Test that the damage health status is cleared
        after the damaged backtrace is repaired
        """
        # Create a file for checks
        self.mount_a.run_shell(["mkdir", "dir_test"])
        self.mount_a.run_shell(["touch", "dir_test/file"])
        file_ino = self.mount_a.path_to_ino("dir_test/file")

        # Check that the backtrace and layout are written after the initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['file', 'dir_test'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "The backtrace is corrupted")

        out_json = self.fs.run_scrub(["start", "/", "recursive"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that an entry for backtrace damage is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100)

        out_json = self.fs.run_scrub(["start", "/", "repair,recursive,force"])
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Check that the entry is cleared from the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 0)
        self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100)