"""
Test our tools for recovering metadata from the data pool
"""
import json

import logging
import os
import time
import traceback

from io import BytesIO, StringIO
from collections import namedtuple, defaultdict
from textwrap import dedent

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology

log = logging.getLogger(__name__)


ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])


class Workload(object):
    def __init__(self, filesystem, mount):
        self._mount = mount
        self._filesystem = filesystem
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them.  Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string.
        self._errors = []

    def assert_equal(self, a, b):
        try:
            if a != b:
                raise AssertionError("{0} != {1}".format(a, b))
        except AssertionError as e:
            self._errors.append(
                ValidationError(e, traceback.format_exc(3))
            )

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from.  By
        default just wipe everything in the metadata pool.
        """
        # Delete every object in the metadata pool
        pool = self._filesystem.get_metadata_pool_name()
        self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it'])

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._filesystem.mds_asok(["flush", "journal"])


class SimpleWorkload(Workload):
    """
    Single file, single directory: check that it gets recovered and so does its size
    """
    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        self._mount.write_n_mb("subdir/sixmegs", 6)
        self._initial_state = self._mount.stat("subdir/sixmegs")

    def validate(self):
        self._mount.run_shell(["ls", "subdir"])
        st = self._mount.stat("subdir/sixmegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        return self._errors


class MovedFile(Workload):
    def write(self):
        # Create a file whose backtrace disagrees with its eventual position
        # in the metadata.  We will see that it gets reconstructed in its
        # original position according to its backtrace.
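        # (A file's backtrace -- the ancestry recorded alongside its data
        # objects -- is only persisted when metadata is flushed, which is why
        # the journal flush below happens before the mv: recovery is expected
        # to trust the stale subdir_alpha location.)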
        self._mount.run_shell(["mkdir", "subdir_alpha"])
        self._mount.run_shell(["mkdir", "subdir_bravo"])
        self._mount.write_n_mb("subdir_alpha/sixmegs", 6)
        self._filesystem.mds_asok(["flush", "journal"])
        self._mount.run_shell(["mv", "subdir_alpha/sixmegs", "subdir_bravo/sixmegs"])
        self._initial_state = self._mount.stat("subdir_bravo/sixmegs")

    def flush(self):
        pass

    def validate(self):
        self.assert_equal(self._mount.ls(), ["subdir_alpha"])
        st = self._mount.stat("subdir_alpha/sixmegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        return self._errors


class BacktracelessFile(Workload):
    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        self._mount.write_n_mb("subdir/sixmegs", 6)
        self._initial_state = self._mount.stat("subdir/sixmegs")

    def flush(self):
        # Never flush metadata, so backtrace won't be written
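        # (and data-scan will have no path information to re-link the inode
        # under -- validate() below expects it to surface in lost+found)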
        pass

    def validate(self):
        ino_name = "%x" % self._initial_state["st_ino"]

        # The inode should be linked into lost+found because we had no path for it
        self.assert_equal(self._mount.ls(), ["lost+found"])
        self.assert_equal(self._mount.ls("lost+found"), [ino_name])
        st = self._mount.stat("lost+found/{ino_name}".format(ino_name=ino_name))

        # We might not have got the name or path, but we should still get the size
        self.assert_equal(st['st_size'], self._initial_state['st_size'])

        return self._errors


class StripedStashedLayout(Workload):
    def __init__(self, fs, m):
        super(StripedStashedLayout, self).__init__(fs, m)

        # Nice small stripes so we can quickly do our writes+validates
        self.sc = 4
        self.ss = 65536
        self.os = 262144
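        # (ss/sc/os = stripe_unit/stripe_count/object_size; these feed the
        # ceph.dir.layout string set in write() below)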

        self.interesting_sizes = [
            # Exactly stripe_count objects will exist
            self.os * self.sc,
            # Fewer than stripe_count objects will exist
            self.os * self.sc // 2,
            self.os * (self.sc - 1) + self.os // 2,
            self.os * (self.sc - 1) + self.os // 2 - 1,
            self.os * (self.sc + 1) + self.os // 2,
            self.os * (self.sc + 1) + self.os // 2 + 1,
            # More than stripe_count objects will exist
            self.os * self.sc + self.os * self.sc // 2
        ]

    def write(self):
        # Create a dir with a striped layout set on it
        self._mount.run_shell(["mkdir", "stripey"])

        self._mount.setfattr("./stripey", "ceph.dir.layout",
            "stripe_unit={ss} stripe_count={sc} object_size={os} pool={pool}".format(
                ss=self.ss, os=self.os, sc=self.sc,
                pool=self._filesystem.get_data_pool_name()
            ))

        # Write files, then flush metadata so that each file's layout gets
        # written into an xattr
        for i, n_bytes in enumerate(self.interesting_sizes):
            self._mount.write_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
            # This is really just validating the validator
            self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
        self._filesystem.mds_asok(["flush", "journal"])

        # Write another file in the same way, but this time don't flush the metadata,
        # so that it won't have the layout xattr
        self._mount.write_test_pattern("stripey/unflushed_file", 1024 * 512)
        self._mount.validate_test_pattern("stripey/unflushed_file", 1024 * 512)

        self._initial_state = {
            "unflushed_ino": self._mount.path_to_ino("stripey/unflushed_file")
        }

    def flush(self):
        # Pass because we already selectively flushed during write
        pass

    def validate(self):
        # The flushed files should have been recovered into their original
        # locations with the correct layout: read back correct data
        for i, n_bytes in enumerate(self.interesting_sizes):
            try:
                self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
            except CommandFailedError as e:
                self._errors.append(
                    ValidationError("File {0} (size {1}): {2}".format(i, n_bytes, e), traceback.format_exc(3))
                )

        # The unflushed file should have been recovered into lost+found without
        # the correct layout: read back junk
        ino_name = "%x" % self._initial_state["unflushed_ino"]
        self.assert_equal(self._mount.ls("lost+found"), [ino_name])
        try:
            self._mount.validate_test_pattern(os.path.join("lost+found", ino_name), 1024 * 512)
        except CommandFailedError:
            pass
        else:
            self._errors.append(
                ValidationError("Unexpectedly valid data in unflushed striped file", "")
            )

        return self._errors


class ManyFilesWorkload(Workload):
    def __init__(self, filesystem, mount, file_count):
        super(ManyFilesWorkload, self).__init__(filesystem, mount)
        self.file_count = file_count

    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        for n in range(0, self.file_count):
            self._mount.write_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024)

    def validate(self):
        for n in range(0, self.file_count):
            try:
                self._mount.validate_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024)
            except CommandFailedError as e:
                self._errors.append(
                    ValidationError("File {0}: {1}".format(n, e), traceback.format_exc(3))
                )

        return self._errors


class MovedDir(Workload):
    def write(self):
        # Create a nested dir that we will then move.  Two files with two different
        # backtraces referring to the moved dir, claiming two different locations for
        # it.  We will see that only one backtrace wins and the dir ends up with
        # single linkage.
        self._mount.run_shell(["mkdir", "-p", "grandmother/parent"])
        self._mount.write_n_mb("grandmother/parent/orig_pos_file", 1)
        self._filesystem.mds_asok(["flush", "journal"])
        self._mount.run_shell(["mkdir", "grandfather"])
        self._mount.run_shell(["mv", "grandmother/parent", "grandfather"])
        self._mount.write_n_mb("grandfather/parent/new_pos_file", 2)
        self._filesystem.mds_asok(["flush", "journal"])

        self._initial_state = (
            self._mount.stat("grandfather/parent/orig_pos_file"),
            self._mount.stat("grandfather/parent/new_pos_file")
        )

    def validate(self):
        root_files = self._mount.ls()
        self.assert_equal(len(root_files), 1)
        self.assert_equal(root_files[0] in ["grandfather", "grandmother"], True)
        winner = root_files[0]
        st_opf = self._mount.stat("{0}/parent/orig_pos_file".format(winner))
        st_npf = self._mount.stat("{0}/parent/new_pos_file".format(winner))

        self.assert_equal(st_opf['st_size'], self._initial_state[0]['st_size'])
        self.assert_equal(st_npf['st_size'], self._initial_state[1]['st_size'])

        return self._errors


class MissingZerothObject(Workload):
    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        self._mount.write_n_mb("subdir/sixmegs", 6)
        self._initial_state = self._mount.stat("subdir/sixmegs")

    def damage(self):
        super(MissingZerothObject, self).damage()
        zeroth_id = "{0:x}.00000000".format(self._initial_state['st_ino'])
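        # A file's data objects are named "<ino hex>.<object index as eight hex
        # digits>", so this removes the file's zeroth object -- the one that
        # also carries its backtrace -- and recovery is expected to fall back
        # to lost+found (see validate()).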
        self._filesystem.rados(["rm", zeroth_id], pool=self._filesystem.get_data_pool_name())

    def validate(self):
        st = self._mount.stat("lost+found/{0:x}".format(self._initial_state['st_ino']))
        self.assert_equal(st['st_size'], self._initial_state['st_size'])

        return self._errors


class NonDefaultLayout(Workload):
    """
    Check that the reconstruction copes with files that have a different
    object size in their layout
    """
    def write(self):
        self._mount.run_shell(["touch", "datafile"])
        self._mount.setfattr("./datafile", "ceph.file.layout.object_size", "8388608")
        self._mount.run_shell(["dd", "if=/dev/urandom", "of=./datafile", "bs=1M", "count=32"])
        self._initial_state = self._mount.stat("datafile")

    def validate(self):
        # Check we got the layout reconstructed properly
        object_size = int(self._mount.getfattr(
            "./datafile", "ceph.file.layout.object_size"))
        self.assert_equal(object_size, 8388608)

        # Check we got the file size reconstructed properly
        st = self._mount.stat("datafile")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])

        return self._errors


class TestDataScan(CephFSTestCase):
    MDSS_REQUIRED = 2

    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    def _rebuild_metadata(self, workload, workers=1):
        """
        That when all objects in the metadata pool are removed, we can rebuild a metadata pool
        based on the contents of a data pool, and a client can see and read our files.
        """

        # First, inject some files

        workload.write()

        # Unmount the client and flush the journal: the tool should also cope with
        # situations where there is dirty metadata, but we'll test that separately
        self.mount_a.umount_wait()
        workload.flush()

        # Stop the MDS
        self.fs.fail()

        # After recovery, we need the MDS to not be strict about stats (in production these options
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Reset the MDS map in case multiple ranks were in play: the recovery procedure
        # only understands how to rebuild metadata under rank 0
        self.fs.reset()

        self.fs.set_joinable()  # redundant with reset

        def get_state(mds_id):
            info = self.mds_cluster.get_mds_info(mds_id)
            return info['state'] if info is not None else None

        self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
        for mds_id in self.fs.mds_ids:
            self.wait_until_equal(
                    lambda: get_state(mds_id),
                    "up:standby",
                    timeout=60)

        self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])

        # Run the recovery procedure
        if False:
            with self.assertRaises(CommandFailedError):
                # Normal reset should fail when no objects are present; we'll use --force instead
                self.fs.journal_tool(["journal", "reset"], 0)

        self.fs.journal_tool(["journal", "reset", "--force"], 0)
        self.fs.data_scan(["init"])
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()], worker_count=workers)
        self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()], worker_count=workers)
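        # The sequence above mirrors the documented offline recovery workflow:
        # reset the journal, initialise the data-scan state, scan_extents to
        # recover file sizes/mtimes from the data objects, then scan_inodes to
        # inject the recovered inodes into the metadata pool.  Roughly the
        # same as running by hand:
        #   cephfs-journal-tool --rank=<fs>:0 journal reset --force
        #   cephfs-data-scan init
        #   cephfs-data-scan scan_extents <data pool>
        #   cephfs-data-scan scan_inodes <data pool>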

        # Mark the MDS repaired
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

        # Start the MDS
        self.fs.mds_restart()
        self.fs.wait_for_daemons()
        log.info(str(self.mds_cluster.status()))

        # Mount a client
        self.mount_a.mount_wait()

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def test_rebuild_simple(self):
        self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a))

    def test_rebuild_moved_file(self):
        self._rebuild_metadata(MovedFile(self.fs, self.mount_a))

    def test_rebuild_backtraceless(self):
        self._rebuild_metadata(BacktracelessFile(self.fs, self.mount_a))

    def test_rebuild_moved_dir(self):
        self._rebuild_metadata(MovedDir(self.fs, self.mount_a))

    def test_rebuild_missing_zeroth(self):
        self._rebuild_metadata(MissingZerothObject(self.fs, self.mount_a))

    def test_rebuild_nondefault_layout(self):
        self._rebuild_metadata(NonDefaultLayout(self.fs, self.mount_a))

    def test_stashed_layout(self):
        self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a))

    def _dirfrag_keys(self, object_id):
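        """
        Return the omap key names (one "<dentry name>_head" entry per dentry)
        stored in the given dirfrag object in the metadata pool.
        """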
        keys_str = self.fs.radosmo(["listomapkeys", object_id], stdout=StringIO())
        if keys_str:
            return keys_str.strip().split("\n")
        else:
            return []

    def test_fragmented_injection(self):
        """
        That when injecting a dentry into a fragmented directory, we put it in the right fragment.
        """

        file_count = 100
        file_names = ["%s" % n for n in range(0, file_count)]

        # Make sure to disable dirfrag auto merging and splitting
        self.fs.set_ceph_conf('mds', 'mds bal merge size', 0)
        self.fs.set_ceph_conf('mds', 'mds bal split size', 100 * file_count)

        # Create a directory of `file_count` files, each named after its
        # decimal number and containing the string of its decimal number
        self.mount_a.run_python(dedent("""
        import os
        path = os.path.join("{path}", "subdir")
        os.mkdir(path)
        for n in range(0, {file_count}):
            open(os.path.join(path, "%s" % n), 'w').write("%s" % n)
        """.format(
            path=self.mount_a.mountpoint,
            file_count=file_count
        )))

        dir_ino = self.mount_a.path_to_ino("subdir")

        # Only one MDS should be active!
        self.assertEqual(len(self.fs.get_active_names()), 1)

        # Ensure that one directory is fragmented
        mds_id = self.fs.get_active_names()[0]
        self.fs.mds_asok(["dirfrag", "split", "/subdir", "0/0", "1"], mds_id)
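        # (A one-bit split of the root fragment 0/0 leaves /subdir's dentries
        # spread across two dirfrag objects in the metadata pool.)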
455
456 # Flush journal and stop MDS
457 self.mount_a.umount_wait()
458 self.fs.mds_asok(["flush", "journal"], mds_id)
f67539c2 459 self.fs.fail()
7c673cae
FG
460
461 # Pick a dentry and wipe out its key
462 # Because I did a 1 bit split, I know one frag will be named <inode>.01000000
463 frag_obj_id = "{0:x}.01000000".format(dir_ino)
464 keys = self._dirfrag_keys(frag_obj_id)
465 victim_key = keys[7] # arbitrary choice
466 log.info("victim_key={0}".format(victim_key))
467 victim_dentry = victim_key.split("_head")[0]
f67539c2 468 self.fs.radosm(["rmomapkey", frag_obj_id, victim_key])
7c673cae
FG
469
470 # Start filesystem back up, observe that the file appears to be gone in an `ls`
f67539c2 471 self.fs.set_joinable()
7c673cae 472 self.fs.wait_for_daemons()
e306af50 473 self.mount_a.mount_wait()
7c673cae
FG
474 files = self.mount_a.run_shell(["ls", "subdir/"]).stdout.getvalue().strip().split("\n")
475 self.assertListEqual(sorted(files), sorted(list(set(file_names) - set([victim_dentry]))))
476
477 # Stop the filesystem
478 self.mount_a.umount_wait()
f67539c2 479 self.fs.fail()
7c673cae
FG
480
481 # Run data-scan, observe that it inserts our dentry back into the correct fragment
482 # by checking the omap now has the dentry's key again
483 self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
484 self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()])
1911f103 485 self.fs.data_scan(["scan_links"])
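        # (scan_links is the pass that checks and repairs dentry linkage and
        # rebuilds the inode/snap tables -- see the test_rebuild_* cases below.)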
        self.assertIn(victim_key, self._dirfrag_keys(frag_obj_id))

        # Start the filesystem and check that the dentry we deleted is now once again visible
        # and points to the correct file data.
        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()
        out = self.mount_a.run_shell(["cat", "subdir/{0}".format(victim_dentry)]).stdout.getvalue().strip()
        self.assertEqual(out, victim_dentry)

        # Finally, close the loop by checking our injected dentry survives a merge
        mds_id = self.fs.get_active_names()[0]
        self.mount_a.ls("subdir")  # Do an ls to ensure both frags are in cache so the merge will work
        self.fs.mds_asok(["dirfrag", "merge", "/subdir", "0/0"], mds_id)
        self.fs.mds_asok(["flush", "journal"], mds_id)
        frag_obj_id = "{0:x}.00000000".format(dir_ino)
        keys = self._dirfrag_keys(frag_obj_id)
        self.assertListEqual(sorted(keys), sorted(["%s_head" % f for f in file_names]))

        # Run scrub to update rstat.rbytes and make sure the subdir inode and
        # dirfrag info match
        out_json = self.fs.run_scrub(["start", "/subdir", "repair,recursive"])
        self.assertNotEqual(out_json, None)
        self.assertEqual(out_json["return_code"], 0)
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Remove the whole 'subdir' directory
        self.mount_a.run_shell(["rm", "-rf", "subdir/"])

    @for_teuthology
    def test_parallel_execution(self):
        self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7)

    def test_pg_files(self):
        """
        That the pg files command tells us which files are associated with
        a particular PG
        """
        file_count = 20
        self.mount_a.run_shell(["mkdir", "mydir"])
        self.mount_a.create_n_files("mydir/myfile", file_count)

        # Some files elsewhere in the system that we will ignore
        # to check that the tool is filtering properly
        self.mount_a.run_shell(["mkdir", "otherdir"])
        self.mount_a.create_n_files("otherdir/otherfile", file_count)

        pgs_to_files = defaultdict(list)
        # Rough (slow) reimplementation of the logic
        for i in range(0, file_count):
            file_path = "mydir/myfile_{0}".format(i)
            ino = self.mount_a.path_to_ino(file_path)
            obj = "{0:x}.{1:08x}".format(ino, 0)
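            # The first data object of a file is "<ino hex>.00000000"; ask the
            # OSD map which PG that object falls into.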
            pgid = json.loads(self.fs.mon_manager.raw_cluster_cmd(
                "osd", "map", self.fs.get_data_pool_name(), obj,
                "--format=json-pretty"
            ))['pgid']
            pgs_to_files[pgid].append(file_path)
            log.info("{0}: {1}".format(file_path, pgid))

        pg_count = self.fs.pgs_per_fs_pool
        for pg_n in range(0, pg_count):
            pg_str = "{0}.{1:x}".format(self.fs.get_data_pool_id(), pg_n)
            out = self.fs.data_scan(["pg_files", "mydir", pg_str])
            lines = [l for l in out.split("\n") if l]
            log.info("{0}: {1}".format(pg_str, lines))
            self.assertSetEqual(set(lines), set(pgs_to_files[pg_str]))

    def test_rebuild_linkage(self):
        """
        The scan_links command fixes linkage errors
        """
        self.mount_a.run_shell(["mkdir", "testdir1"])
        self.mount_a.run_shell(["mkdir", "testdir2"])
        dir1_ino = self.mount_a.path_to_ino("testdir1")
        dir2_ino = self.mount_a.path_to_ino("testdir2")
        dirfrag1_oid = "{0:x}.00000000".format(dir1_ino)
        dirfrag2_oid = "{0:x}.00000000".format(dir2_ino)

        self.mount_a.run_shell(["touch", "testdir1/file1"])
        self.mount_a.run_shell(["ln", "testdir1/file1", "testdir1/link1"])
        self.mount_a.run_shell(["ln", "testdir1/file1", "testdir2/link2"])

        mds_id = self.fs.get_active_names()[0]
        self.fs.mds_asok(["flush", "journal"], mds_id)

        dirfrag1_keys = self._dirfrag_keys(dirfrag1_oid)

        # introduce duplicated primary link
        file1_key = "file1_head"
        self.assertIn(file1_key, dirfrag1_keys)
        file1_omap_data = self.fs.radosmo(["getomapval", dirfrag1_oid, file1_key, '-'])
        self.fs.radosm(["setomapval", dirfrag2_oid, file1_key], stdin=BytesIO(file1_omap_data))
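        # Both dirfrags now hold a primary dentry for the same inode; scan_links
        # is expected to keep only the newer one (its version gets bumped by the
        # touch below) and drop the copy in testdir2.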
        self.assertIn(file1_key, self._dirfrag_keys(dirfrag2_oid))

        # remove a remote link, make inode link count incorrect
        link1_key = 'link1_head'
        self.assertIn(link1_key, dirfrag1_keys)
        self.fs.radosm(["rmomapkey", dirfrag1_oid, link1_key])

        # increase good primary link's version
        self.mount_a.run_shell(["touch", "testdir1/file1"])
        self.mount_a.umount_wait()

        self.fs.mds_asok(["flush", "journal"], mds_id)
        self.fs.fail()

        # repair linkage errors
        self.fs.data_scan(["scan_links"])

        # primary link in testdir2 was deleted?
        self.assertNotIn(file1_key, self._dirfrag_keys(dirfrag2_oid))

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        self.mount_a.mount_wait()

        # link count was adjusted?
        file1_nlink = self.mount_a.path_to_nlink("testdir1/file1")
        self.assertEqual(file1_nlink, 2)

    def test_rebuild_inotable(self):
        """
        The scan_links command repairs the inotables
        """
        self.fs.set_max_mds(2)
        self.fs.wait_for_daemons()

        active_mds_names = self.fs.get_active_names()
        mds0_id = active_mds_names[0]
        mds1_id = active_mds_names[1]

        self.mount_a.run_shell(["mkdir", "dir1"])
        dir_ino = self.mount_a.path_to_ino("dir1")
        self.mount_a.setfattr("dir1", "ceph.dir.pin", "1")
        # wait for subtree migration

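        # Inode numbers are allocated from per-rank inotable ranges, so retry
        # creating the file until its inode number indicates it was allocated
        # by rank 1 (here, apparently, anything at or above 2 << 40).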
        file_ino = 0
        while True:
            time.sleep(1)
            # allocate an inode from mds.1
            self.mount_a.run_shell(["touch", "dir1/file1"])
            file_ino = self.mount_a.path_to_ino("dir1/file1")
            if file_ino >= (2 << 40):
                break
            self.mount_a.run_shell(["rm", "-f", "dir1/file1"])

        self.mount_a.umount_wait()

        self.fs.mds_asok(["flush", "journal"], mds0_id)
        self.fs.mds_asok(["flush", "journal"], mds1_id)
        self.fs.fail()

        self.fs.radosm(["rm", "mds0_inotable"])
        self.fs.radosm(["rm", "mds1_inotable"])

        self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])

        mds0_inotable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "inode"]))
        self.assertGreaterEqual(
            mds0_inotable['0']['data']['inotable']['free'][0]['start'], dir_ino)

        mds1_inotable = json.loads(self.fs.table_tool([self.fs.name + ":1", "show", "inode"]))
        self.assertGreaterEqual(
            mds1_inotable['1']['data']['inotable']['free'][0]['start'], file_ino)

    def test_rebuild_snaptable(self):
        """
        The scan_links command repairs the snaptable
        """
        self.fs.set_allow_new_snaps(True)

        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["mkdir", "dir1/.snap/s1"])
        self.mount_a.run_shell(["mkdir", "dir1/.snap/s2"])
        self.mount_a.run_shell(["rmdir", "dir1/.snap/s2"])

        self.mount_a.umount_wait()

        mds0_id = self.fs.get_active_names()[0]
        self.fs.mds_asok(["flush", "journal"], mds0_id)

        # wait for mds to update removed snaps
        time.sleep(10)

        old_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"]))
        # stamps may differ slightly, so ignore them in the comparison
        for item in old_snaptable['snapserver']['snaps']:
            del item['stamp']

        self.fs.radosm(["rm", "mds_snaptable"])
        self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])

        new_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"]))
        for item in new_snaptable['snapserver']['snaps']:
            del item['stamp']
        self.assertGreaterEqual(
            new_snaptable['snapserver']['last_snap'], old_snaptable['snapserver']['last_snap'])
        self.assertEqual(
            new_snaptable['snapserver']['snaps'], old_snaptable['snapserver']['snaps'])