2 """
3 Test our tools for recovering metadata from the data pool
4 """
5 import json
6
7 import logging
8 import os
9 import time
10 from textwrap import dedent
11 import traceback
12 from collections import namedtuple, defaultdict
13
14 from teuthology.orchestra.run import CommandFailedError
15 from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
16
17 log = logging.getLogger(__name__)
18
19
20 ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
21
22
23 class Workload(object):
24 def __init__(self, filesystem, mount):
25 self._mount = mount
26 self._filesystem = filesystem
27 self._initial_state = None
28
29 # Accumulate backtraces for every failed validation, and return them. Backtraces
30 # are rather verbose, but we only see them when something breaks, and they
31 # let us see which check failed without having to decorate each check with
32 # a string
33 self._errors = []
34
35 def assert_equal(self, a, b):
36 try:
37 if a != b:
38 raise AssertionError("{0} != {1}".format(a, b))
39 except AssertionError as e:
40 self._errors.append(
41 ValidationError(e, traceback.format_exc(3))
42 )
43
44 def write(self):
45 """
46 Write the workload files to the mount
47 """
48 raise NotImplementedError()
49
50 def validate(self):
51 """
52 Read from the mount and validate that the workload files are present (i.e. have
53 survived or been reconstructed from the test scenario)
54 """
55 raise NotImplementedError()
56
57 def damage(self):
58 """
59 Damage the filesystem pools in ways that will be interesting to recover from. By
60 default just wipe everything in the metadata pool
61 """
62 # Delete every object in the metadata pool
63 objects = self._filesystem.rados(["ls"]).split("\n")
64 for o in objects:
65 self._filesystem.rados(["rm", o])
66
67 def flush(self):
68 """
69 Called after client unmount, after write: flush whatever you want
70 """
71 self._filesystem.mds_asok(["flush", "journal"])
72
73
74 class SimpleWorkload(Workload):
75 """
76 Single file, single directory, check that it gets recovered and so does its size
77 """
78 def write(self):
79 self._mount.run_shell(["mkdir", "subdir"])
80 self._mount.write_n_mb("subdir/sixmegs", 6)
81 self._initial_state = self._mount.stat("subdir/sixmegs")
82
83 def validate(self):
84 self._mount.run_shell(["ls", "subdir"])
85 st = self._mount.stat("subdir/sixmegs")
86 self.assert_equal(st['st_size'], self._initial_state['st_size'])
87 return self._errors
88
89
90 class MovedFile(Workload):
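    """
    A file whose backtrace (written before a rename) disagrees with its final
    path: recovery should re-link it at the location the backtrace records.
    """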
    def write(self):
        # Create a file whose backtrace disagrees with its eventual position
        # in the metadata. We will see that it gets reconstructed in its
        # original position according to its backtrace.
        self._mount.run_shell(["mkdir", "subdir_alpha"])
        self._mount.run_shell(["mkdir", "subdir_bravo"])
        self._mount.write_n_mb("subdir_alpha/sixmegs", 6)
        self._filesystem.mds_asok(["flush", "journal"])
        self._mount.run_shell(["mv", "subdir_alpha/sixmegs", "subdir_bravo/sixmegs"])
        self._initial_state = self._mount.stat("subdir_bravo/sixmegs")

    def flush(self):
        pass

    def validate(self):
        self.assert_equal(self._mount.ls(), ["subdir_alpha"])
        st = self._mount.stat("subdir_alpha/sixmegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        return self._errors


class BacktracelessFile(Workload):
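    """
    A file written without ever flushing the journal, so its data objects carry
    no backtrace: recovery should link it into lost+found by inode number.
    """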
    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        self._mount.write_n_mb("subdir/sixmegs", 6)
        self._initial_state = self._mount.stat("subdir/sixmegs")

    def flush(self):
        # Never flush metadata, so backtrace won't be written
        pass

    def validate(self):
        ino_name = "%x" % self._initial_state["st_ino"]

        # The inode should be linked into lost+found because we had no path for it
        self.assert_equal(self._mount.ls(), ["lost+found"])
        self.assert_equal(self._mount.ls("lost+found"), [ino_name])
        st = self._mount.stat("lost+found/{ino_name}".format(ino_name=ino_name))

        # We might not have got the name or path, but we should still get the size
        self.assert_equal(st['st_size'], self._initial_state['st_size'])

        return self._errors


class StripedStashedLayout(Workload):
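    """
    Files written under a striped directory layout: the flushed files (whose
    layout was stashed in an xattr on their objects) should be recovered in
    place, while the unflushed file can only end up in lost+found.
    """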
    def __init__(self, fs, m):
        super(StripedStashedLayout, self).__init__(fs, m)

        # Nice small stripes so we can quickly do our writes+validates
        self.sc = 4
        self.ss = 65536
        self.os = 262144

        self.interesting_sizes = [
            # Exactly stripe_count objects will exist
            self.os * self.sc,
            # Fewer than stripe_count objects will exist
            self.os * self.sc // 2,
            self.os * (self.sc - 1) + self.os // 2,
            self.os * (self.sc - 1) + self.os // 2 - 1,
            self.os * (self.sc + 1) + self.os // 2,
            self.os * (self.sc + 1) + self.os // 2 + 1,
            # More than stripe_count objects will exist
            self.os * self.sc + self.os * self.sc // 2
        ]

    def write(self):
        # Create a dir with a striped layout set on it
        self._mount.run_shell(["mkdir", "stripey"])

        self._mount.setfattr("./stripey", "ceph.dir.layout",
            "stripe_unit={ss} stripe_count={sc} object_size={os} pool={pool}".format(
                ss=self.ss, os=self.os, sc=self.sc,
                pool=self._filesystem.get_data_pool_name()
            ))

        # Write files, then flush metadata so that its layout gets written into an xattr
        for i, n_bytes in enumerate(self.interesting_sizes):
            self._mount.write_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
            # This is really just validating the validator
            self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
        self._filesystem.mds_asok(["flush", "journal"])

        # Write another file in the same way, but this time don't flush the metadata,
        # so that it won't have the layout xattr
        self._mount.write_test_pattern("stripey/unflushed_file", 1024 * 512)
        self._mount.validate_test_pattern("stripey/unflushed_file", 1024 * 512)

        self._initial_state = {
            "unflushed_ino": self._mount.path_to_ino("stripey/unflushed_file")
        }

    def flush(self):
        # Pass because we already selectively flushed during write
        pass

    def validate(self):
        # The flushed files should have been recovered into their original locations
        # with the correct layout: read back correct data
        for i, n_bytes in enumerate(self.interesting_sizes):
            try:
                self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
            except CommandFailedError as e:
                self._errors.append(
                    ValidationError("File {0} (size {1}): {2}".format(i, n_bytes, e), traceback.format_exc(3))
                )

        # The unflushed file should have been recovered into lost+found without
        # the correct layout: read back junk
        ino_name = "%x" % self._initial_state["unflushed_ino"]
        self.assert_equal(self._mount.ls("lost+found"), [ino_name])
        try:
            self._mount.validate_test_pattern(os.path.join("lost+found", ino_name), 1024 * 512)
        except CommandFailedError:
            pass
        else:
            self._errors.append(
                ValidationError("Unexpectedly valid data in unflushed striped file", "")
            )

        return self._errors


class ManyFilesWorkload(Workload):
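    """
    A directory of many identical files, used to give parallel recovery
    workers something to chew on.
    """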
    def __init__(self, filesystem, mount, file_count):
        super(ManyFilesWorkload, self).__init__(filesystem, mount)
        self.file_count = file_count

    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        for n in range(0, self.file_count):
            self._mount.write_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024)

    def validate(self):
        for n in range(0, self.file_count):
            try:
                self._mount.validate_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024)
            except CommandFailedError as e:
                self._errors.append(
                    ValidationError("File {0}: {1}".format(n, e), traceback.format_exc(3))
                )

        return self._errors


class MovedDir(Workload):
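    """
    A directory moved after some backtraces referring to it were flushed, so
    two conflicting backtraces claim different locations for it: only one
    parent should win.
    """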
    def write(self):
        # Create a nested dir that we will then move. Two files with two different
        # backtraces referring to the moved dir, claiming two different locations for
        # it. We will see that only one backtrace wins and the dir ends up with
        # a single linkage.
        self._mount.run_shell(["mkdir", "-p", "grandmother/parent"])
        self._mount.write_n_mb("grandmother/parent/orig_pos_file", 1)
        self._filesystem.mds_asok(["flush", "journal"])
        self._mount.run_shell(["mkdir", "grandfather"])
        self._mount.run_shell(["mv", "grandmother/parent", "grandfather"])
        self._mount.write_n_mb("grandfather/parent/new_pos_file", 2)
        self._filesystem.mds_asok(["flush", "journal"])

        self._initial_state = (
            self._mount.stat("grandfather/parent/orig_pos_file"),
            self._mount.stat("grandfather/parent/new_pos_file")
        )

    def validate(self):
        root_files = self._mount.ls()
        self.assert_equal(len(root_files), 1)
        self.assert_equal(root_files[0] in ["grandfather", "grandmother"], True)
        winner = root_files[0]
        st_opf = self._mount.stat("{0}/parent/orig_pos_file".format(winner))
        st_npf = self._mount.stat("{0}/parent/new_pos_file".format(winner))

        self.assert_equal(st_opf['st_size'], self._initial_state[0]['st_size'])
        self.assert_equal(st_npf['st_size'], self._initial_state[1]['st_size'])
        return self._errors


class MissingZerothObject(Workload):
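    """
    A file whose zeroth data object (the one carrying the backtrace) has been
    deleted: the remaining objects should still be recovered into lost+found.
    """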
    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        self._mount.write_n_mb("subdir/sixmegs", 6)
        self._initial_state = self._mount.stat("subdir/sixmegs")

    def damage(self):
        super(MissingZerothObject, self).damage()
        zeroth_id = "{0:x}.00000000".format(self._initial_state['st_ino'])
        self._filesystem.rados(["rm", zeroth_id], pool=self._filesystem.get_data_pool_name())

    def validate(self):
        st = self._mount.stat("lost+found/{0:x}".format(self._initial_state['st_ino']))
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        return self._errors


class NonDefaultLayout(Workload):
    """
    Check that the reconstruction copes with files that have a different
    object size in their layout
    """
    def write(self):
        self._mount.run_shell(["touch", "datafile"])
        self._mount.setfattr("./datafile", "ceph.file.layout.object_size", "8388608")
        self._mount.run_shell(["dd", "if=/dev/urandom", "of=./datafile", "bs=1M", "count=32"])
        self._initial_state = self._mount.stat("datafile")

    def validate(self):
        # Check we got the layout reconstructed properly
        object_size = int(self._mount.getfattr(
            "./datafile", "ceph.file.layout.object_size"))
        self.assert_equal(object_size, 8388608)

        # Check we got the file size reconstructed properly
        st = self._mount.stat("datafile")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        return self._errors


class TestDataScan(CephFSTestCase):
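    """
    Tests for recovering metadata from the data pool with cephfs-data-scan,
    including its scan_links and pg_files subcommands.
    """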
    MDSS_REQUIRED = 2

    def is_marked_damaged(self, rank):
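        """
        Whether the given MDS rank is currently marked damaged in the MDS map.
        """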
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    def _rebuild_metadata(self, workload, workers=1):
        """
        That when all objects in the metadata pool are removed, we can rebuild
        the metadata pool from the contents of the data pool, and a client can
        see and read our files.
        """

        # First, inject some files

        workload.write()

        # Unmount the client and flush the journal: the tool should also cope with
        # situations where there is dirty metadata, but we'll test that separately
        self.mount_a.umount_wait()
        workload.flush()

        # Stop the MDS
        self.fs.mds_stop()
        self.fs.mds_fail()

        # After recovery, we need the MDS to not be strict about stats (in production these options
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Reset the MDS map in case multiple ranks were in play: the recovery procedure
        # only understands how to rebuild metadata under rank 0
        self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
            '--yes-i-really-mean-it')

        self.fs.mds_restart()

        def get_state(mds_id):
            info = self.mds_cluster.get_mds_info(mds_id)
            return info['state'] if info is not None else None

        self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
        for mds_id in self.fs.mds_ids:
            self.wait_until_equal(
                lambda: get_state(mds_id),
                "up:standby",
                timeout=60)

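        # Re-create empty session, snap and inode tables for rank 0: the
        # originals were deleted along with the rest of the metadata pool.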
        self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])

        # Run the recovery procedure
        if False:
            with self.assertRaises(CommandFailedError):
                # Normal reset should fail when no objects are present, we'll use --force instead
                self.fs.journal_tool(["journal", "reset"], 0)

        self.fs.journal_tool(["journal", "reset", "--force"], 0)
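        # cephfs-data-scan rebuilds metadata in two passes over the data pool:
        # scan_extents accumulates each file's size/mtime hints onto its zeroth
        # object, then scan_inodes injects the inodes into the metadata pool,
        # linking them by backtrace where one exists and into lost+found otherwise.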
        self.fs.data_scan(["init"])
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()], worker_count=workers)
        self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()], worker_count=workers)

        # Mark the MDS repaired
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

        # Start the MDS
        self.fs.mds_restart()
        self.fs.wait_for_daemons()
        log.info(str(self.mds_cluster.status()))

        # Mount a client
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def test_rebuild_simple(self):
        self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a))

    def test_rebuild_moved_file(self):
        self._rebuild_metadata(MovedFile(self.fs, self.mount_a))

    def test_rebuild_backtraceless(self):
        self._rebuild_metadata(BacktracelessFile(self.fs, self.mount_a))

    def test_rebuild_moved_dir(self):
        self._rebuild_metadata(MovedDir(self.fs, self.mount_a))

    def test_rebuild_missing_zeroth(self):
        self._rebuild_metadata(MissingZerothObject(self.fs, self.mount_a))

    def test_rebuild_nondefault_layout(self):
        self._rebuild_metadata(NonDefaultLayout(self.fs, self.mount_a))

    def test_stashed_layout(self):
        self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a))

    def _dirfrag_keys(self, object_id):
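        """
        Return the list of omap keys (dentries) in a dirfrag object, or an
        empty list if it has none.
        """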
        keys_str = self.fs.rados(["listomapkeys", object_id])
        if keys_str:
            return keys_str.split("\n")
        else:
            return []

    def test_fragmented_injection(self):
        """
        That when injecting a dentry into a fragmented directory, we put it in the right fragment.
        """

        file_count = 100
        file_names = ["%s" % n for n in range(0, file_count)]

        # Create a directory of `file_count` files, each named after its
        # decimal number and containing the string of its decimal number
        self.mount_a.run_python(dedent("""
        import os
        path = os.path.join("{path}", "subdir")
        os.mkdir(path)
        for n in range(0, {file_count}):
            open(os.path.join(path, "%s" % n), 'w').write("%s" % n)
        """.format(
            path=self.mount_a.mountpoint,
            file_count=file_count
        )))

        dir_ino = self.mount_a.path_to_ino("subdir")

        # Only one MDS should be active!
        self.assertEqual(len(self.fs.get_active_names()), 1)

        # Ensure that one directory is fragmented
        mds_id = self.fs.get_active_names()[0]
        self.fs.mds_asok(["dirfrag", "split", "/subdir", "0/0", "1"], mds_id)

        # Flush journal and stop MDS
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"], mds_id)
        self.fs.mds_stop()
        self.fs.mds_fail()

        # Pick a dentry and wipe out its key
        # Because I did a 1 bit split, I know one frag will be named <inode>.01000000
        frag_obj_id = "{0:x}.01000000".format(dir_ino)
        keys = self._dirfrag_keys(frag_obj_id)
        victim_key = keys[7]  # arbitrary choice
        log.info("victim_key={0}".format(victim_key))
        victim_dentry = victim_key.split("_head")[0]
        self.fs.rados(["rmomapkey", frag_obj_id, victim_key])

        # Start filesystem back up, observe that the file appears to be gone in an `ls`
        self.fs.mds_restart()
        self.fs.wait_for_daemons()
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        files = self.mount_a.run_shell(["ls", "subdir/"]).stdout.getvalue().strip().split("\n")
        self.assertListEqual(sorted(files), sorted(list(set(file_names) - set([victim_dentry]))))

        # Stop the filesystem
        self.mount_a.umount_wait()
        self.fs.mds_stop()
        self.fs.mds_fail()

        # Run data-scan, observe that it inserts our dentry back into the correct fragment
        # by checking the omap now has the dentry's key again
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()])
        self.assertIn(victim_key, self._dirfrag_keys(frag_obj_id))

        # Start the filesystem and check that the dentry we deleted is now once again visible
        # and points to the correct file data.
        self.fs.mds_restart()
        self.fs.wait_for_daemons()
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        out = self.mount_a.run_shell(["cat", "subdir/{0}".format(victim_dentry)]).stdout.getvalue().strip()
        self.assertEqual(out, victim_dentry)

        # Finally, close the loop by checking our injected dentry survives a merge
        mds_id = self.fs.get_active_names()[0]
        self.mount_a.ls("subdir")  # Do an ls to ensure both frags are in cache so the merge will work
        self.fs.mds_asok(["dirfrag", "merge", "/subdir", "0/0"], mds_id)
        self.fs.mds_asok(["flush", "journal"], mds_id)
        frag_obj_id = "{0:x}.00000000".format(dir_ino)
        keys = self._dirfrag_keys(frag_obj_id)
        self.assertListEqual(sorted(keys), sorted(["%s_head" % f for f in file_names]))

    @for_teuthology
    def test_parallel_execution(self):
        self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7)

    def test_pg_files(self):
        """
        That the pg files command tells us which files are associated with
        a particular PG
        """
        file_count = 20
        self.mount_a.run_shell(["mkdir", "mydir"])
        self.mount_a.create_n_files("mydir/myfile", file_count)

        # Some files elsewhere in the system that we will ignore
        # to check that the tool is filtering properly
        self.mount_a.run_shell(["mkdir", "otherdir"])
        self.mount_a.create_n_files("otherdir/otherfile", file_count)

        pgs_to_files = defaultdict(list)
        # Rough (slow) reimplementation of the logic
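        # For each file, work out the name of its first data object
        # (<ino hex>.00000000) and ask the OSD map which PG that object maps to.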
        for i in range(0, file_count):
            file_path = "mydir/myfile_{0}".format(i)
            ino = self.mount_a.path_to_ino(file_path)
            obj = "{0:x}.{1:08x}".format(ino, 0)
            pgid = json.loads(self.fs.mon_manager.raw_cluster_cmd(
                "osd", "map", self.fs.get_data_pool_name(), obj,
                "--format=json-pretty"
            ))['pgid']
            pgs_to_files[pgid].append(file_path)
            log.info("{0}: {1}".format(file_path, pgid))

        pg_count = self.fs.pgs_per_fs_pool
        for pg_n in range(0, pg_count):
            pg_str = "{0}.{1}".format(self.fs.get_data_pool_id(), pg_n)
            out = self.fs.data_scan(["pg_files", "mydir", pg_str])
            lines = [l for l in out.split("\n") if l]
            log.info("{0}: {1}".format(pg_str, lines))
            self.assertSetEqual(set(lines), set(pgs_to_files[pg_str]))

    def test_rebuild_linkage(self):
        """
        The scan_links command fixes linkage errors
        """
        self.mount_a.run_shell(["mkdir", "testdir1"])
        self.mount_a.run_shell(["mkdir", "testdir2"])
        dir1_ino = self.mount_a.path_to_ino("testdir1")
        dir2_ino = self.mount_a.path_to_ino("testdir2")
        dirfrag1_oid = "{0:x}.00000000".format(dir1_ino)
        dirfrag2_oid = "{0:x}.00000000".format(dir2_ino)

        self.mount_a.run_shell(["touch", "testdir1/file1"])
        self.mount_a.run_shell(["ln", "testdir1/file1", "testdir1/link1"])
        self.mount_a.run_shell(["ln", "testdir1/file1", "testdir2/link2"])

        mds_id = self.fs.get_active_names()[0]
        self.fs.mds_asok(["flush", "journal"], mds_id)

        dirfrag1_keys = self._dirfrag_keys(dirfrag1_oid)

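        # A file has exactly one primary dentry (which embeds the inode); any
        # extra hard links are remote dentries that refer to the inode by number.
        # Damage both kinds of linkage below and let scan_links repair them.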
        # introduce duplicated primary link
        file1_key = "file1_head"
        self.assertIn(file1_key, dirfrag1_keys)
        file1_omap_data = self.fs.rados(["getomapval", dirfrag1_oid, file1_key, '-'])
        self.fs.rados(["setomapval", dirfrag2_oid, file1_key], stdin_data=file1_omap_data)
        self.assertIn(file1_key, self._dirfrag_keys(dirfrag2_oid))

        # remove a remote link, make inode link count incorrect
        link1_key = 'link1_head'
        self.assertIn(link1_key, dirfrag1_keys)
        self.fs.rados(["rmomapkey", dirfrag1_oid, link1_key])

        # increase good primary link's version
        self.mount_a.run_shell(["touch", "testdir1/file1"])
        self.mount_a.umount_wait()

        self.fs.mds_asok(["flush", "journal"], mds_id)
        self.fs.mds_stop()
        self.fs.mds_fail()

        # repair linkage errors
        self.fs.data_scan(["scan_links"])

        # check that the duplicate primary link in testdir2 was deleted
        self.assertNotIn(file1_key, self._dirfrag_keys(dirfrag2_oid))

        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

        # check that the inode's link count was adjusted
        file1_nlink = self.mount_a.path_to_nlink("testdir1/file1")
        self.assertEqual(file1_nlink, 2)

    def test_rebuild_inotable(self):
        """
        The scan_links command repairs the inode tables
        """
        self.fs.set_max_mds(2)
        self.fs.wait_for_daemons()

        active_mds_names = self.fs.get_active_names()
        mds0_id = active_mds_names[0]
        mds1_id = active_mds_names[1]

        self.mount_a.run_shell(["mkdir", "dir1"])
        dir_ino = self.mount_a.path_to_ino("dir1")
        self.mount_a.setfattr("dir1", "ceph.dir.pin", "1")
        # wait for subtree migration

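        # Keep creating files until we get an inode allocated by mds.1: rank N's
        # inode number range starts at (N + 1) << 40, so an inode >= 2 << 40 tells
        # us the pinned directory's subtree has migrated to rank 1.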
        file_ino = 0
        while True:
            time.sleep(1)
            # allocate an inode from mds.1
            self.mount_a.run_shell(["touch", "dir1/file1"])
            file_ino = self.mount_a.path_to_ino("dir1/file1")
            if file_ino >= (2 << 40):
                break
            self.mount_a.run_shell(["rm", "-f", "dir1/file1"])

        self.mount_a.umount_wait()

        self.fs.mds_asok(["flush", "journal"], mds0_id)
        self.fs.mds_asok(["flush", "journal"], mds1_id)
        self.mds_cluster.mds_stop()

        self.fs.rados(["rm", "mds0_inotable"])
        self.fs.rados(["rm", "mds1_inotable"])

        self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])

        mds0_inotable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "inode"]))
        self.assertGreaterEqual(
            mds0_inotable['0']['data']['inotable']['free'][0]['start'], dir_ino)

        mds1_inotable = json.loads(self.fs.table_tool([self.fs.name + ":1", "show", "inode"]))
        self.assertGreaterEqual(
            mds1_inotable['1']['data']['inotable']['free'][0]['start'], file_ino)

    def test_rebuild_snaptable(self):
        """
        The scan_links command repairs the snap table
        """
        self.fs.set_allow_new_snaps(True)

        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["mkdir", "dir1/.snap/s1"])
        self.mount_a.run_shell(["mkdir", "dir1/.snap/s2"])
        self.mount_a.run_shell(["rmdir", "dir1/.snap/s2"])

        self.mount_a.umount_wait()

        mds0_id = self.fs.get_active_names()[0]
        self.fs.mds_asok(["flush", "journal"], mds0_id)

        # wait for mds to update removed snaps
        time.sleep(10)

        old_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"]))
        # stamps may have minor differences
        for item in old_snaptable['snapserver']['snaps']:
            del item['stamp']

        self.fs.rados(["rm", "mds_snaptable"])
        self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])

        new_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"]))
        for item in new_snaptable['snapserver']['snaps']:
            del item['stamp']
        self.assertGreaterEqual(
            new_snaptable['snapserver']['last_snap'], old_snaptable['snapserver']['last_snap'])
        self.assertEqual(
            new_snaptable['snapserver']['snaps'], old_snaptable['snapserver']['snaps'])