"""
Test that the forward scrub functionality can traverse metadata and apply
requested tags, on well formed metadata.

This is *not* the real testing for forward scrub, which will need to test
how the functionality responds to damaged metadata.
"""
import logging
import json

from collections import namedtuple
from io import BytesIO
from textwrap import dedent

from teuthology.exceptions import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase

import struct

log = logging.getLogger(__name__)


ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])


class TestForwardScrub(CephFSTestCase):

    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr
        """
        output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool,
                                              stdout=BytesIO()).stdout.getvalue()
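        # The value is a ceph-encoded string: a 32-bit length prefix followed
        # by the raw bytes ('i' here assumes the node's native byte order
        # matches ceph's little-endian encoding, true on x86 test nodes).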
        strlen = struct.unpack('i', output[0:4])[0]
        return output[4:(4 + strlen)].decode(encoding='ascii')

    def _get_paths_to_ino(self):
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

    def test_apply_tag(self):
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a structure mapping path to inode, as we will later want
        # to check object by object and objects are named after ino number
        inos = self._get_paths_to_ino()

        # Flush metadata: this is a friendly test of forward scrub so we're skipping
        # the part where it's meant to cope with dirty metadata
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"

        # Execute tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])
        # Wait for completion
        import time
        time.sleep(10)
        # FIXME watching clog isn't a nice mechanism for this, once we have
        # a ScrubMap we'll watch that instead

        # Check that dirs were tagged
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())

        # This guy wasn't in the tag path, shouldn't have been tagged
        self.assertUntagged(inos["./rfile"])

    def assertUntagged(self, ino):
        file_obj_name = "{0:x}.00000000".format(ino)
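        # RADOS objects backing an inode are named "<ino in hex>.<chunk
        # index>"; forward scrub leaves its tag as a "scrub_tag" xattr on
        # the first object.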
        with self.assertRaises(CommandFailedError):
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                open(os.path.join(flushed, f), 'w').write(f)
        """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Umount before flush to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Create a new inode that's just in the log, i.e. would
        # look orphaned to backward scan if backward scan wisnae
        # respectin' tha scrub_tag xattr.
        self.mount_a.mount_wait()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry
        # Our victim will be.... bravo.
        self.mount_a.umount_wait()
        self.fs.fail()
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"])
        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount_wait()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.mds_asok(["tag", "path", "/parent", tag])

        # See that the orphan wisnae tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())
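        # (Forward scrub walks the in-memory hierarchy, so it reaches
        # metadata that so far exists only in the journal.)
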
        # Run cephfs-data-scan targeting only orphans
        self.fs.fail()
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])
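        # --filter-tag should make scan_inodes skip objects that already
        # carry the scrub tag, so only the untagged orphan (bravo) gets
        # re-injected.
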
        # After in-place injection stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()
        self._validate_linkage(inos)

    def _stash_inotable(self):
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()
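        # Each active rank persists its allocation state as an
        # "mds<rank>_inotable" object in the metadata pool.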
        inotable_dict = {}
        for rank in ranks:
            inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable"
            print("Trying to fetch inotable object: " + inotable_oid)

            #self.fs.get_metadata_object("InoTable", "mds0_inotable")
            inotable_raw = self.fs.radosmo(['get', inotable_oid, '-'])
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict

    def test_inotable_sync(self):
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        inotable_copy = self._stash_inotable()

        self.mount_a.mount_wait()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        self.mount_a.umount_wait()

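        # invert_match=True asserts the message does *not* appear: scrubbing
        # a healthy inode table must not trigger a repair.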
        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)
        # Revert to old inotable.
        for key, value in inotable_copy.items():
            self.fs.radosm(["put", key, "-"], stdin=BytesIO(value))

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # Take the MDS offline so table_tool reads the on-disk table
        self.mds_cluster.mds_stop()
        self.mds_cluster.mds_fail()
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])
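        # i.e. the repaired table counts the journal-allocated inos as used:
        # the first free range must start beyond file3's ino.
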
    def test_backtrace_repair(self):
        """
        That the MDS can repair an inode's backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # That backtrace and layout are written after initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])
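        # The backtrace lives in the "parent" xattr of the inode's first
        # data-pool object, listing ancestor dentries leaf-first.
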
        # Go corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])