"""
Test that the forward scrub functionality can traverse metadata and apply
requested tags to well-formed metadata.

This is *not* the real testing for forward scrub, which will need to test
how the functionality responds to damaged metadata.

"""
import json
import logging
import struct
import time

import six

from collections import namedtuple
from io import BytesIO
from textwrap import dedent

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase

log = logging.getLogger(__name__)


ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])


class TestForwardScrub(CephFSTestCase):
    MDSS_REQUIRED = 1

    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr
        """
        output = self.fs.rados(["getxattr", obj, attr], pool=pool,
                               stdout_data=BytesIO())
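        # The xattr value is a length-prefixed string: for example, a
        # hypothetical payload of b"\x05\x00\x00\x00hello" would decode to
        # strlen == 5 and the string "hello" (assuming the 32-bit,
        # little-endian length prefix used by Ceph's string encoding).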
        strlen = struct.unpack('i', output[0:4])[0]
        return six.ensure_str(output[4:(4 + strlen)], encoding='ascii')

    def _get_paths_to_ino(self):
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

    def test_apply_tag(self):
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a structure mapping path to inode, as we will later want
        # to check object by object, and objects are named after the ino number.
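        # (An inode's first RADOS object is named "<ino in hex>.00000000";
        #  assertTagged/assertUntagged below build names in that format.)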
        inos = self._get_paths_to_ino()

        # Flush metadata: this is a friendly test of forward scrub, so we're
        # skipping the part where it's meant to cope with dirty metadata.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"

        # Execute a tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])
        # Wait for completion.
        time.sleep(10)
        # FIXME: watching the clog isn't a nice mechanism for this; once we
        # have a ScrubMap we'll watch that instead.

        # Check that dirs were tagged
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())

        # This file wasn't in the tagged path, so it should not have been tagged
        self.assertUntagged(inos["./rfile"])

    def assertUntagged(self, ino):
        file_obj_name = "{0:x}.00000000".format(ino)
        with self.assertRaises(CommandFailedError):
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                open(os.path.join(flushed, f), 'w').write(f)
            """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Flush the journal.
        # Unmount before flushing to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Create a new inode that's just in the log, i.e. one that would
        # look orphaned to a backward scan if the backward scan weren't
        # respecting the scrub_tag xattr.
        self.mount_a.mount_wait()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry.
        # Our victim will be... bravo.
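        # (Dentries are stored as omap keys on the directory fragment object;
        #  the head version of a dentry is keyed "<name>_head", so removing
        #  "bravo_head" leaves bravo's inode with no linkage.)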
        self.mount_a.umount_wait()
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        self.fs.rados(["rmomapkey", frag_obj_id, "bravo_head"])

        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount_wait()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.mds_asok(["tag", "path", "/parent", tag])

        # See that the orphan was not tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that the journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # Run cephfs-data-scan targeting only orphans
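        # (With --filter-tag, scan_inodes should skip objects that carry the
        #  scrub tag and only re-inject the untagged ones, i.e. the orphaned
        #  bravo, using their backtraces to restore the original linkage.)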
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])

        # After in-place injection stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.mds_restart()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()
        self._validate_linkage(inos)

    def _stash_inotable(self):
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()

        inotable_dict = {}
        for rank in ranks:
            inotable_oid = "mds{rank:d}_inotable".format(rank=rank)
            print("Trying to fetch inotable object: " + inotable_oid)

            inotable_raw = self.fs.get_metadata_object_raw(inotable_oid)
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict

    def test_inotable_sync(self):
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        inotable_copy = self._stash_inotable()

        self.mount_a.mount_wait()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        self.mount_a.umount_wait()

        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
            self.assertNotEqual(out_json, None)

        self.mds_cluster.mds_stop()
        self.mds_cluster.mds_fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)
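        # ("event splice" with --inode removes the journal events that
        #  reference the given inode, so replaying the journal will not
        #  update the in-memory InoTable for file2/file3.)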
        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)

        # Revert to old inotable.
        for key, value in inotable_copy.items():
            self.fs.put_metadata_object_raw(key, value)

        self.mds_cluster.mds_restart()
        self.fs.wait_for_daemons()

        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
            self.assertNotEqual(out_json, None)

        self.mds_cluster.mds_stop()
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
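        # After repair, the first free range should begin above the inos that
        # were handed out while the stale (reverted) table was in place.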
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])

    def test_backtrace_repair(self):
        """
        That the MDS can repair an inode's backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # Check that the backtrace and layout are written after the initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
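        # (The backtrace is stored in the "parent" xattr of the inode's first
        #  object in the data pool; overwriting it below simulates damage.)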
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
            self.assertNotEqual(out_json, None)
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])