ceph/qa/tasks/cephfs/test_scrub.py

   1 """
   2 Test CephFS scrub (distinct from OSD scrub) functionality
   3 """
   4 import logging
   5 import os
   6 import traceback
   7 from collections import namedtuple
   8
   9 from teuthology.orchestra.run import CommandFailedError
  10 from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
  11
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Record of one failed validation check: the exception that was raised and
# the formatted traceback text captured for it.
ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
  15
  16
  17 class Workload(object):
  18     def __init__(self, filesystem, mount):
  19         self._mount = mount
  20         self._filesystem = filesystem
  21         self._initial_state = None
  22
  23         # Accumulate backtraces for every failed validation, and return them.  Backtraces
  24         # are rather verbose, but we only see them when something breaks, and they
  25         # let us see which check failed without having to decorate each check with
  26         # a string
  27         self._errors = []
  28
  29     def assert_equal(self, a, b):
  30         try:
  31             if a != b:
  32                 raise AssertionError("{0} != {1}".format(a, b))
  33         except AssertionError as e:
  34             self._errors.append(
  35                 ValidationError(e, traceback.format_exc(3))
  36             )
  37
  38     def write(self):
  39         """
  40         Write the workload files to the mount
  41         """
  42         raise NotImplementedError()
  43
  44     def validate(self):
  45         """
  46         Read from the mount and validate that the workload files are present (i.e. have
  47         survived or been reconstructed from the test scenario)
  48         """
  49         raise NotImplementedError()
  50
  51     def damage(self):
  52         """
  53         Damage the filesystem pools in ways that will be interesting to recover from.  By
  54         default just wipe everything in the metadata pool
  55         """
  56         # Delete every object in the metadata pool
  57         objects = self._filesystem.rados(["ls"]).split("\n")
  58         for o in objects:
  59             self._filesystem.rados(["rm", o])
  60
  61     def flush(self):
  62         """
  63         Called after client unmount, after write: flush whatever you want
  64         """
  65         self._filesystem.mds_asok(["flush", "journal"])
  66
  67
  68 class BacktraceWorkload(Workload):
  69     """
  70     Single file, single directory, wipe the backtrace and check it.
  71     """
  72     def write(self):
  73         self._mount.run_shell(["mkdir", "subdir"])
  74         self._mount.write_n_mb("subdir/sixmegs", 6)
  75
  76     def validate(self):
  77         st = self._mount.stat("subdir/sixmegs")
  78         self._filesystem.mds_asok(["flush", "journal"])
  79         bt = self._filesystem.read_backtrace(st['st_ino'])
  80         parent = bt['ancestors'][0]['dname']
  81         self.assert_equal(parent, "sixmegs")
  82         return self._errors
  83
  84     def damage(self):
  85         st = self._mount.stat("subdir/sixmegs")
  86         self._filesystem.mds_asok(["flush", "journal"])
  87         self._filesystem._write_data_xattr(st['st_ino'], "parent", "")
  88
  89
  90 class DupInodeWorkload(Workload):
  91     """
  92     Duplicate an inode and try scrubbing it twice."
  93     """
  94
  95     def write(self):
  96         self._mount.run_shell(["mkdir", "parent"])
  97         self._mount.run_shell(["mkdir", "parent/child"])
  98         self._mount.write_n_mb("parent/parentfile", 6)
  99         self._mount.write_n_mb("parent/child/childfile", 6)
 100
 101     def damage(self):
 102         temp_bin_path = "/tmp/10000000000.00000000_omap.bin"
 103         self._mount.umount()
 104         self._filesystem.mds_asok(["flush", "journal"])
 105         self._filesystem.mds_stop()
 106         self._filesystem.rados(["getomapval", "10000000000.00000000",
 107                                 "parentfile_head", temp_bin_path])
 108         self._filesystem.rados(["setomapval", "10000000000.00000000",
 109                                 "shadow_head"], stdin_file=temp_bin_path)
 110         self._filesystem.set_ceph_conf('mds', 'mds hack allow loading invalid metadata', True)
 111         self._filesystem.mds_restart()
 112         self._filesystem.wait_for_daemons()
 113
 114     def validate(self):
 115         self._filesystem.mds_asok(["scrub_path", "/", "recursive", "repair"])
 116         self.assert_equal(self._filesystem.are_daemons_healthy(), True)
 117         return self._errors
 118
 119
 120 class TestScrub(CephFSTestCase):
 121     MDSS_REQUIRED = 1
 122
 123     def _scrub(self, workload, workers=1):
 124         """
 125         That when all objects in metadata pool are removed, we can rebuild a metadata pool
 126         based on the contents of a data pool, and a client can see and read our files.
 127         """
 128
 129         # First, inject some files
 130
 131         workload.write()
 132
 133         # are off by default, but in QA we need to explicitly disable them)
 134         self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
 135         self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
 136
 137         # Apply any data damage the workload wants
 138         workload.damage()
 139
 140         self.fs.mds_asok(["scrub_path", "/", "recursive", "repair"])
 141
 142         # See that the files are present and correct
 143         errors = workload.validate()
 144         if errors:
 145             log.error("Validation errors found: {0}".format(len(errors)))
 146             for e in errors:
 147                 log.error(e.exception)
 148                 log.error(e.backtrace)
 149             raise AssertionError("Validation failed, first error: {0}\n{1}".format(
 150                 errors[0].exception, errors[0].backtrace
 151             ))
 152
 153     def test_scrub_backtrace(self):
 154         self._scrub(BacktraceWorkload(self.fs, self.mount_a))
 155
 156     def test_scrub_dup_inode(self):
 157         self._scrub(DupInodeWorkload(self.fs, self.mount_a))