ceph/qa/tasks/cephfs/test_recovery_pool.py

   1 """
   2 Test our tools for recovering metadata from the data pool into an alternate pool
   3 """
   4
   5 import logging
   6 import traceback
   7 from collections import namedtuple
   8
   9 from teuthology.orchestra.run import CommandFailedError
  10 from tasks.cephfs.cephfs_test_case import CephFSTestCase
  11
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
  13
  14
  15 ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
  16
  17
  18 class OverlayWorkload(object):
  19     def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
  20         self._orig_fs = orig_fs
  21         self._recovery_fs = recovery_fs
  22         self._orig_mount = orig_mount
  23         self._recovery_mount = recovery_mount
  24         self._initial_state = None
  25
  26         # Accumulate backtraces for every failed validation, and return them.  Backtraces
  27         # are rather verbose, but we only see them when something breaks, and they
  28         # let us see which check failed without having to decorate each check with
  29         # a string
  30         self._errors = []
  31
  32     def assert_equal(self, a, b):
  33         try:
  34             if a != b:
  35                 raise AssertionError("{0} != {1}".format(a, b))
  36         except AssertionError as e:
  37             self._errors.append(
  38                 ValidationError(e, traceback.format_exc(3))
  39             )
  40
  41     def write(self):
  42         """
  43         Write the workload files to the mount
  44         """
  45         raise NotImplementedError()
  46
  47     def validate(self):
  48         """
  49         Read from the mount and validate that the workload files are present (i.e. have
  50         survived or been reconstructed from the test scenario)
  51         """
  52         raise NotImplementedError()
  53
  54     def damage(self):
  55         """
  56         Damage the filesystem pools in ways that will be interesting to recover from.  By
  57         default just wipe everything in the metadata pool
  58         """
  59
  60         pool = self._orig_fs.get_metadata_pool_name()
  61         self._orig_fs.rados(["purge", pool, '--yes-i-really-really-mean-it'])
  62
  63     def flush(self):
  64         """
  65         Called after client unmount, after write: flush whatever you want
  66         """
  67         self._orig_fs.mds_asok(["flush", "journal"])
  68         self._recovery_fs.mds_asok(["flush", "journal"])
  69
  70
  71 class SimpleOverlayWorkload(OverlayWorkload):
  72     """
  73     Single file, single directory, check that it gets recovered and so does its size
  74     """
  75     def write(self):
  76         self._orig_mount.run_shell(["mkdir", "subdir"])
  77         self._orig_mount.write_n_mb("subdir/sixmegs", 6)
  78         self._initial_state = self._orig_mount.stat("subdir/sixmegs")
  79
  80     def validate(self):
  81         self._recovery_mount.run_shell(["ls", "subdir"])
  82         st = self._recovery_mount.stat("subdir/sixmegs")
  83         self.assert_equal(st['st_size'], self._initial_state['st_size'])
  84         return self._errors
  85
  86 class TestRecoveryPool(CephFSTestCase):
  87     MDSS_REQUIRED = 2
  88     CLIENTS_REQUIRED = 2
  89     REQUIRE_RECOVERY_FILESYSTEM = True
  90
  91     def is_marked_damaged(self, rank):
  92         mds_map = self.fs.get_mds_map()
  93         return rank in mds_map['damaged']
  94
  95     def _rebuild_metadata(self, workload, other_pool=None, workers=1):
  96         """
  97         That when all objects in metadata pool are removed, we can rebuild a metadata pool
  98         based on the contents of a data pool, and a client can see and read our files.
  99         """
 100
 101         # First, inject some files
 102
 103         workload.write()
 104
 105         # Unmount the client and flush the journal: the tool should also cope with
 106         # situations where there is dirty metadata, but we'll test that separately
 107         self.mount_a.umount_wait()
 108         self.mount_b.umount_wait()
 109         workload.flush()
 110
 111         # Create the alternate pool if requested
 112         recovery_fs = self.recovery_fs.name
 113         recovery_pool = self.recovery_fs.get_metadata_pool_name()
 114         self.recovery_fs.data_scan(['init', '--force-init',
 115                                     '--filesystem', recovery_fs,
 116                                     '--alternate-pool', recovery_pool])
 117         self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
 118         self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
 119         self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
 120         self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])
 121
 122         # Stop the MDS
 123         self.fs.mds_stop() # otherwise MDS will join once the fs is reset
 124         self.fs.fail()
 125
 126         # After recovery, we need the MDS to not be strict about stats (in production these options
 127         # are off by default, but in QA we need to explicitly disable them)
 128         self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
 129         self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
 130
 131         # Apply any data damage the workload wants
 132         workload.damage()
 133
 134         # Reset the MDS map in case multiple ranks were in play: recovery procedure
 135         # only understands how to rebuild metadata under rank 0
 136         self.fs.reset()
 137
 138         self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
 139         self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
 140         self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])
 141
 142         # Run the recovery procedure
 143         if False:
 144             with self.assertRaises(CommandFailedError):
 145                 # Normal reset should fail when no objects are present, we'll use --force instead
 146                 self.fs.journal_tool(["journal", "reset"], 0)
 147
 148         self.fs.data_scan(['scan_extents', '--alternate-pool',
 149                            recovery_pool, '--filesystem', self.fs.name,
 150                            self.fs.get_data_pool_name()])
 151         self.fs.data_scan(['scan_inodes', '--alternate-pool',
 152                            recovery_pool, '--filesystem', self.fs.name,
 153                            '--force-corrupt', '--force-init',
 154                            self.fs.get_data_pool_name()])
 155         self.fs.journal_tool(['event', 'recover_dentries', 'list',
 156                               '--alternate-pool', recovery_pool], 0)
 157
 158         self.fs.data_scan(['init', '--force-init', '--filesystem',
 159                            self.fs.name])
 160         self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
 161                            '--force-corrupt', '--force-init',
 162                            self.fs.get_data_pool_name()])
 163         self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)
 164
 165         self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0)
 166         self.fs.journal_tool(['journal', 'reset', '--force'], 0)
 167         self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
 168                                             recovery_fs + ":0")
 169
 170         # Mark the MDS repaired
 171         self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
 172
 173         # Start the MDS
 174         self.fs.mds_restart()
 175         self.fs.set_joinable()
 176         self.recovery_fs.mds_restart()
 177         self.fs.wait_for_daemons()
 178         self.recovery_fs.wait_for_daemons()
 179         status = self.recovery_fs.status()
 180         for rank in self.recovery_fs.get_ranks(status=status):
 181             self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'],
 182                                                 'injectargs', '--debug-mds=20')
 183             self.fs.rank_tell(['scrub', 'start', '/', 'recursive', 'repair'], rank=rank['rank'], status=status)
 184         log.info(str(self.mds_cluster.status()))
 185
 186         # Mount a client
 187         self.mount_a.mount_wait()
 188         self.mount_b.mount_wait(cephfs_name=recovery_fs)
 189
 190         # See that the files are present and correct
 191         errors = workload.validate()
 192         if errors:
 193             log.error("Validation errors found: {0}".format(len(errors)))
 194             for e in errors:
 195                 log.error(e.exception)
 196                 log.error(e.backtrace)
 197             raise AssertionError("Validation failed, first error: {0}\n{1}".format(
 198                 errors[0].exception, errors[0].backtrace
 199             ))
 200
 201     def test_rebuild_simple(self):
 202         self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
 203                                                      self.mount_a, self.mount_b))