"""
Test our tools for recovering metadata from the data pool into an alternate pool
"""
import logging
import traceback

from collections import namedtuple

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase

log = logging.getLogger(__name__)

# One record per failed validation check: the AssertionError itself plus a
# short formatted backtrace showing which check tripped.
ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
class OverlayWorkload(object):
    """
    A workload written through the "orig" filesystem and later validated
    through the "recovery" filesystem, after metadata has been rebuilt into
    the recovery filesystem's pools.

    Subclasses implement write() and validate(); damage() and flush() have
    usable defaults.
    """
    def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
        self._orig_fs = orig_fs
        self._recovery_fs = recovery_fs
        self._orig_mount = orig_mount
        self._recovery_mount = recovery_mount
        # Populated by write() with whatever state validate() needs to compare
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them.
        # Backtraces are rather verbose, but we only see them when something
        # breaks, and they let us see which check failed without having to
        # decorate each check with a string.
        self._errors = []

    def assert_equal(self, a, b):
        """
        Record (rather than raise) an inequality so that validate() can report
        every broken check in a single pass.
        """
        try:
            if a != b:
                raise AssertionError("{0} != {1}".format(a, b))
        except AssertionError as e:
            self._errors.append(
                ValidationError(e, traceback.format_exc(3))
            )

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from.  By
        default just wipe everything in the metadata pool
        """
        # Delete every object in the metadata pool
        objects = self._orig_fs.rados(["ls"]).split("\n")
        for o in objects:
            # split("\n") on output with a trailing newline yields an empty
            # final entry; "rados rm ''" would fail, so skip blanks.
            if o:
                self._orig_fs.rados(["rm", o])

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._orig_fs.mds_asok(["flush", "journal"])
        self._recovery_fs.mds_asok(["flush", "journal"])
class SimpleOverlayWorkload(OverlayWorkload):
    """
    Single file, single directory, check that it gets recovered and so does its size
    """
    def write(self):
        self._orig_mount.run_shell(["mkdir", "subdir"])
        self._orig_mount.write_n_mb("subdir/sixmegs", 6)
        # Remember the original stat() result so validate() can compare sizes
        # after the metadata rebuild.
        self._initial_state = self._orig_mount.stat("subdir/sixmegs")

    def validate(self):
        # Listing the directory exercises recovered directory metadata before
        # we stat the file itself.
        self._recovery_mount.run_shell(["ls", "subdir"])
        st = self._recovery_mount.stat("subdir/sixmegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        # Return the accumulated ValidationErrors (empty list on success);
        # callers treat a non-empty return as failure.
        return self._errors
class TestRecoveryPool(CephFSTestCase):
    """
    Exercise rebuilding CephFS metadata from the data pool into an alternate
    (recovery) filesystem's metadata pool using cephfs-data-scan /
    cephfs-journal-tool / cephfs-table-tool.
    """
    # Two clients: mount_a writes via the original filesystem, mount_b reads
    # back via the recovery filesystem.
    CLIENTS_REQUIRED = 2
    # One MDS each for the original and the recovery filesystem.
    # NOTE(review): reconstructed from usage of self.fs and self.recovery_fs —
    # confirm against the original test's requirements.
    MDSS_REQUIRED = 2
    REQUIRE_RECOVERY_FILESYSTEM = True

    def is_marked_damaged(self, rank):
        """Return True if `rank` appears in the MDS map's 'damaged' list."""
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    def _rebuild_metadata(self, workload, other_pool=None, workers=1):
        """
        That when all objects in metadata pool are removed, we can rebuild a metadata pool
        based on the contents of a data pool, and a client can see and read our files.

        `other_pool` and `workers` are accepted for interface compatibility;
        they are not used in this procedure.
        """

        # First, inject some files
        workload.write()

        # Unmount the client and flush the journal: the tool should also cope with
        # situations where there is dirty metadata, but we'll test that separately
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()
        workload.flush()

        # Create the alternate pool if requested
        recovery_fs = self.recovery_fs.name
        recovery_pool = self.recovery_fs.get_metadata_pool_name()
        self.recovery_fs.data_scan(['init', '--force-init',
                                    '--filesystem', recovery_fs,
                                    '--alternate-pool', recovery_pool])
        self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])

        # Stop the MDS before damaging/rebuilding; restarted further below.
        # NOTE(review): reconstructed — grounded by the later mds_restart().
        self.fs.mds_stop()
        self.fs.mds_fail()

        # After recovery, we need the MDS to not be strict about stats (in production these options
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Reset the MDS map in case multiple ranks were in play: recovery procedure
        # only understands how to rebuild metadata under rank 0
        self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
                                            '--yes-i-really-mean-it')

        self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])

        # Run the recovery procedure
        if False:  # NOTE(review): disabled check, kept from the original
            with self.assertRaises(CommandFailedError):
                # Normal reset should fail when no objects are present, we'll use --force instead
                self.fs.journal_tool(["journal", "reset"], 0)

        # Rebuild metadata from the data pool into the alternate pool
        self.fs.data_scan(['scan_extents', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           self.fs.get_data_pool_name()])
        self.fs.data_scan(['scan_inodes', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list',
                              '--alternate-pool', recovery_pool], 0)

        # Also rebuild in place so the original filesystem remains usable
        self.fs.data_scan(['init', '--force-init', '--filesystem',
                           self.fs.name])
        self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)

        self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.journal_tool(['journal', 'reset', '--force'], 0)
        # NOTE(review): argument reconstructed from the recovery_fs + ":0"
        # pattern used by the table_tool calls above — confirm.
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
                                            recovery_fs + ":0")

        # Mark the MDS repaired
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

        # Start the MDS
        self.fs.mds_restart()
        self.recovery_fs.mds_restart()
        self.fs.wait_for_daemons()
        self.recovery_fs.wait_for_daemons()
        status = self.recovery_fs.status()
        for rank in self.recovery_fs.get_ranks(status=status):
            # Crank up MDS debugging and scrub-repair each active rank
            self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'],
                                                'injectargs', '--debug-mds=20')
            self.fs.rank_tell(['scrub', 'start', '/', 'recursive', 'repair'],
                              rank=rank['rank'], status=status)
        log.info(str(self.mds_cluster.status()))

        # Mount a client
        self.mount_a.mount()
        self.mount_b.mount(mount_fs_name=recovery_fs)
        self.mount_a.wait_until_mounted()
        self.mount_b.wait_until_mounted()

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def test_rebuild_simple(self):
        self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
                                                     self.mount_a, self.mount_b))