"""
Test our tools for recovering metadata from the data pool into an alternate pool
"""
import logging
import traceback
from textwrap import dedent
from collections import namedtuple, defaultdict

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology

log = logging.getLogger(__name__)


ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])


class OverlayWorkload(object):
    def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
        self._orig_fs = orig_fs
        self._recovery_fs = recovery_fs
        self._orig_mount = orig_mount
        self._recovery_mount = recovery_mount
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them. Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string.
        self._errors = []

    def assert_equal(self, a, b):
        try:
            if a != b:
                raise AssertionError("{0} != {1}".format(a, b))
        except AssertionError as e:
            self._errors.append(
                ValidationError(e, traceback.format_exc(3))
            )

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from.
        By default just wipe everything in the metadata pool.
        """
        # Delete every object in the metadata pool
        objects = self._orig_fs.rados(["ls"]).split("\n")
        for o in objects:
            self._orig_fs.rados(["rm", o])
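        # (Per the comment above this wipes the metadata pool; a rough CLI
        # equivalent, for illustration only:
        #   rados -p <metadata pool> ls | xargs -n 1 rados -p <metadata pool> rm)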

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._orig_fs.mds_asok(["flush", "journal"])
        self._recovery_fs.mds_asok(["flush", "journal"])


class SimpleOverlayWorkload(OverlayWorkload):
    """
    Single file in a single directory; check that it gets recovered, and so does its size
    """
    def write(self):
        self._orig_mount.run_shell(["mkdir", "subdir"])
        self._orig_mount.write_n_mb("subdir/sixmegs", 6)
        self._initial_state = self._orig_mount.stat("subdir/sixmegs")

    def validate(self):
        self._recovery_mount.run_shell(["ls", "subdir"])
        st = self._recovery_mount.stat("subdir/sixmegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        return self._errors
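

# A second workload, sketched purely to illustrate the OverlayWorkload interface
# (write on the original filesystem, validate through the recovery mount). This
# class is hypothetical and is not referenced by any test below.
class NestedFileOverlayWorkload(OverlayWorkload):
    """
    Hypothetical example: one file in a nested directory, checking that its size
    survives recovery.
    """
    def write(self):
        self._orig_mount.run_shell(["mkdir", "-p", "dir_a/dir_b"])
        self._orig_mount.write_n_mb("dir_a/dir_b/twomegs", 2)
        self._initial_state = self._orig_mount.stat("dir_a/dir_b/twomegs")

    def validate(self):
        st = self._recovery_mount.stat("dir_a/dir_b/twomegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        return self._errors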


class TestRecoveryPool(CephFSTestCase):
    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 2
    REQUIRE_RECOVERY_FILESYSTEM = True

    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    def _rebuild_metadata(self, workload, other_pool=None, workers=1):
        """
        Check that when all objects in the metadata pool are removed, we can rebuild
        a metadata pool based on the contents of a data pool, and a client can see
        and read our files.
        """

        # First, inject some files
        workload.write()

        # Unmount the clients and flush the journal: the tool should also cope with
        # situations where there is dirty metadata, but we'll test that separately
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()
        workload.flush()

        # Create the alternate pool if requested
        recovery_fs = self.recovery_fs.name
        recovery_pool = self.recovery_fs.get_metadata_pool_name()
        self.recovery_fs.data_scan(['init', '--force-init',
                                    '--filesystem', recovery_fs,
                                    '--alternate-pool', recovery_pool])
        self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])
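        # (The data_scan/table_tool/journal_tool helpers used throughout this test
        # shell out to the offline cephfs-data-scan, cephfs-table-tool and
        # cephfs-journal-tool utilities; the init above is roughly equivalent to
        # running by hand:
        #   cephfs-data-scan init --force-init --filesystem <recovery fs> \
        #       --alternate-pool <recovery metadata pool>
        # which seeds the root and MDS directory metadata objects in the
        # alternate pool.)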

        # Stop the MDS so the offline recovery tools can operate on the pools
        self.fs.mds_stop()
        self.fs.mds_fail()

        # After recovery, we need the MDS to not be strict about stats (in production these options
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Reset the MDS map in case multiple ranks were in play: the recovery procedure
        # only understands how to rebuild metadata under rank 0
        self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
                                            '--yes-i-really-mean-it')

        self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])
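        # (cephfs-table-tool "<fs name>:0" reset session/snap/inode clears rank 0's
        # session, snap and inode tables, so nothing stale is carried into the
        # rebuilt metadata.)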

        # Run the recovery procedure

        with self.assertRaises(CommandFailedError):
            # Normal reset should fail when no objects are present, we'll use --force instead
            self.fs.journal_tool(["journal", "reset"], 0)
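        # (The forced journal resets happen further down, once the data scans have
        # repopulated the metadata pools.)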

        self.fs.data_scan(['scan_extents', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           self.fs.get_data_pool_name()])
        self.fs.data_scan(['scan_inodes', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list',
                              '--alternate-pool', recovery_pool], 0)
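
        # Broadly: scan_extents walks the data pool objects to recover file sizes
        # and mtimes, scan_inodes injects the recovered inodes into the alternate
        # metadata pool, and recover_dentries salvages whatever dentries are still
        # readable from the journal. The same sequence is then repeated below
        # without --alternate-pool, i.e. against the original filesystem's own
        # metadata pool.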

        self.fs.data_scan(['init', '--force-init', '--filesystem',
                           self.fs.name])
        self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)
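
        # With both metadata pools repopulated, reset the (destroyed) journals and
        # mark rank 0 of each filesystem repaired so that MDS daemons can be started
        # for them again.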
        self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
                                            recovery_fs + ":0")

        # Mark the MDS repaired
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
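
        # 'ceph mds repaired' clears the rank's damaged flag in the MDS map, so the
        # rank can be assigned to a daemon again when the MDSes are restarted below.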
        self.fs.mds_restart()
        self.recovery_fs.mds_restart()
        self.fs.wait_for_daemons()
        self.recovery_fs.wait_for_daemons()
        status = self.recovery_fs.status()
        for rank in self.recovery_fs.get_ranks(status=status):
            self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'],
                                                'injectargs', '--debug-mds=20')
            self.fs.rank_tell(['scrub', 'start', '/', 'recursive', 'repair'],
                              rank=rank['rank'], status=status)
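            # (The recursive scrub with 'repair' walks the whole tree and fixes up
            # what it can, e.g. recursive statistics that the offline rebuild could
            # not fully reconstruct.)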

        log.info(str(self.mds_cluster.status()))

        # Mount the clients again
        self.mount_a.mount()
        self.mount_b.mount(mount_fs_name=recovery_fs)
        self.mount_a.wait_until_mounted()
        self.mount_b.wait_until_mounted()

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def test_rebuild_simple(self):
        self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
                                                     self.mount_a, self.mount_b))