"""
Test our tools for recovering metadata from the data pool into an alternate pool
"""
import logging
import traceback
from textwrap import dedent
from collections import namedtuple, defaultdict

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology

log = logging.getLogger(__name__)


ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])


class OverlayWorkload(object):
    def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
        self._orig_fs = orig_fs
        self._recovery_fs = recovery_fs
        self._orig_mount = orig_mount
        self._recovery_mount = recovery_mount
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them. Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string.
        self._errors = []

    def assert_equal(self, a, b):
        try:
            if a != b:
                raise AssertionError("{0} != {1}".format(a, b))
        except AssertionError as e:
            self._errors.append(
                ValidationError(e, traceback.format_exc(3))
            )

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from.
        By default just wipe everything in the metadata pool.
        """
        # Delete every object in the metadata pool
        objects = self._orig_fs.rados(["ls"]).split("\n")
        for o in objects:
            self._orig_fs.rados(["rm", o])
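        # (Per the comment above this wipes the metadata pool; a rough CLI
        # equivalent, for illustration only:
        #   rados -p <metadata pool> ls | xargs -n 1 rados -p <metadata pool> rm)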

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._orig_fs.mds_asok(["flush", "journal"])
        self._recovery_fs.mds_asok(["flush", "journal"])


class SimpleOverlayWorkload(OverlayWorkload):
    """
    Single file in a single directory; check that it gets recovered, and so does its size
    """
    def write(self):
        self._orig_mount.run_shell(["mkdir", "subdir"])
        self._orig_mount.write_n_mb("subdir/sixmegs", 6)
        self._initial_state = self._orig_mount.stat("subdir/sixmegs")

    def validate(self):
        self._recovery_mount.run_shell(["ls", "subdir"])
        st = self._recovery_mount.stat("subdir/sixmegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        return self._errors
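

# A second workload, sketched purely to illustrate the OverlayWorkload interface
# (write on the original filesystem, validate through the recovery mount). This
# class is hypothetical and is not referenced by any test below.
class NestedFileOverlayWorkload(OverlayWorkload):
    """
    Hypothetical example: one file in a nested directory, checking that its size
    survives recovery.
    """
    def write(self):
        self._orig_mount.run_shell(["mkdir", "-p", "dir_a/dir_b"])
        self._orig_mount.write_n_mb("dir_a/dir_b/twomegs", 2)
        self._initial_state = self._orig_mount.stat("dir_a/dir_b/twomegs")

    def validate(self):
        st = self._recovery_mount.stat("dir_a/dir_b/twomegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        return self._errors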


class TestRecoveryPool(CephFSTestCase):
    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 2
    REQUIRE_RECOVERY_FILESYSTEM = True

    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    def _rebuild_metadata(self, workload, other_pool=None, workers=1):
        """
        Check that when all objects in the metadata pool are removed, we can rebuild
        a metadata pool based on the contents of a data pool, and a client can see
        and read our files.
        """

        # First, inject some files
        workload.write()

        # Unmount the clients and flush the journal: the tool should also cope with
        # situations where there is dirty metadata, but we'll test that separately
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()
        workload.flush()

        # Create the alternate pool if requested
        recovery_fs = self.recovery_fs.name
        recovery_pool = self.recovery_fs.get_metadata_pool_name()
        self.recovery_fs.data_scan(['init', '--force-init',
                                    '--filesystem', recovery_fs,
                                    '--alternate-pool', recovery_pool])
        self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])
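        # (The data_scan/table_tool/journal_tool helpers used throughout this test
        # shell out to the offline cephfs-data-scan, cephfs-table-tool and
        # cephfs-journal-tool utilities; the init above is roughly equivalent to
        # running by hand:
        #   cephfs-data-scan init --force-init --filesystem <recovery fs> \
        #       --alternate-pool <recovery metadata pool>
        # which seeds the root and MDS directory metadata objects in the
        # alternate pool.)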

        # Stop the MDS so the offline recovery tools can operate on the pools
        self.fs.mds_stop()
        self.fs.mds_fail()

        # After recovery, we need the MDS to not be strict about stats (in production these options
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Reset the MDS map in case multiple ranks were in play: the recovery procedure
        # only understands how to rebuild metadata under rank 0
        self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
                                            '--yes-i-really-mean-it')

        self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])
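        # (cephfs-table-tool "<fs name>:0" reset session/snap/inode clears rank 0's
        # session, snap and inode tables, so nothing stale is carried into the
        # rebuilt metadata.)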

        # Run the recovery procedure

        with self.assertRaises(CommandFailedError):
            # Normal reset should fail when no objects are present, we'll use --force instead
            self.fs.journal_tool(["journal", "reset"], 0)
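        # (The forced journal resets happen further down, once the data scans have
        # repopulated the metadata pools.)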

        self.fs.data_scan(['scan_extents', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           self.fs.get_data_pool_name()])
        self.fs.data_scan(['scan_inodes', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list',
                              '--alternate-pool', recovery_pool], 0)
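
        # Broadly: scan_extents walks the data pool objects to recover file sizes
        # and mtimes, scan_inodes injects the recovered inodes into the alternate
        # metadata pool, and recover_dentries salvages whatever dentries are still
        # readable from the journal. The same sequence is then repeated below
        # without --alternate-pool, i.e. against the original filesystem's own
        # metadata pool.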

        self.fs.data_scan(['init', '--force-init', '--filesystem',
                           self.fs.name])
        self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)
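
        # With both metadata pools repopulated, reset the (destroyed) journals and
        # mark rank 0 of each filesystem repaired so that MDS daemons can be started
        # for them again.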
        self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
                                            recovery_fs + ":0")

        # Mark the MDS repaired
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
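
        # 'ceph mds repaired' clears the rank's damaged flag in the MDS map, so the
        # rank can be assigned to a daemon again when the MDSes are restarted below.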
        self.fs.mds_restart()
        self.recovery_fs.mds_restart()
        self.fs.wait_for_daemons()
        self.recovery_fs.wait_for_daemons()
        status = self.recovery_fs.status()
        for rank in self.recovery_fs.get_ranks(status=status):
            self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'],
                                                'injectargs', '--debug-mds=20')
            self.fs.rank_tell(['scrub', 'start', '/', 'recursive', 'repair'],
                              rank=rank['rank'], status=status)
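            # (The recursive scrub with 'repair' walks the whole tree and fixes up
            # what it can, e.g. recursive statistics that the offline rebuild could
            # not fully reconstruct.)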

        log.info(str(self.mds_cluster.status()))

        # Mount the clients again
        self.mount_a.mount()
        self.mount_b.mount(mount_fs_name=recovery_fs)
        self.mount_a.wait_until_mounted()
        self.mount_b.wait_until_mounted()

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def test_rebuild_simple(self):
        self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
                                                     self.mount_a, self.mount_b))