"""
Test our tools for recovering metadata from the data pool into an alternate pool
"""
import logging
import traceback

from collections import namedtuple

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase

log = logging.getLogger(__name__)

# One record per failed validation check: the AssertionError itself plus a
# short formatted backtrace showing which check tripped.
ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
class OverlayWorkload(object):
    """
    A workload written through the "orig" filesystem and later validated
    through the "recovery" filesystem, after metadata has been rebuilt into
    the recovery filesystem's pools.

    Subclasses implement write() and validate(); damage() and flush() have
    usable defaults.
    """
    def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
        self._orig_fs = orig_fs
        self._recovery_fs = recovery_fs
        self._orig_mount = orig_mount
        self._recovery_mount = recovery_mount
        # Populated by write() with whatever state validate() needs to compare
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them.
        # Backtraces are rather verbose, but we only see them when something
        # breaks, and they let us see which check failed without having to
        # decorate each check with a string.
        self._errors = []

    def assert_equal(self, a, b):
        """
        Record (rather than raise) an inequality so that validate() can report
        every broken check in a single pass.
        """
        try:
            if a != b:
                raise AssertionError("{0} != {1}".format(a, b))
        except AssertionError as e:
            self._errors.append(
                ValidationError(e, traceback.format_exc(3))
            )

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from.  By
        default just wipe everything in the metadata pool
        """
        # Delete every object in the metadata pool
        objects = self._orig_fs.rados(["ls"]).split("\n")
        for o in objects:
            # split("\n") on output with a trailing newline yields an empty
            # final entry; "rados rm ''" would fail, so skip blanks.
            if o:
                self._orig_fs.rados(["rm", o])

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._orig_fs.mds_asok(["flush", "journal"])
        self._recovery_fs.mds_asok(["flush", "journal"])
class SimpleOverlayWorkload(OverlayWorkload):
    """
    Single file, single directory, check that it gets recovered and so does its size
    """
    def write(self):
        self._orig_mount.run_shell(["mkdir", "subdir"])
        self._orig_mount.write_n_mb("subdir/sixmegs", 6)
        # Remember the original stat() result so validate() can compare sizes
        # after the metadata rebuild.
        self._initial_state = self._orig_mount.stat("subdir/sixmegs")

    def validate(self):
        # Listing the directory exercises recovered directory metadata before
        # we stat the file itself.
        self._recovery_mount.run_shell(["ls", "subdir"])
        st = self._recovery_mount.stat("subdir/sixmegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        # Return the accumulated ValidationErrors (empty list on success);
        # callers treat a non-empty return as failure.
        return self._errors
class TestRecoveryPool(CephFSTestCase):
    """
    Exercise rebuilding CephFS metadata from the data pool into an alternate
    (recovery) filesystem's metadata pool using cephfs-data-scan /
    cephfs-journal-tool / cephfs-table-tool.
    """
    # Two clients: mount_a writes via the original filesystem, mount_b reads
    # back via the recovery filesystem.
    CLIENTS_REQUIRED = 2
    # One MDS each for the original and the recovery filesystem.
    # NOTE(review): reconstructed from usage of self.fs and self.recovery_fs —
    # confirm against the original test's requirements.
    MDSS_REQUIRED = 2
    REQUIRE_RECOVERY_FILESYSTEM = True

    def is_marked_damaged(self, rank):
        """Return True if `rank` appears in the MDS map's 'damaged' list."""
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    def _rebuild_metadata(self, workload, other_pool=None, workers=1):
        """
        That when all objects in metadata pool are removed, we can rebuild a metadata pool
        based on the contents of a data pool, and a client can see and read our files.

        `other_pool` and `workers` are accepted for interface compatibility;
        they are not used in this procedure.
        """

        # First, inject some files
        workload.write()

        # Unmount the client and flush the journal: the tool should also cope with
        # situations where there is dirty metadata, but we'll test that separately
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()
        workload.flush()

        # Create the alternate pool if requested
        recovery_fs = self.recovery_fs.name
        recovery_pool = self.recovery_fs.get_metadata_pool_name()
        self.recovery_fs.data_scan(['init', '--force-init',
                                    '--filesystem', recovery_fs,
                                    '--alternate-pool', recovery_pool])
        self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])

        # Stop the MDS before damaging/rebuilding; restarted further below.
        # NOTE(review): reconstructed — grounded by the later mds_restart().
        self.fs.mds_stop()
        self.fs.mds_fail()

        # After recovery, we need the MDS to not be strict about stats (in production these options
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Reset the MDS map in case multiple ranks were in play: recovery procedure
        # only understands how to rebuild metadata under rank 0
        self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
                                            '--yes-i-really-mean-it')

        self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])

        # Run the recovery procedure
        if False:  # NOTE(review): disabled check, kept from the original
            with self.assertRaises(CommandFailedError):
                # Normal reset should fail when no objects are present, we'll use --force instead
                self.fs.journal_tool(["journal", "reset"], 0)

        # Rebuild metadata from the data pool into the alternate pool
        self.fs.data_scan(['scan_extents', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           self.fs.get_data_pool_name()])
        self.fs.data_scan(['scan_inodes', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list',
                              '--alternate-pool', recovery_pool], 0)

        # Also rebuild in place so the original filesystem remains usable
        self.fs.data_scan(['init', '--force-init', '--filesystem',
                           self.fs.name])
        self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)

        self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.journal_tool(['journal', 'reset', '--force'], 0)
        # NOTE(review): argument reconstructed from the recovery_fs + ":0"
        # pattern used by the table_tool calls above — confirm.
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
                                            recovery_fs + ":0")

        # Mark the MDS repaired
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

        # Start the MDS
        self.fs.mds_restart()
        self.recovery_fs.mds_restart()
        self.fs.wait_for_daemons()
        self.recovery_fs.wait_for_daemons()
        status = self.recovery_fs.status()
        for rank in self.recovery_fs.get_ranks(status=status):
            # Crank up MDS debugging and scrub-repair each active rank
            self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'],
                                                'injectargs', '--debug-mds=20')
            self.fs.rank_tell(['scrub', 'start', '/', 'recursive', 'repair'],
                              rank=rank['rank'], status=status)
        log.info(str(self.mds_cluster.status()))

        # Mount a client
        self.mount_a.mount()
        self.mount_b.mount(mount_fs_name=recovery_fs)
        self.mount_a.wait_until_mounted()
        self.mount_b.wait_until_mounted()

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def test_rebuild_simple(self):
        self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
                                                     self.mount_a, self.mount_b))