1 | """ |
2 | Test our tools for recovering metadata from the data pool into an alternate pool | |
3 | """ | |
181888fb FG |
4 | |
5 | import logging | |
181888fb | 6 | import traceback |
9f95a23c | 7 | from collections import namedtuple |
181888fb FG |
8 | |
9 | from teuthology.orchestra.run import CommandFailedError | |
9f95a23c | 10 | from tasks.cephfs.cephfs_test_case import CephFSTestCase |
181888fb FG |
11 | |
12 | log = logging.getLogger(__name__) | |
13 | ||
14 | ||
15 | ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) | |
16 | ||
17 | ||
class OverlayWorkload(object):
    def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
        self._orig_fs = orig_fs
        self._recovery_fs = recovery_fs
        self._orig_mount = orig_mount
        self._recovery_mount = recovery_mount
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them. Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string
        self._errors = []

    def assert_equal(self, a, b):
        try:
            if a != b:
                raise AssertionError("{0} != {1}".format(a, b))
        except AssertionError as e:
            self._errors.append(
                ValidationError(e, traceback.format_exc(3))
            )

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from. By
        default just wipe everything in the metadata pool
        """
        # Delete every object in the metadata pool
        objects = self._orig_fs.rados(["ls"]).split("\n")
        for o in objects:
            self._orig_fs.rados(["rm", o])

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._orig_fs.mds_asok(["flush", "journal"])
        self._recovery_fs.mds_asok(["flush", "journal"])


class SimpleOverlayWorkload(OverlayWorkload):
    """
    Single file, single directory, check that it gets recovered and so does its size
    """
    def write(self):
        self._orig_mount.run_shell(["mkdir", "subdir"])
        self._orig_mount.write_n_mb("subdir/sixmegs", 6)
        self._initial_state = self._orig_mount.stat("subdir/sixmegs")

    def validate(self):
        self._recovery_mount.run_shell(["ls", "subdir"])
        st = self._recovery_mount.stat("subdir/sixmegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        return self._errors

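
# A minimal extra sketch (purely illustrative, not used by any test below): another
# OverlayWorkload covering more than one file. Names like "subdir/onemeg" are
# assumptions, not part of the original test.
class TwoFileOverlayWorkload(OverlayWorkload):
    """
    Two files in one directory, check that both are recovered along with their sizes
    """
    def write(self):
        self._orig_mount.run_shell(["mkdir", "subdir"])
        self._orig_mount.write_n_mb("subdir/sixmegs", 6)
        self._orig_mount.write_n_mb("subdir/onemeg", 1)
        self._initial_state = {
            "sixmegs": self._orig_mount.stat("subdir/sixmegs"),
            "onemeg": self._orig_mount.stat("subdir/onemeg"),
        }

    def validate(self):
        self._recovery_mount.run_shell(["ls", "subdir"])
        for name, orig_st in self._initial_state.items():
            st = self._recovery_mount.stat("subdir/{0}".format(name))
            self.assert_equal(st['st_size'], orig_st['st_size'])
        return self._errors
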
class TestRecoveryPool(CephFSTestCase):
    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 2
    REQUIRE_RECOVERY_FILESYSTEM = True

    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    def _rebuild_metadata(self, workload, other_pool=None, workers=1):
        """
        Test that when all objects in the metadata pool are removed, we can rebuild
        a metadata pool based on the contents of a data pool, and a client can see
        and read our files.
        """

        # First, inject some files

        workload.write()

        # Unmount the clients and flush the journal: the tool should also cope with
        # situations where there is dirty metadata, but we'll test that separately
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()
        workload.flush()

        # Create the alternate pool if requested
        recovery_fs = self.recovery_fs.name
        recovery_pool = self.recovery_fs.get_metadata_pool_name()
        self.recovery_fs.data_scan(['init', '--force-init',
                                    '--filesystem', recovery_fs,
                                    '--alternate-pool', recovery_pool])
        self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
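        # Reset rank 0's session, snap and inode tables in the recovery filesystem so
        # the rebuilt metadata starts from a clean slate (cephfs-table-tool reset).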
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])

        # Stop the MDS
        self.fs.mds_stop()
        self.fs.mds_fail()

        # After recovery, we need the MDS to not be strict about stats (in production these options
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Reset the MDS map in case multiple ranks were in play: recovery procedure
        # only understands how to rebuild metadata under rank 0
        self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
                                            '--yes-i-really-mean-it')

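        # Likewise reset rank 0's session, snap and inode tables in the original
        # filesystem before rebuilding its metadata.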
        self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])

        # Run the recovery procedure
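        # Roughly speaking, the helper calls below correspond to the cephfs-data-scan /
        # cephfs-journal-tool disaster-recovery commands (exact invocations shown here
        # are an assumption, included only for orientation), e.g.:
        #   cephfs-data-scan scan_extents --alternate-pool <pool> --filesystem <fs> <data pool>
        #   cephfs-data-scan scan_inodes --alternate-pool <pool> --filesystem <fs> <data pool>
        #   cephfs-journal-tool event recover_dentries list --alternate-pool <pool>
        #   cephfs-journal-tool journal reset --force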
        if False:
            with self.assertRaises(CommandFailedError):
                # Normal reset should fail when no objects are present; we'll use --force instead
                self.fs.journal_tool(["journal", "reset"], 0)

        self.fs.mds_stop()
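        # scan_extents walks the data pool to recover file sizes and mtimes;
        # scan_inodes then rebuilds inodes and dentries from backtraces, writing the
        # results into the alternate (recovery) metadata pool.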
        self.fs.data_scan(['scan_extents', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           self.fs.get_data_pool_name()])
        self.fs.data_scan(['scan_inodes', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
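        # Replay dentries from the original journal into the alternate pool as well.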
        self.fs.journal_tool(['event', 'recover_dentries', 'list',
                              '--alternate-pool', recovery_pool], 0)

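        # Repeat the init / scan_inodes / recover_dentries pass against the original
        # filesystem's own metadata pool.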
        self.fs.data_scan(['init', '--force-init', '--filesystem',
                           self.fs.name])
        self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)

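        # Reset both journals (--force, since their contents are no longer usable)
        # and tell the monitors the recovery filesystem's rank 0 is repaired.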
        self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
                                            recovery_fs + ":0")

        # Mark the MDS repaired
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

        # Start the MDS daemons on both filesystems
        self.fs.mds_restart()
        self.recovery_fs.mds_restart()
        self.fs.wait_for_daemons()
        self.recovery_fs.wait_for_daemons()
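        # Turn up MDS debug logging, then run a recursive scrub with repair on each
        # active rank of the recovery filesystem to fix up remaining inconsistencies
        # (e.g. recursive statistics).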
        status = self.recovery_fs.status()
        for rank in self.recovery_fs.get_ranks(status=status):
            self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'],
                                                'injectargs', '--debug-mds=20')
            self.fs.rank_tell(['scrub', 'start', '/', 'recursive', 'repair'], rank=rank['rank'], status=status)
        log.info(str(self.mds_cluster.status()))

        # Mount the clients: mount_a on the original filesystem, mount_b on the
        # recovery filesystem
        self.mount_a.mount_wait()
        self.mount_b.mount_wait(mount_fs_name=recovery_fs)

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def test_rebuild_simple(self):
        self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
                                                     self.mount_a, self.mount_b))