]>
Commit | Line | Data |
---|---|---|
181888fb FG |
1 | |
2 | """ | |
3 | Test our tools for recovering metadata from the data pool into an alternate pool | |
4 | """ | |
5 | import json | |
6 | ||
7 | import logging | |
8 | import os | |
9 | from textwrap import dedent | |
10 | import traceback | |
11 | from collections import namedtuple, defaultdict | |
12 | ||
13 | from teuthology.orchestra.run import CommandFailedError | |
14 | from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology | |
15 | ||
16 | log = logging.getLogger(__name__) | |
17 | ||
18 | ||
# A single failed check: the AssertionError raised plus a truncated backtrace
# showing which validation call recorded it.
ValidationError = namedtuple("ValidationError", "exception backtrace")
20 | ||
21 | ||
class OverlayWorkload(object):
    """
    Base class for workloads that write files via an original filesystem and
    later validate them via a recovery filesystem, after the original
    metadata pool has been damaged and rebuilt into an alternate pool.
    """

    def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
        self._orig_fs = orig_fs
        self._recovery_fs = recovery_fs
        self._orig_mount = orig_mount
        self._recovery_mount = recovery_mount
        # Populated by subclasses in write(); compared against in validate().
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them. Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string
        self._errors = []

    def assert_equal(self, a, b):
        """
        Record (rather than immediately raise) a validation failure if a != b,
        so that validate() can report every failed check at once.
        """
        try:
            if a != b:
                raise AssertionError("{0} != {1}".format(a, b))
        except AssertionError as e:
            self._errors.append(
                ValidationError(e, traceback.format_exc(3))
            )

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)

        :return: list of ValidationError accumulated by assert_equal calls
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from. By
        default just wipe everything in the metadata pool
        """
        # Delete every object in the metadata pool.  `rados ls` output is
        # newline-terminated, so splitting on "\n" produces a trailing empty
        # entry; skip empty names rather than running `rados rm ""`.
        objects = self._orig_fs.rados(["ls"]).split("\n")
        for o in objects:
            if o:
                self._orig_fs.rados(["rm", o])

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._orig_fs.mds_asok(["flush", "journal"])
        self._recovery_fs.mds_asok(["flush", "journal"])
74 | ||
75 | ||
class SimpleOverlayWorkload(OverlayWorkload):
    """
    Single file, single directory, check that it gets recovered and so does
    its size.
    """

    def write(self):
        # Lay down a 6 MB file inside one subdirectory and remember its
        # stat() result so validate() can compare sizes after recovery.
        self._orig_mount.run_shell(["mkdir", "subdir"])
        self._orig_mount.write_n_mb("subdir/sixmegs", 6)
        self._initial_state = self._orig_mount.stat("subdir/sixmegs")

    def validate(self):
        # The directory must be listable through the recovery mount, and the
        # recovered file's size must match what we recorded at write time.
        self._recovery_mount.run_shell(["ls", "subdir"])
        recovered = self._recovery_mount.stat("subdir/sixmegs")
        self.assert_equal(recovered['st_size'], self._initial_state['st_size'])
        return self._errors
90 | ||
class TestRecoveryPool(CephFSTestCase):
    """
    Test our tools for recovering metadata from the data pool into an
    alternate (recovery) metadata pool.
    """
    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 2
    REQUIRE_RECOVERY_FILESYSTEM = True

    def is_marked_damaged(self, rank):
        """Return True if `rank` appears in the MDS map's 'damaged' list."""
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    def _rebuild_metadata(self, workload, other_pool=None, workers=1):
        """
        That when all objects in metadata pool are removed, we can rebuild a metadata pool
        based on the contents of a data pool, and a client can see and read our files.

        :param workload: an OverlayWorkload driving write/damage/validate
        :param other_pool: unused; kept for interface compatibility
        :param workers: unused; kept for interface compatibility
        """

        # First, inject some files

        workload.write()

        # Unmount the client and flush the journal: the tool should also cope with
        # situations where there is dirty metadata, but we'll test that separately
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()
        workload.flush()

        # Create the alternate pool if requested
        recovery_fs = self.recovery_fs.name
        recovery_pool = self.recovery_fs.get_metadata_pool_name()
        self.recovery_fs.data_scan(['init', '--force-init',
                                    '--filesystem', recovery_fs,
                                    '--alternate-pool', recovery_pool])
        self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])

        # Stop the MDS
        self.fs.mds_stop()
        self.fs.mds_fail()

        # After recovery, we need the MDS to not be strict about stats (in production these options
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Reset the MDS map in case multiple ranks were in play: recovery procedure
        # only understands how to rebuild metadata under rank 0
        self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
                                            '--yes-i-really-mean-it')

        self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])

        # Run the recovery procedure
        # NOTE(review): a dead `if False:` branch used to wrap an
        # assertRaises(CommandFailedError) check that a plain
        # `journal reset` fails when no objects are present; it could never
        # execute and has been removed.

        self.fs.mds_stop()
        self.fs.data_scan(['scan_extents', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           self.fs.get_data_pool_name()])
        self.fs.data_scan(['scan_inodes', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list',
                              '--alternate-pool', recovery_pool], 0)

        self.fs.data_scan(['init', '--force-init', '--filesystem',
                           self.fs.name])
        self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)

        # Reset both journals now that dentries have been recovered.
        self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
                                            recovery_fs + ":0")

        # Mark the MDS repaired
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

        # Start the MDS
        self.fs.mds_restart()
        self.recovery_fs.mds_restart()
        self.fs.wait_for_daemons()
        self.recovery_fs.wait_for_daemons()

        # Crank up MDS debugging and kick off a recursive scrub/repair on
        # every active rank of the recovery filesystem so rebuilt stats get
        # corrected.
        status = self.recovery_fs.status()
        for rank in self.recovery_fs.get_ranks(status=status):
            self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'],
                                                'injectargs', '--debug-mds=20')
            self.fs.rank_tell(['scrub', 'start', '/', 'recursive', 'repair'], rank=rank['rank'], status=status)
        log.info(str(self.mds_cluster.status()))

        # Mount a client
        self.mount_a.mount()
        self.mount_b.mount(mount_fs_name=recovery_fs)
        self.mount_a.wait_until_mounted()
        self.mount_b.wait_until_mounted()

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def test_rebuild_simple(self):
        """End-to-end: simple workload survives metadata-pool wipe + rebuild."""
        self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
                                                     self.mount_a, self.mount_b))