]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | |
2 | """ | |
3 | Test that the forward scrub functionality can traverse metadata and apply | |
4 | requested tags, on well formed metadata. | |
5 | ||
6 | This is *not* the real testing for forward scrub, which will need to test | |
7 | how the functionality responds to damaged metadata. | |
8 | ||
9 | """ | |
7c673cae | 10 | import logging |
f67539c2 | 11 | import json |
e306af50 | 12 | |
7c673cae | 13 | from collections import namedtuple |
e306af50 | 14 | from io import BytesIO |
7c673cae FG |
15 | from textwrap import dedent |
16 | ||
20effc67 | 17 | from teuthology.exceptions import CommandFailedError |
7c673cae FG |
18 | from tasks.cephfs.cephfs_test_case import CephFSTestCase |
19 | ||
import struct

log = logging.getLogger(__name__)


# A single scrub validation failure: the exception that was raised together
# with the backtrace reported for it.
ValidationError = namedtuple("ValidationError", "exception backtrace")
class TestForwardScrub(CephFSTestCase):
    """
    Test that the forward scrub functionality can traverse metadata and
    apply requested tags, on well-formed metadata.

    This is *not* the real testing for forward scrub, which will need to
    test how the functionality responds to damaged metadata.
    """
    # A single MDS is enough: every test here drives scrub via rank 0.
    MDSS_REQUIRED = 1

    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr

        :param pool: name of the rados pool holding ``obj``
        :param obj: object name to read the xattr from
        :param attr: xattr name (e.g. "scrub_tag")
        :return: the decoded string payload
        :raises CommandFailedError: if the xattr does not exist (relied on
            by assertUntagged)
        """
        output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool,
                                              stdout=BytesIO()).stdout.getvalue()
        # Ceph encodes the string as a 4-byte length prefix followed by the
        # raw bytes; unpack the length, then slice out exactly that many.
        strlen = struct.unpack('i', output[0:4])[0]
        return output[4:(4 + strlen)].decode(encoding='ascii')

    def _get_paths_to_ino(self):
        """
        Walk the whole mount with ``find`` and return a dict mapping each
        relative path (as printed by find, e.g. "./parentdir") to its inode
        number.

        NOTE(review): splitting find's output on whitespace assumes no
        filenames contain spaces — true for the names created by these
        tests, but worth confirming before reuse.
        """
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

    def test_apply_tag(self):
        """
        That a tagging forward scrub writes the scrub_tag xattr onto every
        object under the tagged path (dirs in the metadata pool, files in
        the data pool) and leaves objects outside the path untagged.
        """
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a structure mapping path to inode, as we will later want
        # to check object by object and objects are named after ino number
        inos = self._get_paths_to_ino()

        # Flush metadata: this is a friendly test of forward scrub so we're skipping
        # the part where it's meant to cope with dirty metadata
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"

        # Execute tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])
        # Wait for completion
        # NOTE(review): fixed sleep + function-local import; the FIXME below
        # already flags that this should poll a real completion signal.
        import time
        time.sleep(10)
        # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll
        # watch that instead

        # Check that dirs were tagged
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())

        # This guy wasn't in the tag path, shouldn't have been tagged
        self.assertUntagged(inos["./rfile"])

    def assertUntagged(self, ino):
        """
        Assert that inode ``ino``'s first data object carries no scrub_tag
        xattr (reading a missing xattr raises CommandFailedError).
        """
        file_obj_name = "{0:x}.00000000".format(ino)
        with self.assertRaises(CommandFailedError):
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
        """
        Assert that inode ``ino``'s first object in ``pool`` carries a
        scrub_tag xattr equal to ``tag``.
        """
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
        """
        Assert that the current path->ino mapping on the mount exactly
        matches ``expected``; on mismatch, log both dicts before re-raising
        so the teuthology log shows the full diff context.
        """
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
        """
        That cephfs-data-scan with --filter-tag only re-injects inodes the
        forward scrub did NOT tag (i.e. the deliberately orphaned one), and
        that afterwards the filesystem linkage is exactly what it was
        before the damage.
        """
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                open(os.path.join(flushed, f), 'w').write(f)
            """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Flush journal
        # Umount before flush to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Create a new inode that's just in the log, i.e. would
        # look orphaned to backward scan if backward scan wisnae
        # respectin' tha scrub_tag xattr.
        self.mount_a.mount_wait()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry
        # Our victim will be.... bravo.
        # NOTE(review): umount_wait here is redundant (already unmounted
        # above) but harmless — the helper is idempotent in practice.
        self.mount_a.umount_wait()
        self.fs.fail()
        # Disable scatterstat checking while we mutate a dirfrag object
        # behind the MDS's back, or the MDS would flag the inconsistency.
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        # Dirfrag objects are named "<ino in hex>.00000000"; dropping the
        # "bravo_head" omap key removes bravo's dentry on disk.
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"])

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount_wait()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.mds_asok(["tag", "path", "/parent", tag])

        # See that the orphan wisnae tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # Run cephfs-data-scan targeting only orphans
        self.fs.fail()
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])

        # After in-place injection stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()
        self._validate_linkage(inos)

    def _stash_inotable(self):
        """
        Fetch the raw on-disk inotable object for every active MDS rank.

        :return: dict mapping inotable object name (e.g. "mds0_inotable")
            to its raw bytes, suitable for later restoration via radosm.
        """
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()

        inotable_dict = {}
        for rank in ranks:
            inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable"
            print("Trying to fetch inotable object: " + inotable_oid)

            #self.fs.get_metadata_object("InoTable", "mds0_inotable")
            # Raw 'rados get' rather than the decoded form: we need the
            # exact bytes so we can put them back verbatim later.
            inotable_raw = self.fs.radosmo(['get', inotable_oid, '-'])
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict

    def test_inotable_sync(self):
        """
        That scrub repairs an inotable which has been reverted to a stale
        on-disk copy (so it no longer accounts for inos consumed since the
        stash), and that a scrub of a healthy filesystem does NOT emit the
        "inode table repaired" cluster log message.
        """
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Snapshot the inotable now, before file2/file3 consume more inos.
        inotable_copy = self._stash_inotable()

        self.mount_a.mount_wait()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # NOTE(review): second umount_wait is redundant but harmless.
        self.mount_a.umount_wait()

        # Healthy filesystem: a repair scrub must find nothing to repair.
        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)

        # Revert to old inotable.
        for key, value in inotable_copy.items():
            self.fs.radosm(["put", key, "-"], stdin=BytesIO(value))

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        # Now the inotable is stale, so this scrub must detect and repair it.
        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.fail()
        # After repair, the free range must start beyond the highest ino we
        # actually allocated — i.e. file3's ino is marked as used again.
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])

    def test_backtrace_repair(self):
        """
        That the MDS can repair an inodes backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # That backtrace and layout are written after initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

        # Repair scrub should notice the bad backtrace and log it.
        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # After flushing, the backtrace on disk must be restored intact.
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])