]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | |
2 | """ | |
3 | Test that the forward scrub functionality can traverse metadata and apply | |
4 | requested tags, on well formed metadata. | |
5 | ||
6 | This is *not* the real testing for forward scrub, which will need to test | |
7 | how the functionality responds to damaged metadata. | |
8 | ||
9 | """ | |
import json
import logging
import struct
import time

from collections import namedtuple
from io import BytesIO
from textwrap import dedent

from teuthology.exceptions import CommandFailedError

from tasks.cephfs.cephfs_test_case import CephFSTestCase
21 | ||
22 | log = logging.getLogger(__name__) | |
23 | ||
24 | ||
25 | ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) | |
26 | ||
27 | ||
28 | class TestForwardScrub(CephFSTestCase): | |
29 | MDSS_REQUIRED = 1 | |
30 | ||
31 | def _read_str_xattr(self, pool, obj, attr): | |
32 | """ | |
33 | Read a ceph-encoded string from a rados xattr | |
34 | """ | |
f67539c2 TL |
35 | output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool, |
36 | stdout=BytesIO()).stdout.getvalue() | |
7c673cae | 37 | strlen = struct.unpack('i', output[0:4])[0] |
f67539c2 | 38 | return output[4:(4 + strlen)].decode(encoding='ascii') |
7c673cae FG |
39 | |
40 | def _get_paths_to_ino(self): | |
41 | inos = {} | |
42 | p = self.mount_a.run_shell(["find", "./"]) | |
43 | paths = p.stdout.getvalue().strip().split() | |
44 | for path in paths: | |
45 | inos[path] = self.mount_a.path_to_ino(path) | |
46 | ||
47 | return inos | |
48 | ||
49 | def test_apply_tag(self): | |
50 | self.mount_a.run_shell(["mkdir", "parentdir"]) | |
51 | self.mount_a.run_shell(["mkdir", "parentdir/childdir"]) | |
52 | self.mount_a.run_shell(["touch", "rfile"]) | |
53 | self.mount_a.run_shell(["touch", "parentdir/pfile"]) | |
54 | self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"]) | |
55 | ||
56 | # Build a structure mapping path to inode, as we will later want | |
57 | # to check object by object and objects are named after ino number | |
58 | inos = self._get_paths_to_ino() | |
59 | ||
60 | # Flush metadata: this is a friendly test of forward scrub so we're skipping | |
61 | # the part where it's meant to cope with dirty metadata | |
62 | self.mount_a.umount_wait() | |
63 | self.fs.mds_asok(["flush", "journal"]) | |
64 | ||
65 | tag = "mytag" | |
66 | ||
67 | # Execute tagging forward scrub | |
68 | self.fs.mds_asok(["tag", "path", "/parentdir", tag]) | |
69 | # Wait for completion | |
70 | import time | |
71 | time.sleep(10) | |
72 | # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll | |
73 | # watch that instead | |
74 | ||
75 | # Check that dirs were tagged | |
76 | for dirpath in ["./parentdir", "./parentdir/childdir"]: | |
77 | self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name()) | |
78 | ||
79 | # Check that files were tagged | |
80 | for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]: | |
81 | self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name()) | |
82 | ||
83 | # This guy wasn't in the tag path, shouldn't have been tagged | |
84 | self.assertUntagged(inos["./rfile"]) | |
85 | ||
86 | def assertUntagged(self, ino): | |
87 | file_obj_name = "{0:x}.00000000".format(ino) | |
88 | with self.assertRaises(CommandFailedError): | |
89 | self._read_str_xattr( | |
90 | self.fs.get_data_pool_name(), | |
91 | file_obj_name, | |
92 | "scrub_tag" | |
93 | ) | |
94 | ||
95 | def assertTagged(self, ino, tag, pool): | |
96 | file_obj_name = "{0:x}.00000000".format(ino) | |
97 | wrote = self._read_str_xattr( | |
98 | pool, | |
99 | file_obj_name, | |
100 | "scrub_tag" | |
101 | ) | |
102 | self.assertEqual(wrote, tag) | |
103 | ||
104 | def _validate_linkage(self, expected): | |
105 | inos = self._get_paths_to_ino() | |
106 | try: | |
107 | self.assertDictEqual(inos, expected) | |
108 | except AssertionError: | |
109 | log.error("Expected: {0}".format(json.dumps(expected, indent=2))) | |
110 | log.error("Actual: {0}".format(json.dumps(inos, indent=2))) | |
111 | raise | |
112 | ||
113 | def test_orphan_scan(self): | |
114 | # Create some files whose metadata we will flush | |
115 | self.mount_a.run_python(dedent(""" | |
116 | import os | |
117 | mount_point = "{mount_point}" | |
118 | parent = os.path.join(mount_point, "parent") | |
119 | os.mkdir(parent) | |
120 | flushed = os.path.join(parent, "flushed") | |
121 | os.mkdir(flushed) | |
122 | for f in ["alpha", "bravo", "charlie"]: | |
123 | open(os.path.join(flushed, f), 'w').write(f) | |
124 | """.format(mount_point=self.mount_a.mountpoint))) | |
125 | ||
126 | inos = self._get_paths_to_ino() | |
127 | ||
128 | # Flush journal | |
129 | # Umount before flush to avoid cap releases putting | |
130 | # things we don't want in the journal later. | |
131 | self.mount_a.umount_wait() | |
1e59de90 | 132 | self.fs.flush() |
7c673cae FG |
133 | |
134 | # Create a new inode that's just in the log, i.e. would | |
135 | # look orphaned to backward scan if backward scan wisnae | |
136 | # respectin' tha scrub_tag xattr. | |
e306af50 | 137 | self.mount_a.mount_wait() |
7c673cae FG |
138 | self.mount_a.run_shell(["mkdir", "parent/unflushed"]) |
139 | self.mount_a.run_shell(["dd", "if=/dev/urandom", | |
140 | "of=./parent/unflushed/jfile", | |
141 | "bs=1M", "count=8"]) | |
142 | inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed") | |
143 | inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile") | |
144 | self.mount_a.umount_wait() | |
145 | ||
146 | # Orphan an inode by deleting its dentry | |
147 | # Our victim will be.... bravo. | |
148 | self.mount_a.umount_wait() | |
f67539c2 | 149 | self.fs.fail() |
7c673cae FG |
150 | self.fs.set_ceph_conf('mds', 'mds verify scatter', False) |
151 | self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) | |
152 | frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"]) | |
f67539c2 | 153 | self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"]) |
7c673cae | 154 | |
f67539c2 | 155 | self.fs.set_joinable() |
7c673cae FG |
156 | self.fs.wait_for_daemons() |
157 | ||
158 | # See that the orphaned file is indeed missing from a client's POV | |
e306af50 | 159 | self.mount_a.mount_wait() |
7c673cae FG |
160 | damaged_state = self._get_paths_to_ino() |
161 | self.assertNotIn("./parent/flushed/bravo", damaged_state) | |
162 | self.mount_a.umount_wait() | |
163 | ||
164 | # Run a tagging forward scrub | |
165 | tag = "mytag123" | |
1e59de90 | 166 | self.fs.rank_asok(["tag", "path", "/parent", tag]) |
7c673cae FG |
167 | |
168 | # See that the orphan wisnae tagged | |
169 | self.assertUntagged(inos['./parent/flushed/bravo']) | |
170 | ||
171 | # See that the flushed-metadata-and-still-present files are tagged | |
172 | self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name()) | |
173 | self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name()) | |
174 | ||
175 | # See that journalled-but-not-flushed file *was* tagged | |
176 | self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name()) | |
177 | ||
1e59de90 TL |
178 | # okay, now we are going to run cephfs-data-scan. It's necessary to |
179 | # have a clean journal otherwise replay will blowup on mismatched | |
180 | # inotable versions (due to scan_links) | |
181 | self.fs.flush() | |
f67539c2 | 182 | self.fs.fail() |
1e59de90 TL |
183 | self.fs.journal_tool(["journal", "reset", "--force"], 0) |
184 | ||
185 | # Run cephfs-data-scan targeting only orphans | |
7c673cae FG |
186 | self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()]) |
187 | self.fs.data_scan([ | |
188 | "scan_inodes", | |
189 | "--filter-tag", tag, | |
190 | self.fs.get_data_pool_name() | |
191 | ]) | |
1e59de90 | 192 | self.fs.data_scan(["scan_links"]) |
7c673cae FG |
193 | |
194 | # After in-place injection stats should be kosher again | |
195 | self.fs.set_ceph_conf('mds', 'mds verify scatter', True) | |
196 | self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True) | |
197 | ||
198 | # And we should have all the same linkage we started with, | |
199 | # and no lost+found, and no extra inodes! | |
f67539c2 | 200 | self.fs.set_joinable() |
7c673cae | 201 | self.fs.wait_for_daemons() |
e306af50 | 202 | self.mount_a.mount_wait() |
7c673cae FG |
203 | self._validate_linkage(inos) |
204 | ||
205 | def _stash_inotable(self): | |
206 | # Get all active ranks | |
207 | ranks = self.fs.get_all_mds_rank() | |
208 | ||
209 | inotable_dict = {} | |
210 | for rank in ranks: | |
211 | inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable" | |
9f95a23c | 212 | print("Trying to fetch inotable object: " + inotable_oid) |
7c673cae FG |
213 | |
214 | #self.fs.get_metadata_object("InoTable", "mds0_inotable") | |
f67539c2 | 215 | inotable_raw = self.fs.radosmo(['get', inotable_oid, '-']) |
7c673cae FG |
216 | inotable_dict[inotable_oid] = inotable_raw |
217 | return inotable_dict | |
218 | ||
219 | def test_inotable_sync(self): | |
220 | self.mount_a.write_n_mb("file1_sixmegs", 6) | |
221 | ||
222 | # Flush journal | |
223 | self.mount_a.umount_wait() | |
224 | self.fs.mds_asok(["flush", "journal"]) | |
225 | ||
226 | inotable_copy = self._stash_inotable() | |
227 | ||
e306af50 | 228 | self.mount_a.mount_wait() |
7c673cae FG |
229 | |
230 | self.mount_a.write_n_mb("file2_sixmegs", 6) | |
231 | self.mount_a.write_n_mb("file3_sixmegs", 6) | |
232 | ||
233 | inos = self._get_paths_to_ino() | |
234 | ||
235 | # Flush journal | |
236 | self.mount_a.umount_wait() | |
237 | self.fs.mds_asok(["flush", "journal"]) | |
238 | ||
239 | self.mount_a.umount_wait() | |
240 | ||
241 | with self.assert_cluster_log("inode table repaired", invert_match=True): | |
b3b6e05e | 242 | out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) |
1adf2230 | 243 | self.assertNotEqual(out_json, None) |
f67539c2 TL |
244 | self.assertEqual(out_json["return_code"], 0) |
245 | self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) | |
7c673cae | 246 | |
f67539c2 | 247 | self.fs.fail() |
7c673cae FG |
248 | |
249 | # Truncate the journal (to ensure the inotable on disk | |
250 | # is all that will be in the InoTable in memory) | |
251 | ||
252 | self.fs.journal_tool(["event", "splice", | |
f64942e4 | 253 | "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0) |
7c673cae FG |
254 | |
255 | self.fs.journal_tool(["event", "splice", | |
f64942e4 | 256 | "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0) |
7c673cae FG |
257 | |
258 | # Revert to old inotable. | |
9f95a23c | 259 | for key, value in inotable_copy.items(): |
f67539c2 | 260 | self.fs.radosm(["put", key, "-"], stdin=BytesIO(value)) |
7c673cae | 261 | |
f67539c2 | 262 | self.fs.set_joinable() |
7c673cae FG |
263 | self.fs.wait_for_daemons() |
264 | ||
265 | with self.assert_cluster_log("inode table repaired"): | |
b3b6e05e | 266 | out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) |
1adf2230 | 267 | self.assertNotEqual(out_json, None) |
f67539c2 TL |
268 | self.assertEqual(out_json["return_code"], 0) |
269 | self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) | |
7c673cae | 270 | |
f67539c2 | 271 | self.fs.fail() |
7c673cae FG |
272 | table_text = self.fs.table_tool(["0", "show", "inode"]) |
273 | table = json.loads(table_text) | |
274 | self.assertGreater( | |
275 | table['0']['data']['inotable']['free'][0]['start'], | |
276 | inos['./file3_sixmegs']) | |
277 | ||
278 | def test_backtrace_repair(self): | |
279 | """ | |
280 | That the MDS can repair an inodes backtrace in the data pool | |
281 | if it is found to be damaged. | |
282 | """ | |
283 | # Create a file for subsequent checks | |
284 | self.mount_a.run_shell(["mkdir", "parent_a"]) | |
285 | self.mount_a.run_shell(["touch", "parent_a/alpha"]) | |
286 | file_ino = self.mount_a.path_to_ino("parent_a/alpha") | |
287 | ||
288 | # That backtrace and layout are written after initial flush | |
289 | self.fs.mds_asok(["flush", "journal"]) | |
290 | backtrace = self.fs.read_backtrace(file_ino) | |
291 | self.assertEqual(['alpha', 'parent_a'], | |
292 | [a['dname'] for a in backtrace['ancestors']]) | |
293 | ||
294 | # Go corrupt the backtrace | |
295 | self.fs._write_data_xattr(file_ino, "parent", | |
296 | "oh i'm sorry did i overwrite your xattr?") | |
297 | ||
298 | with self.assert_cluster_log("bad backtrace on inode"): | |
b3b6e05e | 299 | out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) |
1adf2230 | 300 | self.assertNotEqual(out_json, None) |
f67539c2 TL |
301 | self.assertEqual(out_json["return_code"], 0) |
302 | self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) | |
303 | ||
7c673cae FG |
304 | self.fs.mds_asok(["flush", "journal"]) |
305 | backtrace = self.fs.read_backtrace(file_ino) | |
306 | self.assertEqual(['alpha', 'parent_a'], | |
307 | [a['dname'] for a in backtrace['ancestors']]) |