]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | |
2 | """ | |
3 | Test that the forward scrub functionality can traverse metadata and apply | |
4 | requested tags, on well formed metadata. | |
5 | ||
6 | This is *not* the real testing for forward scrub, which will need to test | |
7 | how the functionality responds to damaged metadata. | |
8 | ||
9 | """ | |
10 | import json | |
11 | ||
12 | import logging | |
13 | from collections import namedtuple | |
14 | from textwrap import dedent | |
15 | ||
16 | from teuthology.orchestra.run import CommandFailedError | |
17 | from tasks.cephfs.cephfs_test_case import CephFSTestCase | |
18 | ||
19 | import struct | |
20 | ||
# Module-level logger, named after this module as per convention.
log = logging.getLogger(__name__)


# Lightweight record pairing an exception with the backtrace it carried.
ValidationError = namedtuple("ValidationError", "exception backtrace")
26 | ||
class TestForwardScrub(CephFSTestCase):
    """
    Test that the forward scrub functionality can traverse metadata and
    apply requested tags, on well formed metadata.

    This is *not* the real testing for forward scrub, which will need to
    test how the functionality responds to damaged metadata.
    """
    # A single MDS is sufficient for these scenarios.
    MDSS_REQUIRED = 1

    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr.

        :param pool: name of the rados pool holding the object
        :param obj: object name
        :param attr: xattr name
        :return: the decoded string payload (length-prefixed ceph encoding)
        """
        output = self.fs.rados(["getxattr", obj, attr], pool=pool)
        # Ceph encodes strings as a 32-bit length prefix followed by the
        # payload; 'i' uses native byte order — assumes a little-endian
        # test host, matching the on-wire encoding. TODO confirm.
        strlen = struct.unpack('i', output[0:4])[0]
        # NOTE(review): under Python 3 this slice is bytes if fs.rados
        # returns bytes; callers compare it against str tags — confirm
        # the return type of fs.rados against the test framework.
        return output[4:(4 + strlen)]

    def _get_paths_to_ino(self):
        """
        Walk the mount with `find` and return a dict mapping each path
        (as printed by find, i.e. "./..." relative paths) to its inode
        number.
        """
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

    def test_apply_tag(self):
        """
        That a tagging forward scrub applies the tag to everything under
        the scrubbed path, and nothing outside it.
        """
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a structure mapping path to inode, as we will later want
        # to check object by object and objects are named after ino number
        inos = self._get_paths_to_ino()

        # Flush metadata: this is a friendly test of forward scrub so we're
        # skipping the part where it's meant to cope with dirty metadata
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"

        # Execute tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])
        # Wait for completion
        time.sleep(10)
        # FIXME watching clog isn't a nice mechanism for this, once we have
        # a ScrubMap we'll watch that instead

        # Check that dirs were tagged (dir objects live in the metadata pool)
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged (file objects live in the data pool)
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())

        # This guy wasn't in the tag path, shouldn't have been tagged
        self.assertUntagged(inos["./rfile"])

    def assertUntagged(self, ino):
        """
        Assert that the data-pool object for `ino` carries no scrub_tag
        xattr (reading it must fail).
        """
        file_obj_name = "{0:x}.00000000".format(ino)
        with self.assertRaises(CommandFailedError):
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
        """
        Assert that the object for `ino` in `pool` carries exactly `tag`
        in its scrub_tag xattr.
        """
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
        """
        Assert that the current path->ino mapping on the mount matches
        `expected` exactly, logging both sides on mismatch for debugging.
        """
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
        """
        That cephfs-data-scan with --filter-tag only re-injects inodes that
        were NOT tagged by a preceding forward scrub (i.e. true orphans),
        leaving tagged and journalled-but-unflushed inodes alone.
        """
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                open(os.path.join(flushed, f), 'w').write(f)
        """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Flush journal
        # Umount before flush to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Create a new inode that's just in the log, i.e. would
        # look orphaned to backward scan if backward scan wisnae
        # respectin' tha scrub_tag xattr.
        self.mount_a.mount()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry
        # Our victim will be.... bravo.
        # (already unmounted above; this second umount_wait is a no-op —
        # kept for safety against test-order changes)
        self.mount_a.umount_wait()
        self.fs.mds_stop()
        self.fs.mds_fail()
        # Disable stat verification: we are about to make the metadata
        # deliberately inconsistent by removing a dentry behind the MDS.
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        self.fs.rados(["rmomapkey", frag_obj_id, "bravo_head"])

        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.mds_asok(["tag", "path", "/parent", tag])

        # See that the orphan wisnae tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # Run cephfs-data-scan targeting only orphans
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])

        # After in-place injection stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.mds_restart()
        self.fs.wait_for_daemons()
        self.mount_a.mount()
        self._validate_linkage(inos)

    def _stash_inotable(self):
        """
        Fetch the raw inotable object for every active MDS rank.

        :return: dict mapping inotable object name (e.g. "mds0_inotable")
                 to its raw object contents, suitable for later restore
                 via put_metadata_object_raw.
        """
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()

        inotable_dict = {}
        for rank in ranks:
            inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable"
            # py3 fix: print as a function (was a py2 print statement)
            print("Trying to fetch inotable object: " + inotable_oid)

            #self.fs.get_metadata_object("InoTable", "mds0_inotable")
            inotable_raw = self.fs.get_metadata_object_raw(inotable_oid)
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict

    def test_inotable_sync(self):
        """
        That a repair scrub detects and repairs an inotable which has been
        reverted to a stale on-disk copy (one that still marks in-use inos
        as free), and that a healthy inotable is left alone.
        """
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        inotable_copy = self._stash_inotable()

        self.mount_a.mount()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        self.mount_a.umount_wait()

        # With a consistent inotable a repair scrub must NOT report repairs
        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
        self.assertNotEqual(out_json, None)

        self.mds_cluster.mds_stop()
        self.mds_cluster.mds_fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]),
                              "summary"])

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]),
                              "summary"])

        # Revert to old inotable.
        # py3 fix: dict.items() (was py2-only iteritems())
        for key, value in inotable_copy.items():
            self.fs.put_metadata_object_raw(key, value)

        self.mds_cluster.mds_restart()
        self.fs.wait_for_daemons()

        # Now the inotable is stale: the repair scrub must fix it and say so
        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
        self.assertNotEqual(out_json, None)

        self.mds_cluster.mds_stop()
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
        # The repaired free-range must begin above our highest-used ino
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])

    def test_backtrace_repair(self):
        """
        That the MDS can repair an inodes backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # That backtrace and layout are written after initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

        # Repair scrub must flag the damage...
        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
        self.assertNotEqual(out_json, None)
        # ...and after flushing, the backtrace must be intact again
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])