2 """
3 Test that the forward scrub functionality can traverse metadata and apply
4 requested tags, on well formed metadata.
5
6 This is *not* the real testing for forward scrub, which will need to test
7 how the functionality responds to damaged metadata.
8
9 """
10 import logging
11 import json
12
13 from collections import namedtuple
14 from io import BytesIO
15 from textwrap import dedent
16
17 from teuthology.orchestra.run import CommandFailedError
18 from tasks.cephfs.cephfs_test_case import CephFSTestCase
19
20 import struct
21
22 log = logging.getLogger(__name__)
23
24
25 ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
26
27
28 class TestForwardScrub(CephFSTestCase):
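    """
    Tests for the tagging forward scrub, and for the scrub-driven repair
    of backtraces and the inode table.
    """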
    MDSS_REQUIRED = 1

    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr
        """
        output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool,
                                              stdout=BytesIO()).stdout.getvalue()
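        # ceph encodes a string as a 32-bit length prefix followed by
        # exactly that many bytes of payload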
        strlen = struct.unpack('<i', output[0:4])[0]
        return output[4:(4 + strlen)].decode(encoding='ascii')

    def _get_paths_to_ino(self):
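        """
        Walk the client mount with `find` and return a dict mapping each
        path (as emitted by find, e.g. "./parentdir") to its inode number.
        """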
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

    def test_apply_tag(self):
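        """
        That a tagging forward scrub stamps the scrub_tag xattr onto every
        object under the tagged path, and nothing outside it.
        """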
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a structure mapping path to inode, as we will later want
        # to check object by object, and objects are named after ino numbers
        inos = self._get_paths_to_ino()

        # Flush metadata: this is a friendly test of forward scrub, so we're
        # skipping the part where it's meant to cope with dirty metadata
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"

        # Execute tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])
        # Wait for completion
        time.sleep(10)
        # FIXME watching clog isn't a nice mechanism for this; once we have
        # a ScrubMap we'll watch that instead

        # Check that dirs were tagged
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())

        # This file wasn't in the tagged path, so it shouldn't have been tagged
        self.assertUntagged(inos["./rfile"])

    def assertUntagged(self, ino):
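        """
        Assert that the 0th data object for this inode carries no scrub_tag
        xattr (i.e. reading it raises CommandFailedError).
        """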
        file_obj_name = "{0:x}.00000000".format(ino)
        with self.assertRaises(CommandFailedError):
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
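        """
        Assert that the 0th object for this inode, in the given pool,
        carries a scrub_tag xattr equal to `tag`.
        """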
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
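        """
        Assert that the path-to-inode map visible through the client mount
        exactly matches `expected`, logging both maps on mismatch.
        """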
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
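        """
        That an inode orphaned by deleting its dentry is skipped by a
        tagging forward scrub, and that cephfs-data-scan with --filter-tag
        can then reinject it without disturbing the intact files.
        """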
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                with open(os.path.join(flushed, f), 'w') as fh:
                    fh.write(f)
            """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Flush journal
        # Umount before flushing to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Create a new inode that's just in the log, i.e. one that would
        # look orphaned to a backward scan if the backward scan weren't
        # respecting the scrub_tag xattr.
        self.mount_a.mount_wait()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry.
        # Our victim will be... bravo.
        self.fs.fail()
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
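        # Dirfrag objects keep one omap entry per dentry; head (non-snapshot)
        # dentries are keyed as "<name>_head", so removing "bravo_head"
        # deletes bravo's dentry and orphans its inode.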
        self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"])

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount_wait()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.mds_asok(["tag", "path", "/parent", tag])
        # Wait for completion
        time.sleep(10)

        # See that the orphan wasn't tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that the journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # Run cephfs-data-scan targeting only orphans
        self.fs.fail()
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])

        # After in-place injection, stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()
        self._validate_linkage(inos)

    def _stash_inotable(self):
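        """
        Capture the raw inotable object ("mds<rank>_inotable") for every
        active MDS rank, returning a dict of object name -> raw bytes.
        """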
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()

        inotable_dict = {}
        for rank in ranks:
            inotable_oid = "mds{rank:d}_inotable".format(rank=rank)
            log.info("Trying to fetch inotable object: " + inotable_oid)

            inotable_raw = self.fs.radosmo(['get', inotable_oid, '-'])
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict

    def test_inotable_sync(self):
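        """
        That a repair scrub finds nothing wrong with a healthy inode table,
        and repairs it after we roll it back to a stale on-disk version.
        """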
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        inotable_copy = self._stash_inotable()

        self.mount_a.mount_wait()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.run_scrub(["start", "/", "repair", "recursive"])
            self.assertIsNotNone(out_json)
            self.assertEqual(out_json["return_code"], 0)
            self.assertTrue(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]))

        self.fs.fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)
        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)

        # Revert to the old inotable.
        for key, value in inotable_copy.items():
            self.fs.radosm(["put", key, "-"], stdin=BytesIO(value))

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.run_scrub(["start", "/", "repair", "recursive"])
            self.assertIsNotNone(out_json)
            self.assertEqual(out_json["return_code"], 0)
            self.assertTrue(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]))

        self.fs.fail()
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])

    def test_backtrace_repair(self):
        """
        That the MDS can repair an inode's backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # Check that the backtrace and layout are written after the initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.run_scrub(["start", "/", "repair", "recursive"])
            self.assertIsNotNone(out_json)
            self.assertEqual(out_json["return_code"], 0)
            self.assertTrue(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]))

        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])