]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/cephfs/test_forward_scrub.py
update ceph source to reef 18.2.0
[ceph.git] / ceph / qa / tasks / cephfs / test_forward_scrub.py
CommitLineData
7c673cae
FG
1
2"""
3Test that the forward scrub functionality can traverse metadata and apply
4requested tags, on well formed metadata.
5
6This is *not* the real testing for forward scrub, which will need to test
7how the functionality responds to damaged metadata.
8
9"""
import json
import logging
import struct
import time

from collections import namedtuple
from io import BytesIO
from textwrap import dedent

from teuthology.exceptions import CommandFailedError

from tasks.cephfs.cephfs_test_case import CephFSTestCase
22log = logging.getLogger(__name__)
23
24
# Record pairing an exception with its backtrace, used to report scrub
# validation failures. NOTE(review): not referenced in this file's visible
# code -- presumably consumed by sibling test modules; confirm before removal.
ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
26
27
class TestForwardScrub(CephFSTestCase):
    """
    Exercise forward scrub on well-formed metadata: tag application,
    orphan handling during tagged data-scan recovery, inode table
    repair, and backtrace repair.
    """
    MDSS_REQUIRED = 1
31 def _read_str_xattr(self, pool, obj, attr):
32 """
33 Read a ceph-encoded string from a rados xattr
34 """
f67539c2
TL
35 output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool,
36 stdout=BytesIO()).stdout.getvalue()
7c673cae 37 strlen = struct.unpack('i', output[0:4])[0]
f67539c2 38 return output[4:(4 + strlen)].decode(encoding='ascii')
7c673cae
FG
39
40 def _get_paths_to_ino(self):
41 inos = {}
42 p = self.mount_a.run_shell(["find", "./"])
43 paths = p.stdout.getvalue().strip().split()
44 for path in paths:
45 inos[path] = self.mount_a.path_to_ino(path)
46
47 return inos
48
49 def test_apply_tag(self):
50 self.mount_a.run_shell(["mkdir", "parentdir"])
51 self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
52 self.mount_a.run_shell(["touch", "rfile"])
53 self.mount_a.run_shell(["touch", "parentdir/pfile"])
54 self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])
55
56 # Build a structure mapping path to inode, as we will later want
57 # to check object by object and objects are named after ino number
58 inos = self._get_paths_to_ino()
59
60 # Flush metadata: this is a friendly test of forward scrub so we're skipping
61 # the part where it's meant to cope with dirty metadata
62 self.mount_a.umount_wait()
63 self.fs.mds_asok(["flush", "journal"])
64
65 tag = "mytag"
66
67 # Execute tagging forward scrub
68 self.fs.mds_asok(["tag", "path", "/parentdir", tag])
69 # Wait for completion
70 import time
71 time.sleep(10)
72 # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll
73 # watch that instead
74
75 # Check that dirs were tagged
76 for dirpath in ["./parentdir", "./parentdir/childdir"]:
77 self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())
78
79 # Check that files were tagged
80 for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
81 self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())
82
83 # This guy wasn't in the tag path, shouldn't have been tagged
84 self.assertUntagged(inos["./rfile"])
85
86 def assertUntagged(self, ino):
87 file_obj_name = "{0:x}.00000000".format(ino)
88 with self.assertRaises(CommandFailedError):
89 self._read_str_xattr(
90 self.fs.get_data_pool_name(),
91 file_obj_name,
92 "scrub_tag"
93 )
94
95 def assertTagged(self, ino, tag, pool):
96 file_obj_name = "{0:x}.00000000".format(ino)
97 wrote = self._read_str_xattr(
98 pool,
99 file_obj_name,
100 "scrub_tag"
101 )
102 self.assertEqual(wrote, tag)
103
104 def _validate_linkage(self, expected):
105 inos = self._get_paths_to_ino()
106 try:
107 self.assertDictEqual(inos, expected)
108 except AssertionError:
109 log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
110 log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
111 raise
112
    def test_orphan_scan(self):
        """
        An inode orphaned behind the MDS's back (its dentry removed
        directly from the dirfrag omap) is skipped by a tagging forward
        scrub, so a subsequent cephfs-data-scan with --filter-tag can
        recover only the legitimately-tagged inodes, restoring the
        original linkage with no extras.
        """
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                open(os.path.join(flushed, f), 'w').write(f)
        """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Flush journal
        # Umount before flush to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.flush()

        # Create a new inode that's just in the log, i.e. would
        # look orphaned to backward scan if backward scan wisnae
        # respectin' tha scrub_tag xattr.
        self.mount_a.mount_wait()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry
        # Our victim will be.... bravo.
        # NOTE(review): the client was already unmounted just above, so this
        # second umount_wait looks redundant -- confirm and drop?
        self.mount_a.umount_wait()
        self.fs.fail()
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        # Delete bravo's dentry key straight out of the dirfrag object's omap
        self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"])

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount_wait()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.rank_asok(["tag", "path", "/parent", tag])

        # See that the orphan wisnae tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # okay, now we are going to run cephfs-data-scan. It's necessary to
        # have a clean journal otherwise replay will blowup on mismatched
        # inotable versions (due to scan_links)
        self.fs.flush()
        self.fs.fail()
        self.fs.journal_tool(["journal", "reset", "--force"], 0)

        # Run cephfs-data-scan targeting only orphans
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])
        self.fs.data_scan(["scan_links"])

        # After in-place injection stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()
        self._validate_linkage(inos)
204
205 def _stash_inotable(self):
206 # Get all active ranks
207 ranks = self.fs.get_all_mds_rank()
208
209 inotable_dict = {}
210 for rank in ranks:
211 inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable"
9f95a23c 212 print("Trying to fetch inotable object: " + inotable_oid)
7c673cae
FG
213
214 #self.fs.get_metadata_object("InoTable", "mds0_inotable")
f67539c2 215 inotable_raw = self.fs.radosmo(['get', inotable_oid, '-'])
7c673cae
FG
216 inotable_dict[inotable_oid] = inotable_raw
217 return inotable_dict
218
    def test_inotable_sync(self):
        """
        A repair scrub resynchronises a stale on-disk inotable: after
        restoring a backup of the table (and splicing the journal events
        that consumed newer inos), the scrub must log "inode table
        repaired" and leave the table's free range above the allocated
        inos.
        """
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Stash the inotable as it stands with only file1 allocated
        inotable_copy = self._stash_inotable()

        self.mount_a.mount_wait()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # NOTE(review): the client was already unmounted two statements up;
        # this second umount_wait looks redundant -- confirm and drop?
        self.mount_a.umount_wait()

        # With a healthy inotable, a repair scrub must find nothing to fix
        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
        self.assertNotEqual(out_json, None)
        self.assertEqual(out_json["return_code"], 0)
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)

        # Revert to old inotable.
        for key, value in inotable_copy.items():
            self.fs.radosm(["put", key, "-"], stdin=BytesIO(value))

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        # Now the scrub must detect the stale table and repair it
        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
        self.assertNotEqual(out_json, None)
        self.assertEqual(out_json["return_code"], 0)
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.fail()
        # The repaired table's first free range must begin above the
        # highest ino we allocated, i.e. used inos are marked used.
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])
277
    def test_backtrace_repair(self):
        """
        That the MDS can repair an inodes backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # That backtrace and layout are written after initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

        # A repair scrub must spot the corrupted backtrace and log it
        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
        self.assertNotEqual(out_json, None)
        self.assertEqual(out_json["return_code"], 0)
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # After flushing, the repaired backtrace should be back in the
        # data pool with the original ancestry
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])