2"""
3Test that the forward scrub functionality can traverse metadata and apply
4requested tags, on well formed metadata.
5
6This is *not* the real testing for forward scrub, which will need to test
7how the functionality responds to damaged metadata.
8
9"""
import logging
import json
import struct
import time

from collections import namedtuple
from io import BytesIO
from textwrap import dedent

from teuthology.exceptions import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase

log = logging.getLogger(__name__)


ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])


class TestForwardScrub(CephFSTestCase):
    MDSS_REQUIRED = 1

    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr
        """
        output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool,
                                              stdout=BytesIO()).stdout.getvalue()
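        # A ceph-encoded string is a 32-bit length prefix followed by the raw
        # bytes: e.g. a tag "mytag" would arrive as b"\x05\x00\x00\x00mytag"
        # (illustrative; the 'i' unpack below assumes the test node's native
        # byte order, little-endian on typical test hardware).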
        strlen = struct.unpack('i', output[0:4])[0]
        return output[4:(4 + strlen)].decode(encoding='ascii')

    def _get_paths_to_ino(self):
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

    def test_apply_tag(self):
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a structure mapping path to inode, as we will later want
        # to check object by object and objects are named after ino number
        inos = self._get_paths_to_ino()

        # Flush metadata: this is a friendly test of forward scrub, so we're
        # skipping the part where it's meant to cope with dirty metadata
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"

        # Execute tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])
        # Wait for completion
        time.sleep(10)
        # FIXME watching clog isn't a nice mechanism for this; once we have
        # a ScrubMap we'll watch that instead
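        # A sketch of how the sleep could become a real wait, borrowing the
        # scrub interface the repair tests below already use (an assumption:
        # tag scrubs would need to be routed through that interface):
        #   out_json = self.fs.run_scrub(["start", "/parentdir", "recursive"])
        #   self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"])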

        # Check that dirs were tagged
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())

        # This file wasn't in the tagged path, so it should not have been tagged
        self.assertUntagged(inos["./rfile"])

    def assertUntagged(self, ino):
        file_obj_name = "{0:x}.00000000".format(ino)
        with self.assertRaises(CommandFailedError):
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                with open(os.path.join(flushed, f), 'w') as fh:
                    fh.write(f)
            """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Flush journal
        # Unmount before flushing to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Create a new inode that's just in the log, i.e. one that would
        # look orphaned to backward scan if backward scan weren't
        # respecting the scrub_tag xattr.
        self.mount_a.mount_wait()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry
        # Our victim will be... bravo.
        self.mount_a.umount_wait()
        self.fs.fail()
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"])
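        # (Dentries are stored as omap entries, keyed "<name>_head", on the
        # directory fragment object, so removing the key above deletes
        # bravo's dentry while leaving its inode and data objects in place.)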

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount_wait()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.mds_asok(["tag", "path", "/parent", tag])

        # See that the orphan was not tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that the journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # Run cephfs-data-scan targeting only orphans
        self.fs.fail()
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])

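        # (scan_extents first reconstructs each file's size/mtime from its
        # data objects; scan_inodes then reads each file's backtrace from its
        # first object and injects the inode. --filter-tag skips objects
        # already carrying our scrub_tag, so only the untagged orphan, bravo,
        # is re-injected -- which is what the linkage check below relies on.)
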
        # After in-place injection, stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()
        self._validate_linkage(inos)

    def _stash_inotable(self):
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()

        inotable_dict = {}
        for rank in ranks:
            # Each rank's inode table is a separate metadata-pool object,
            # named "mds<rank>_inotable"
            inotable_oid = "mds{rank:d}_inotable".format(rank=rank)
            log.info("Trying to fetch inotable object: " + inotable_oid)

            inotable_raw = self.fs.radosmo(['get', inotable_oid, '-'])
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict

    def test_inotable_sync(self):
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        inotable_copy = self._stash_inotable()

        self.mount_a.mount_wait()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        self.mount_a.umount_wait()

        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)
        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)
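        # ("event splice" cuts the journal events referencing these inodes
        # out of rank 0's journal, so once the old inotable is restored below
        # nothing in replay will re-record their allocation: the restarted
        # MDS genuinely believes those inos are still free.)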

        # Revert to old inotable.
        for key, value in inotable_copy.items():
            self.fs.radosm(["put", key, "-"], stdin=BytesIO(value))

        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.fail()
        # If the repair worked, the inotable's first free range now starts
        # above the highest ino we allocated, i.e. the replayed inos have
        # been re-marked as in use.
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])

    def test_backtrace_repair(self):
        """
        That the MDS can repair an inode's backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # That the backtrace and layout are written after the initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

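        # (The backtrace lives in the "parent" xattr of the inode's first
        # data object -- the same attribute read_backtrace decodes -- so the
        # write above replaces a valid encoded backtrace with garbage.)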
        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
            self.assertNotEqual(out_json, None)
            self.assertEqual(out_json["return_code"], 0)
            self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])