"""
Test that the forward scrub functionality can traverse metadata and apply
requested tags to well-formed metadata.

This is *not* the real testing for forward scrub, which will need to test
how the functionality responds to damaged metadata.

"""
import json
import logging
import struct
import time

import six

from collections import namedtuple
from io import BytesIO
from textwrap import dedent

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase

log = logging.getLogger(__name__)


ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])


class TestForwardScrub(CephFSTestCase):
    MDSS_REQUIRED = 1

    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr
        """
        output = self.fs.rados(["getxattr", obj, attr], pool=pool,
                               stdout_data=BytesIO())
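        # The xattr value is a length-prefixed string: for example, a
        # hypothetical payload of b"\x05\x00\x00\x00hello" would decode to
        # strlen == 5 and the string "hello" (assuming the 32-bit,
        # little-endian length prefix used by Ceph's string encoding).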
        strlen = struct.unpack('i', output[0:4])[0]
        return six.ensure_str(output[4:(4 + strlen)], encoding='ascii')

    def _get_paths_to_ino(self):
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

    def test_apply_tag(self):
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a structure mapping path to inode, as we will later want
        # to check object by object, and objects are named after the ino number.
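        # (An inode's first RADOS object is named "<ino in hex>.00000000";
        #  assertTagged/assertUntagged below build names in that format.)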
        inos = self._get_paths_to_ino()

        # Flush metadata: this is a friendly test of forward scrub, so we're
        # skipping the part where it's meant to cope with dirty metadata.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"

        # Execute a tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])
        # Wait for completion.
        time.sleep(10)
        # FIXME: watching the clog isn't a nice mechanism for this; once we
        # have a ScrubMap we'll watch that instead.

        # Check that dirs were tagged
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())

        # This file wasn't in the tagged path, so it should not have been tagged
        self.assertUntagged(inos["./rfile"])

    def assertUntagged(self, ino):
        file_obj_name = "{0:x}.00000000".format(ino)
        with self.assertRaises(CommandFailedError):
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                open(os.path.join(flushed, f), 'w').write(f)
            """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Flush the journal.
        # Unmount before flushing to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Create a new inode that's just in the log, i.e. one that would
        # look orphaned to a backward scan if the backward scan weren't
        # respecting the scrub_tag xattr.
        self.mount_a.mount_wait()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry.
        # Our victim will be... bravo.
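        # (Dentries are stored as omap keys on the directory fragment object;
        #  the head version of a dentry is keyed "<name>_head", so removing
        #  "bravo_head" leaves bravo's inode with no linkage.)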
        self.mount_a.umount_wait()
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        self.fs.rados(["rmomapkey", frag_obj_id, "bravo_head"])

        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount_wait()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.mds_asok(["tag", "path", "/parent", tag])

        # See that the orphan was not tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that the journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # Run cephfs-data-scan targeting only orphans
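        # (With --filter-tag, scan_inodes should skip objects that carry the
        #  scrub tag and only re-inject the untagged ones, i.e. the orphaned
        #  bravo, using their backtraces to restore the original linkage.)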
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])

        # After in-place injection stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.mds_restart()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()
        self._validate_linkage(inos)

    def _stash_inotable(self):
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()

        inotable_dict = {}
        for rank in ranks:
            inotable_oid = "mds{rank:d}_inotable".format(rank=rank)
            print("Trying to fetch inotable object: " + inotable_oid)

            inotable_raw = self.fs.get_metadata_object_raw(inotable_oid)
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict

    def test_inotable_sync(self):
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        inotable_copy = self._stash_inotable()

        self.mount_a.mount_wait()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        self.mount_a.umount_wait()

        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
            self.assertNotEqual(out_json, None)

        self.mds_cluster.mds_stop()
        self.mds_cluster.mds_fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)
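        # ("event splice" with --inode removes the journal events that
        #  reference the given inode, so replaying the journal will not
        #  update the in-memory InoTable for file2/file3.)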
        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)

        # Revert to old inotable.
        for key, value in inotable_copy.items():
            self.fs.put_metadata_object_raw(key, value)

        self.mds_cluster.mds_restart()
        self.fs.wait_for_daemons()

        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
            self.assertNotEqual(out_json, None)

        self.mds_cluster.mds_stop()
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
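        # After repair, the first free range should begin above the inos that
        # were handed out while the stale (reverted) table was in place.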
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])

    def test_backtrace_repair(self):
        """
        That the MDS can repair an inode's backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # Check that the backtrace and layout are written after the initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
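        # (The backtrace is stored in the "parent" xattr of the inode's first
        #  object in the data pool; overwriting it below simulates damage.)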
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
            self.assertNotEqual(out_json, None)
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])