"""
Test that the forward scrub functionality can traverse metadata and apply
requested tags, on well-formed metadata.

This is *not* the real testing for forward scrub, which will need to test
how the functionality responds to damaged metadata.
"""
import logging
import json
import struct

import six

from collections import namedtuple
from io import BytesIO
from textwrap import dedent

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase

log = logging.getLogger(__name__)

ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])


class TestForwardScrub(CephFSTestCase):
    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr
        """
        output = self.fs.rados(["getxattr", obj, attr], pool=pool,
                               stdout_data=BytesIO())
        strlen = struct.unpack('i', output[0:4])[0]
        return six.ensure_str(output[4:(4 + strlen)], encoding='ascii')
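
    # Note: ceph encodes a string as a 32-bit length followed by the raw
    # bytes, hence stripping the 4-byte prefix above. A made-up example:
    #     struct.unpack('i', b'\x05\x00\x00\x00hello'[0:4])[0]  ->  5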

    def _get_paths_to_ino(self):
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

    def test_apply_tag(self):
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a structure mapping path to inode, as we will later want
        # to check object by object and objects are named after ino number
        inos = self._get_paths_to_ino()
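        # (Objects are named "<ino in hex>.<object index>": an inode's first
        # object is "<ino:x>.00000000", which is what assertTagged and
        # assertUntagged below look up.)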

        # Flush metadata: this is a friendly test of forward scrub so we're skipping
        # the part where it's meant to cope with dirty metadata
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"  # arbitrary tag string applied by the scrub

        # Execute tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])

        # FIXME watching clog isn't a nice mechanism for this; once we have
        # a ScrubMap we'll watch that instead.

        # Check that dirs were tagged
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())
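        # (Directory fragments live as objects in the metadata pool, while
        # file contents live in the data pool, hence the different pool
        # arguments above.)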

        # This guy wasn't in the tag path, shouldn't have been tagged
        self.assertUntagged(inos["./rfile"])

    def assertUntagged(self, ino):
        file_obj_name = "{0:x}.00000000".format(ino)
        with self.assertRaises(CommandFailedError):
            # Reading the tag should fail because the xattr is absent
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                open(os.path.join(flushed, f), 'w').write(f)
        """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Umount before flush to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Create a new inode that's just in the log, i.e. would
        # look orphaned to backward scan if backward scan wisnae
        # respectin' tha scrub_tag xattr.
        self.mount_a.mount_wait()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry
        # Our victim will be.... bravo.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])
        self.fs.mds_stop()
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        self.fs.rados(["rmomapkey", frag_obj_id, "bravo_head"])

        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount_wait()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"  # arbitrary scrub tag
        self.fs.mds_asok(["tag", "path", "/parent", tag])

        # See that the orphan wisnae tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that the journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # Run cephfs-data-scan targeting only orphans
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])
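        # (--filter-tag makes scan_inodes skip objects carrying our scrub
        # tag, i.e. everything forward scrub could reach, so only the
        # orphaned bravo is eligible for re-injection.)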

        # After in-place injection stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.mds_restart()
        self.fs.wait_for_daemons()
        self.mount_a.mount_wait()
        self._validate_linkage(inos)

    def _stash_inotable(self):
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()

        # Stash a copy of each rank's inotable object
        inotable_dict = {}
        for rank in ranks:
            inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable"
            print("Trying to fetch inotable object: " + inotable_oid)

            #self.fs.get_metadata_object("InoTable", "mds0_inotable")
            inotable_raw = self.fs.get_metadata_object_raw(inotable_oid)
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict
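
    # The next test reverts the on-disk inotable to a stale copy while the
    # allocation events are spliced out of the journal, so the MDS comes up
    # believing two allocated inos are still free; a repair scrub should
    # notice and fix the table ("inode table repaired" in the cluster log).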

    def test_inotable_sync(self):
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        inotable_copy = self._stash_inotable()

        self.mount_a.mount_wait()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        self.mount_a.umount_wait()

        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
            self.assertNotEqual(out_json, None)

        self.mds_cluster.mds_stop()
        self.mds_cluster.mds_fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)
        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)

        # Revert to old inotable.
        for key, value in inotable_copy.items():
            self.fs.put_metadata_object_raw(key, value)

        self.mds_cluster.mds_restart()
        self.fs.wait_for_daemons()

        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
            self.assertNotEqual(out_json, None)

        self.mds_cluster.mds_stop()
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])

    def test_backtrace_repair(self):
        """
        That the MDS can repair an inode's backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # That backtrace and layout are written after initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
            self.assertNotEqual(out_json, None)
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])