]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | |
2 | """ | |
3 | Test that the forward scrub functionality can traverse metadata and apply | |
4 | requested tags, on well formed metadata. | |
5 | ||
6 | This is *not* the real testing for forward scrub, which will need to test | |
7 | how the functionality responds to damaged metadata. | |
8 | ||
9 | """ | |
10 | import json | |
11 | ||
12 | import logging | |
13 | from collections import namedtuple | |
14 | from textwrap import dedent | |
15 | ||
16 | from teuthology.orchestra.run import CommandFailedError | |
17 | from tasks.cephfs.cephfs_test_case import CephFSTestCase | |
18 | ||
19 | import struct | |
20 | ||
# Module-level logger, named after this module as per convention.
log = logging.getLogger(__name__)


# Lightweight record pairing an exception with the backtrace it carried.
ValidationError = namedtuple("ValidationError", "exception backtrace")
26 | ||
class TestForwardScrub(CephFSTestCase):
    """
    Test that the forward scrub functionality can traverse metadata and
    apply requested tags, on well formed metadata.

    This is *not* the real testing for forward scrub, which will need to
    test how the functionality responds to damaged metadata.
    """
    # A single MDS is sufficient for these scenarios.
    MDSS_REQUIRED = 1

    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr.

        :param pool: name of the rados pool holding the object
        :param obj: object name
        :param attr: xattr name
        :return: the decoded string payload (length-prefixed ceph encoding)
        """
        output = self.fs.rados(["getxattr", obj, attr], pool=pool)
        # Ceph encodes strings as a 32-bit length prefix followed by the
        # payload; 'i' uses native byte order — assumes a little-endian
        # test host, matching the on-wire encoding. TODO confirm.
        strlen = struct.unpack('i', output[0:4])[0]
        # NOTE(review): under Python 3 this slice is bytes if fs.rados
        # returns bytes; callers compare it against str tags — confirm
        # the return type of fs.rados against the test framework.
        return output[4:(4 + strlen)]

    def _get_paths_to_ino(self):
        """
        Walk the mount with `find` and return a dict mapping each path
        (as printed by find, i.e. "./..." relative paths) to its inode
        number.
        """
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

    def test_apply_tag(self):
        """
        That a tagging forward scrub applies the tag to everything under
        the scrubbed path, and nothing outside it.
        """
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a structure mapping path to inode, as we will later want
        # to check object by object and objects are named after ino number
        inos = self._get_paths_to_ino()

        # Flush metadata: this is a friendly test of forward scrub so we're
        # skipping the part where it's meant to cope with dirty metadata
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"

        # Execute tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])
        # Wait for completion
        time.sleep(10)
        # FIXME watching clog isn't a nice mechanism for this, once we have
        # a ScrubMap we'll watch that instead

        # Check that dirs were tagged (dir objects live in the metadata pool)
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged (file objects live in the data pool)
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())

        # This guy wasn't in the tag path, shouldn't have been tagged
        self.assertUntagged(inos["./rfile"])

    def assertUntagged(self, ino):
        """
        Assert that the data-pool object for `ino` carries no scrub_tag
        xattr (reading it must fail).
        """
        file_obj_name = "{0:x}.00000000".format(ino)
        with self.assertRaises(CommandFailedError):
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
        """
        Assert that the object for `ino` in `pool` carries exactly `tag`
        in its scrub_tag xattr.
        """
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
        """
        Assert that the current path->ino mapping on the mount matches
        `expected` exactly, logging both sides on mismatch for debugging.
        """
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
        """
        That cephfs-data-scan with --filter-tag only re-injects inodes that
        were NOT tagged by a preceding forward scrub (i.e. true orphans),
        leaving tagged and journalled-but-unflushed inodes alone.
        """
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                open(os.path.join(flushed, f), 'w').write(f)
        """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Flush journal
        # Umount before flush to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Create a new inode that's just in the log, i.e. would
        # look orphaned to backward scan if backward scan wisnae
        # respectin' tha scrub_tag xattr.
        self.mount_a.mount()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry
        # Our victim will be.... bravo.
        # (already unmounted above; this second umount_wait is a no-op —
        # kept for safety against test-order changes)
        self.mount_a.umount_wait()
        self.fs.mds_stop()
        self.fs.mds_fail()
        # Disable stat verification: we are about to make the metadata
        # deliberately inconsistent by removing a dentry behind the MDS.
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        self.fs.rados(["rmomapkey", frag_obj_id, "bravo_head"])

        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.mds_asok(["tag", "path", "/parent", tag])

        # See that the orphan wisnae tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # Run cephfs-data-scan targeting only orphans
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])

        # After in-place injection stats should be kosher again
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.mds_restart()
        self.fs.wait_for_daemons()
        self.mount_a.mount()
        self._validate_linkage(inos)

    def _stash_inotable(self):
        """
        Fetch the raw inotable object for every active MDS rank.

        :return: dict mapping inotable object name (e.g. "mds0_inotable")
                 to its raw object contents, suitable for later restore
                 via put_metadata_object_raw.
        """
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()

        inotable_dict = {}
        for rank in ranks:
            inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable"
            # py3 fix: print as a function (was a py2 print statement)
            print("Trying to fetch inotable object: " + inotable_oid)

            #self.fs.get_metadata_object("InoTable", "mds0_inotable")
            inotable_raw = self.fs.get_metadata_object_raw(inotable_oid)
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict

    def test_inotable_sync(self):
        """
        That a repair scrub detects and repairs an inotable which has been
        reverted to a stale on-disk copy (one that still marks in-use inos
        as free), and that a healthy inotable is left alone.
        """
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        inotable_copy = self._stash_inotable()

        self.mount_a.mount()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        self.mount_a.umount_wait()

        # With a consistent inotable a repair scrub must NOT report repairs
        with self.assert_cluster_log("inode table repaired", invert_match=True):
            out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
        self.assertNotEqual(out_json, None)

        self.mds_cluster.mds_stop()
        self.mds_cluster.mds_fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]),
                              "summary"])

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]),
                              "summary"])

        # Revert to old inotable.
        # py3 fix: dict.items() (was py2-only iteritems())
        for key, value in inotable_copy.items():
            self.fs.put_metadata_object_raw(key, value)

        self.mds_cluster.mds_restart()
        self.fs.wait_for_daemons()

        # Now the inotable is stale: the repair scrub must fix it and say so
        with self.assert_cluster_log("inode table repaired"):
            out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
        self.assertNotEqual(out_json, None)

        self.mds_cluster.mds_stop()
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
        # The repaired free-range must begin above our highest-used ino
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])

    def test_backtrace_repair(self):
        """
        That the MDS can repair an inodes backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # That backtrace and layout are written after initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

        # Repair scrub must flag the damage...
        with self.assert_cluster_log("bad backtrace on inode"):
            out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
        self.assertNotEqual(out_json, None)
        # ...and after flushing, the backtrace must be intact again
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])