import json
import logging
import errno
import re
from teuthology.contextutil import MaxWhileTries
from teuthology.exceptions import CommandFailedError
from teuthology.orchestra.run import wait
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology

DAMAGED_ON_START = "damaged_on_start"
DAMAGED_ON_LS = "damaged_on_ls"
CRASHED = "server crashed"
NO_DAMAGE = "no damage"
READONLY = "readonly"
FAILED_CLIENT = "client failed"
FAILED_SERVER = "server failed"

# An EIO in response to a stat from the client
EIO_ON_LS = "eio"

# An EIO, but nothing in the damage table (never what we expect)
EIO_NO_DAMAGE = "eio without damage entry"

log = logging.getLogger(__name__)


class TestDamage(CephFSTestCase):
    def _simple_workload_write(self):
        self.mount_a.run_shell(["mkdir", "subdir"])
        self.mount_a.write_n_mb("subdir/sixmegs", 6)
        return self.mount_a.stat("subdir/sixmegs")

    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    @for_teuthology  # 459s
    def test_object_deletion(self):
        """
        That the MDS has a clean 'damaged' response to loss of any single metadata object
        """

        self._simple_workload_write()

        # It would be nice to permute whether the metadata pool state
        # contains sessions or not, but for the moment close this session
        # to avoid waiting through reconnect on every MDS start.
        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

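        # Snapshot the metadata pool so that each mutation below starts from
        # a pristine copy (restored with "rados import" in the loop).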
        self.fs.rados(['export', '/tmp/metadata.bin'])

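        # A sketch of the metadata object naming assumed by the filters
        # below: dirfrag objects are named "<inode hex>.<frag>", so
        # "1.00000000" is the root dirfrag, "100.00000000" is rank 0's
        # ~mds0 directory, and "600.00000000" through "609.00000000" are
        # rank 0's stray dirfrags.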
        def is_ignored(obj_id, dentry=None):
            """
            A filter to avoid redundantly mutating many similar objects (e.g.
            stray dirfrags) or similar dentries (e.g. stray dir dentries)
            """
            if re.match(r"60.\.00000000", obj_id) and obj_id != "600.00000000":
                return True

            if dentry and obj_id == "100.00000000":
                if re.match("stray.+_head", dentry) and dentry != "stray0_head":
                    return True

            return False

        def get_path(obj_id, dentry=None):
            """
            What filesystem path does this object or dentry correspond to? i.e.
            what should I poke to see EIO after damaging it?
            """

            if obj_id == "1.00000000" and dentry == "subdir_head":
                return "./subdir"
            elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
                return "./subdir/sixmegs"

            # None means ls will do an "ls -R" in hope of seeing some errors
            return None

        objects = self.fs.rados(["ls"]).split("\n")
        objects = [o for o in objects if not is_ignored(o)]

        # Find all objects with an OMAP header
        omap_header_objs = []
        for o in objects:
            header = self.fs.rados(["getomapheader", o])
            # The rados CLI wraps the header output in a hex-printed style
            header_bytes = int(re.match(r"header \((.+) bytes\)", header).group(1))
            if header_bytes > 0:
                omap_header_objs.append(o)

        # Find all OMAP key/vals
        omap_keys = []
        for o in objects:
            keys_str = self.fs.rados(["listomapkeys", o])
            if keys_str:
                for key in keys_str.split("\n"):
                    if not is_ignored(o, key):
                        omap_keys.append((o, key))

        # Find objects that have data in their bodies
        data_objects = []
        for obj_id in objects:
            stat_out = self.fs.rados(["stat", obj_id])
            size = int(re.match(r".+, size (.+)$", stat_out).group(1))
            if size > 0:
                data_objects.append(obj_id)

        # Define the various forms of damage we will inflict
        class MetadataMutation(object):
            def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
                self.obj_id = obj_id_
                self.desc = desc_
                self.mutate_fn = mutate_fn_
                self.expectation = expectation_
                if ls_path is None:
                    self.ls_path = "."
                else:
                    self.ls_path = ls_path

            def __eq__(self, other):
                return self.desc == other.desc

            def __hash__(self):
                return hash(self.desc)

        junk = "deadbeef" * 10
        mutations = []

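        # NB: the mutate_fn lambdas below bind their loop variable via a
        # default argument (e.g. "lambda o=o: ..."); a bare closure would
        # late-bind and every mutation would act on the last object seen.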
        # Removals
        for o in objects:
            if o in [
                # JournalPointers are auto-replaced if missing (same path as upgrade)
                "400.00000000",
                # Missing dirfrags for non-system dirs result in empty directory
                "10000000000.00000000",
                # PurgeQueue is auto-created if not found on startup
                "500.00000000"
            ]:
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            log.info("Expectation on rm '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(MetadataMutation(
                o,
                "Delete {0}".format(o),
                lambda o=o: self.fs.rados(["rm", o]),
                expectation
            ))

        # Blatant corruptions
        for obj_id in data_objects:
            if obj_id == "500.00000000":
                # purge queue corruption results in read-only FS
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
                    READONLY
                ))
            else:
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
                    DAMAGED_ON_START
                ))

        # Truncations
        for o in data_objects:
            if o == "500.00000000":
                # The PurgeQueue is allowed to be empty: Journaler interprets
                # an empty header object as an empty journal.
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            mutations.append(
                MetadataMutation(
                    o,
                    "Truncate {0}".format(o),
                    lambda o=o: self.fs.rados(["truncate", o, "0"]),
                    expectation
                ))

        # OMAP value corruptions
        for o, k in omap_keys:
            if o.startswith("100."):
                # Anything in rank 0's 'mydir'
                expectation = DAMAGED_ON_START
            else:
                expectation = EIO_ON_LS

            mutations.append(
                MetadataMutation(
                    o,
                    "Corrupt omap key {0}:{1}".format(o, k),
                    lambda o=o, k=k: self.fs.rados(["setomapval", o, k, junk]),
                    expectation,
                    get_path(o, k)
                )
            )

        # OMAP header corruptions
        for o in omap_header_objs:
            if re.match(r"60.\.00000000", o) \
                    or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
                expectation = DAMAGED_ON_START
            else:
                expectation = NO_DAMAGE

            log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(
                MetadataMutation(
                    o,
                    "Corrupt omap header on {0}".format(o),
                    lambda o=o: self.fs.rados(["setomapheader", o, junk]),
                    expectation
                )
            )

        results = {}

        for mutation in mutations:
            log.info("Applying mutation '{0}'".format(mutation.desc))

            # Reset MDS state
            self.mount_a.umount_wait(force=True)
            self.fs.mds_stop()
            self.fs.mds_fail()
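            # "mds repaired" clears rank 0's damaged flag in the MDSMap so
            # that a daemon may be assigned to the rank again.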
            self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

            # Reset RADOS pool state
            self.fs.rados(['import', '/tmp/metadata.bin'])

            # Inject the mutation
            mutation.mutate_fn()

            # Try starting the MDS
            self.fs.mds_restart()

            # How long we'll wait between starting a daemon and expecting
            # it to make it through startup, and potentially declare itself
            # damaged to the mon cluster.
            startup_timeout = 60

            if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
                if mutation.expectation == DAMAGED_ON_START:
                    # The MDS may pass through active before making it to damaged
                    try:
                        self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
                    except RuntimeError:
                        pass

                # Wait for MDS to either come up or go into damaged state
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout)
                except RuntimeError:
                    crashed = False
                    # Didn't make it to healthy or damaged, did it crash?
                    for daemon_id, daemon in self.fs.mds_daemons.items():
                        if daemon.proc and daemon.proc.finished:
                            crashed = True
                            log.error("Daemon {0} crashed!".format(daemon_id))
                            daemon.proc = None  # So that subsequent stop() doesn't raise error
                    if not crashed:
                        # Didn't go healthy, didn't go damaged, didn't crash, so what?
                        raise
                    else:
                        log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
                        results[mutation] = CRASHED
                        continue
                if self.is_marked_damaged(0):
                    log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_START
                    continue
                else:
                    log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
            else:
                try:
                    self.wait_until_true(self.fs.are_daemons_healthy, 60)
                except RuntimeError:
                    log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc))
                    if self.is_marked_damaged(0):
                        results[mutation] = DAMAGED_ON_START
                    else:
                        results[mutation] = FAILED_SERVER
                    continue
                log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))

            # MDS is up, should go damaged on ls or client mount
            self.mount_a.mount()
            self.mount_a.wait_until_mounted()
            if mutation.ls_path == ".":
                proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
            else:
                proc = self.mount_a.stat(mutation.ls_path, wait=False)

            if mutation.expectation == DAMAGED_ON_LS:
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
                    log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_LS
                except RuntimeError:
                    if self.fs.are_daemons_healthy():
                        log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
                            mutation.desc))
                        results[mutation] = NO_DAMAGE
                    else:
                        log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
                        results[mutation] = FAILED_SERVER
            elif mutation.expectation == READONLY:
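                # Purge queue damage is expected to send the MDS read-only
                # rather than damaged, so a client write should fail with
                # EROFS ("Read-only file system").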
                proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False)
                try:
                    proc.wait()
                except CommandFailedError:
                    stderr = proc.stderr.getvalue()
                    log.info(stderr)
                    if "Read-only file system".lower() in stderr.lower():
                        pass
                    else:
                        raise
            else:
                try:
                    wait([proc], 20)
                    log.info("Result: Mutation '{0}' did not cause DAMAGED state".format(mutation.desc))
                    results[mutation] = NO_DAMAGE
                except MaxWhileTries:
                    log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
                    results[mutation] = FAILED_CLIENT
                except CommandFailedError as e:
                    if e.exitstatus == errno.EIO:
                        log.info("Result: EIO on client")
                        results[mutation] = EIO_ON_LS
                    else:
                        log.info("Result: unexpected error {0} on client".format(e))
                        results[mutation] = FAILED_CLIENT

            if mutation.expectation == EIO_ON_LS:
                # EIOs mean something handled by DamageTable: assert that it has
                # been populated
                damage = json.loads(
                    self.fs.mon_manager.raw_cluster_cmd(
                        'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
                if len(damage) == 0:
                    results[mutation] = EIO_NO_DAMAGE

        failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
        if failures:
            log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
            for mutation, result in failures:
                log.error("  Expected '{0}' actually '{1}' from '{2}'".format(
                    mutation.expectation, result, mutation.desc
                ))
            raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
        else:
            log.info("All {0} mutations had expected outcomes".format(len(mutations)))

    def test_damaged_dentry(self):
        # Damage to dentries is interesting because it leaves the
        # directory's `complete` flag in a subtle state where
        # we have marked the dir complete in order that folks
        # can access it, but in actual fact there is a dentry
        # missing
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        # Corrupt a dentry
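        # (the MDS will fail to decode the garbage omap value when it loads
        # the dirfrag, and should hide the dentry and record damage rather
        # than assert out)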
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Start up and try to list it
        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        dentries = self.mount_a.ls("subdir/")

        # The damaged guy should have disappeared
        self.assertEqual(dentries, ["file_undamaged"])

        # I should get ENOENT if I try and read it normally, because
        # the dir is considered complete
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")

        # The fact that there is damage should have been recorded
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        damage_id = damage[0]['id']

        # If I try to create a dentry with the same name as the damaged guy
        # then that should be forbidden
        try:
            self.mount_a.touch("subdir/file_to_be_damaged")
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EIO)
        else:
            raise AssertionError("Expected EIO")

        # Attempting that touch will clear the client's complete flag, now
        # when I stat it I'll get EIO instead of ENOENT
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            if isinstance(self.mount_a, FuseMount):
                self.assertEqual(e.exitstatus, errno.EIO)
            else:
                # Kernel client handles this case differently
                self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected EIO")

        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")

        self.mount_a.umount_wait()

        # Now repair the stats
        scrub_json = self.fs.mds_asok(["scrub_path", "/subdir", "repair"])
        log.info(json.dumps(scrub_json, indent=2))

        self.assertEqual(scrub_json["passed_validation"], False)
        self.assertEqual(scrub_json["raw_stats"]["checked"], True)
        self.assertEqual(scrub_json["raw_stats"]["passed"], False)

        # Check that the file count is now correct
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")

        # Clean up the omap object
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Clean up the damagetable entry
        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", "{did}".format(did=damage_id))

        # Now I should be able to create a file with the same name as the
        # damaged guy if I want.
        self.mount_a.touch("subdir/file_to_be_damaged")

    def test_open_ino_errors(self):
        """
        That errors encountered during opening inos are properly propagated
        """

        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["touch", "dir1/file1"])
        self.mount_a.run_shell(["mkdir", "dir2"])
        self.mount_a.run_shell(["touch", "dir2/file2"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
        self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])

        file1_ino = self.mount_a.path_to_ino("dir1/file1")
        file2_ino = self.mount_a.path_to_ino("dir2/file2")
        dir2_ino = self.mount_a.path_to_ino("dir2")

        # Ensure everything is written to backing store
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Drop everything from the MDS cache
        self.mds_cluster.mds_stop()
        self.fs.journal_tool(['journal', 'reset'], 0)
        self.mds_cluster.mds_fail_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()

        # Case 1: un-decodeable backtrace

        # Validate that the backtrace is present and decodable
        self.fs.read_backtrace(file1_ino)
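        # (backtraces live in the "parent" xattr of the inode's first data
        # object; they are what allows the MDS to resolve a hard link's
        # target by inode number)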
        # Go corrupt the backtrace of dir1/file1 (used for resolving
        # testdir/hardlink1).
        self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.assertEqual(damage[0]['ino'], file1_ino)

        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", str(damage[0]['id']))

        # Case 2: missing dirfrag for the target inode

        self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)])

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 2)
        if damage[0]['damage_type'] == "backtrace":
            self.assertEqual(damage[0]['ino'], file2_ino)
            self.assertEqual(damage[1]['damage_type'], "dir_frag")
            self.assertEqual(damage[1]['ino'], dir2_ino)
        else:
            self.assertEqual(damage[0]['damage_type'], "dir_frag")
            self.assertEqual(damage[0]['ino'], dir2_ino)
            self.assertEqual(damage[1]['damage_type'], "backtrace")
            self.assertEqual(damage[1]['ino'], file2_ino)

        for entry in damage:
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "rm", str(entry['id']))