]>
Commit | Line | Data |
---|---|---|
181888fb FG |
1 | |
2 | """ | |
3 | Test our tools for recovering metadata from the data pool into an alternate pool | |
4 | """ | |
5 | import json | |
6 | ||
7 | import logging | |
8 | import os | |
9 | from textwrap import dedent | |
10 | import traceback | |
11 | from collections import namedtuple, defaultdict | |
12 | ||
13 | from teuthology.orchestra.run import CommandFailedError | |
14 | from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology | |
15 | ||
16 | log = logging.getLogger(__name__) | |
17 | ||
18 | ||
# A single failed check: the AssertionError raised plus a truncated backtrace
# showing which validation call recorded it.
ValidationError = namedtuple("ValidationError", "exception backtrace")
20 | ||
21 | ||
class OverlayWorkload(object):
    """
    Base class for workloads that write files via an original filesystem and
    later validate them via a recovery filesystem, after the original
    metadata pool has been damaged and rebuilt into an alternate pool.
    """

    def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
        self._orig_fs = orig_fs
        self._recovery_fs = recovery_fs
        self._orig_mount = orig_mount
        self._recovery_mount = recovery_mount
        # Populated by subclasses in write(); compared against in validate().
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them. Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string
        self._errors = []

    def assert_equal(self, a, b):
        """
        Record (rather than immediately raise) a validation failure if a != b,
        so that validate() can report every failed check at once.
        """
        try:
            if a != b:
                raise AssertionError("{0} != {1}".format(a, b))
        except AssertionError as e:
            self._errors.append(
                ValidationError(e, traceback.format_exc(3))
            )

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)

        :return: list of ValidationError accumulated by assert_equal calls
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from. By
        default just wipe everything in the metadata pool
        """
        # Delete every object in the metadata pool.  `rados ls` output is
        # newline-terminated, so splitting on "\n" produces a trailing empty
        # entry; skip empty names rather than running `rados rm ""`.
        objects = self._orig_fs.rados(["ls"]).split("\n")
        for o in objects:
            if o:
                self._orig_fs.rados(["rm", o])

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._orig_fs.mds_asok(["flush", "journal"])
        self._recovery_fs.mds_asok(["flush", "journal"])
74 | ||
75 | ||
class SimpleOverlayWorkload(OverlayWorkload):
    """
    Single file, single directory, check that it gets recovered and so does
    its size.
    """

    def write(self):
        # Lay down a 6 MB file inside one subdirectory and remember its
        # stat() result so validate() can compare sizes after recovery.
        self._orig_mount.run_shell(["mkdir", "subdir"])
        self._orig_mount.write_n_mb("subdir/sixmegs", 6)
        self._initial_state = self._orig_mount.stat("subdir/sixmegs")

    def validate(self):
        # The directory must be listable through the recovery mount, and the
        # recovered file's size must match what we recorded at write time.
        self._recovery_mount.run_shell(["ls", "subdir"])
        recovered = self._recovery_mount.stat("subdir/sixmegs")
        self.assert_equal(recovered['st_size'], self._initial_state['st_size'])
        return self._errors
90 | ||
class TestRecoveryPool(CephFSTestCase):
    """
    Test our tools for recovering metadata from the data pool into an
    alternate (recovery) metadata pool.
    """
    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 2
    REQUIRE_RECOVERY_FILESYSTEM = True

    def is_marked_damaged(self, rank):
        """Return True if `rank` appears in the MDS map's 'damaged' list."""
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    def _rebuild_metadata(self, workload, other_pool=None, workers=1):
        """
        That when all objects in metadata pool are removed, we can rebuild a metadata pool
        based on the contents of a data pool, and a client can see and read our files.

        :param workload: an OverlayWorkload driving write/damage/validate
        :param other_pool: unused; kept for interface compatibility
        :param workers: unused; kept for interface compatibility
        """

        # First, inject some files

        workload.write()

        # Unmount the client and flush the journal: the tool should also cope with
        # situations where there is dirty metadata, but we'll test that separately
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()
        workload.flush()

        # Create the alternate pool if requested
        recovery_fs = self.recovery_fs.name
        recovery_pool = self.recovery_fs.get_metadata_pool_name()
        self.recovery_fs.data_scan(['init', '--force-init',
                                    '--filesystem', recovery_fs,
                                    '--alternate-pool', recovery_pool])
        self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])

        # Stop the MDS
        self.fs.mds_stop()
        self.fs.mds_fail()

        # After recovery, we need the MDS to not be strict about stats (in production these options
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Reset the MDS map in case multiple ranks were in play: recovery procedure
        # only understands how to rebuild metadata under rank 0
        self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
                                            '--yes-i-really-mean-it')

        self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])

        # Run the recovery procedure
        # NOTE(review): a dead `if False:` branch used to wrap an
        # assertRaises(CommandFailedError) check that a plain
        # `journal reset` fails when no objects are present; it could never
        # execute and has been removed.

        self.fs.mds_stop()
        self.fs.data_scan(['scan_extents', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           self.fs.get_data_pool_name()])
        self.fs.data_scan(['scan_inodes', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list',
                              '--alternate-pool', recovery_pool], 0)

        self.fs.data_scan(['init', '--force-init', '--filesystem',
                           self.fs.name])
        self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)

        # Reset both journals now that dentries have been recovered.
        self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.journal_tool(['journal', 'reset', '--force'], 0)
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
                                            recovery_fs + ":0")

        # Mark the MDS repaired
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

        # Start the MDS
        self.fs.mds_restart()
        self.recovery_fs.mds_restart()
        self.fs.wait_for_daemons()
        self.recovery_fs.wait_for_daemons()

        # Crank up MDS debugging and kick off a recursive scrub/repair on
        # every active rank of the recovery filesystem so rebuilt stats get
        # corrected.
        status = self.recovery_fs.status()
        for rank in self.recovery_fs.get_ranks(status=status):
            self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'],
                                                'injectargs', '--debug-mds=20')
            self.fs.rank_tell(['scrub', 'start', '/', 'recursive', 'repair'], rank=rank['rank'], status=status)
        log.info(str(self.mds_cluster.status()))

        # Mount a client
        self.mount_a.mount()
        self.mount_b.mount(mount_fs_name=recovery_fs)
        self.mount_a.wait_until_mounted()
        self.mount_b.wait_until_mounted()

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def test_rebuild_simple(self):
        """End-to-end: simple workload survives metadata-pool wipe + rebuild."""
        self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
                                                     self.mount_a, self.mount_b))