[ceph.git] / ceph / qa / tasks / cephfs / test_journal_migration.py


from StringIO import StringIO
from tasks.cephfs.cephfs_test_case import CephFSTestCase
from tasks.workunit import task as workunit

# Values for the 'mds journal format' config option. The test in this file
# writes them to ceph.conf and compares the second one against the version
# reported for the journal after the MDS restart.
JOURNAL_FORMAT_LEGACY = 0
JOURNAL_FORMAT_RESILIENT = 1


class TestJournalMigration(CephFSTestCase):
    """Check that a journal written in the legacy format is rewritten in the
    resilient format once the MDS is restarted with the new format configured.
    """

    CLIENTS_REQUIRED = 1
    # The test asserts that one rank and one standby-replay daemon are up.
    MDSS_REQUIRED = 2

    def _run_workunit(self, script):
        """Run the workunit *script* for mount_a's client with a 3h timeout.

        The caller is responsible for having mount_a mounted.
        """
        workunit(self.ctx, {
            'clients': {
                "client.{0}".format(self.mount_a.client_id): [script],
            },
            "timeout": "3h"
        })

    def test_journal_migration(self):
        """Populate a legacy-format journal, switch formats, verify the result.

        Raises RuntimeError if the journal version after the restart is not
        the new one, if the journal tool's "journal inspect" output does not
        end with ": OK", or if fewer than 1000 journal events are exported.
        """
        old_journal_version = JOURNAL_FORMAT_LEGACY
        new_journal_version = JOURNAL_FORMAT_RESILIENT

        self.mount_a.umount_wait()
        self.fs.mds_stop()

        # Create a filesystem using the older journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
        self.fs.mds_restart()
        self.fs.recreate()

        # Enable standby replay, to cover the bug case #8811 where
        # a standby replay might mistakenly end up trying to rewrite
        # the journal at the same time as an active daemon.
        self.fs.set_allow_standby_replay(True)

        status = self.fs.wait_for_daemons()

        self.assertIsNotNone(self.fs.get_replay(status=status))

        # Do some client work so that the log is populated with something.
        with self.mount_a.mounted():
            self.mount_a.create_files()
            self.mount_a.check_files()  # sanity, this should always pass

            # Run a more substantial workunit so that the length of the log
            # to be converted is going to span at least a few segments.
            self._run_workunit("suites/fsstress.sh")

        # Modify the ceph.conf to ask the MDS to use the new journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

        # Restart the MDS.
        self.fs.mds_fail_restart()

        # This ensures that all daemons come up into a valid state
        self.fs.wait_for_daemons()

        # Check that files created in the initial client workload are still
        # visible in a client mount.
        with self.mount_a.mounted():
            self.mount_a.check_files()

        # Verify that the journal really has been rewritten.
        journal_version = self.fs.get_journal_version()
        if journal_version != new_journal_version:
            # journal_version is a value, not a callable: format it directly
            # so that this failure is reported as the RuntimeError below
            # rather than as a TypeError.
            raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
                new_journal_version, journal_version
            ))

        # Verify that cephfs-journal-tool can now read the rewritten journal
        inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
        if not inspect_out.endswith(": OK"):
            raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
                inspect_out
            ))

        # Export the journal events as JSON and count them on the remote host.
        self.fs.journal_tool(["event", "get", "json",
                              "--path", "/tmp/journal.json"], 0)
        p = self.fs.tool_remote.run(
            args=[
                "python",
                "-c",
                # print(...) with one argument prints the same thing under
                # Python 2 and Python 3, so this works with either remote
                # interpreter.
                "import json; print(len(json.load(open('/tmp/journal.json'))))"
            ],
            stdout=StringIO())
        event_count = int(p.stdout.getvalue().strip())
        if event_count < 1000:
            # Approximate value of "lots", expected from having run fsstress
            raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

        # Do some client work to check that writing the log is still working
        with self.mount_a.mounted():
            self._run_workunit("fs/misc/trivial_sync.sh")

        # Check that both an active and a standby replay are still up
        status = self.fs.status()
        self.assertEqual(len(list(self.fs.get_replays(status=status))), 1)
        self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1)
Commit	Line	Data
7c673cae FG	1
	2	from StringIO import StringIO
	3	from tasks.cephfs.cephfs_test_case import CephFSTestCase
	4	from tasks.workunit import task as workunit
	5
	6	JOURNAL_FORMAT_LEGACY = 0
	7	JOURNAL_FORMAT_RESILIENT = 1
	8
	9
	10	class TestJournalMigration(CephFSTestCase):
	11	CLIENTS_REQUIRED = 1
31f18b77	12	MDSS_REQUIRED = 2
7c673cae FG	13
	14	def test_journal_migration(self):
	15	old_journal_version = JOURNAL_FORMAT_LEGACY
	16	new_journal_version = JOURNAL_FORMAT_RESILIENT
	17
7c673cae FG	18	self.mount_a.umount_wait()
7c673cae FG	19	self.fs.mds_stop()
31f18b77	20
31f18b77 FG	21	# Create a filesystem using the older journal format.
31f18b77 FG	22	self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
11fdf7f2	23	self.fs.mds_restart()
7c673cae	24	self.fs.recreate()
31f18b77	25
11fdf7f2 TL	26	# Enable standby replay, to cover the bug case #8811 where
	27	# a standby replay might mistakenly end up trying to rewrite
	28	# the journal at the same time as an active daemon.
	29	self.fs.set_allow_standby_replay(True)
31f18b77	30
11fdf7f2 TL	31	status = self.fs.wait_for_daemons()
	32
	33	self.assertTrue(self.fs.get_replay(status=status) is not None)
7c673cae FG	34
	35	# Do some client work so that the log is populated with something.
	36	with self.mount_a.mounted():
	37	self.mount_a.create_files()
	38	self.mount_a.check_files() # sanity, this should always pass
	39
	40	# Run a more substantial workunit so that the length of the log to be
	41	# coverted is going span at least a few segments
	42	workunit(self.ctx, {
	43	'clients': {
	44	"client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
	45	},
	46	"timeout": "3h"
	47	})
	48
	49	# Modify the ceph.conf to ask the MDS to use the new journal format.
	50	self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)
	51
	52	# Restart the MDS.
11fdf7f2	53	self.fs.mds_fail_restart()
7c673cae FG	54
7c673cae FG	55	# This ensures that all daemons come up into a valid state
11fdf7f2	56	status = self.fs.wait_for_daemons()
7c673cae FG	57
	58	# Check that files created in the initial client workload are still visible
	59	# in a client mount.
	60	with self.mount_a.mounted():
	61	self.mount_a.check_files()
	62
	63	# Verify that the journal really has been rewritten.
	64	journal_version = self.fs.get_journal_version()
	65	if journal_version != new_journal_version:
	66	raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
	67	new_journal_version, journal_version()
	68	))
	69
	70	# Verify that cephfs-journal-tool can now read the rewritten journal
f64942e4	71	inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
7c673cae FG	72	if not inspect_out.endswith(": OK"):
	73	raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
	74	inspect_out
	75	))
	76
f64942e4 AA	77	self.fs.journal_tool(["event", "get", "json",
f64942e4 AA	78	"--path", "/tmp/journal.json"], 0)
7c673cae FG	79	p = self.fs.tool_remote.run(
	80	args=[
	81	"python",
	82	"-c",
	83	"import json; print len(json.load(open('/tmp/journal.json')))"
	84	],
	85	stdout=StringIO())
	86	event_count = int(p.stdout.getvalue().strip())
	87	if event_count < 1000:
	88	# Approximate value of "lots", expected from having run fsstress
	89	raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))
	90
31f18b77	91	# Do some client work to check that writing the log is still working
7c673cae FG	92	with self.mount_a.mounted():
	93	workunit(self.ctx, {
	94	'clients': {
	95	"client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
	96	},
	97	"timeout": "3h"
	98	})
31f18b77 FG	99
31f18b77 FG	100	# Check that both an active and a standby replay are still up
11fdf7f2 TL	101	status = self.fs.status()
	102	self.assertEqual(len(list(self.fs.get_replays(status=status))), 1)
	103	self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1)