from StringIO import StringIO
from tasks.cephfs.cephfs_test_case import CephFSTestCase
from tasks.workunit import task as workunit

JOURNAL_FORMAT_LEGACY = 0
JOURNAL_FORMAT_RESILIENT = 1


class TestJournalMigration(CephFSTestCase):
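    """
    Check that a journal written in the legacy format (version 0) is
    rewritten into the resilient format (version 1) when the MDS daemons
    are restarted with the new format configured, without losing client
    data or breaking standby replay.
    """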
    CLIENTS_REQUIRED = 1
    MDSS_REQUIRED = 2

    def test_journal_migration(self):
        old_journal_version = JOURNAL_FORMAT_LEGACY
        new_journal_version = JOURNAL_FORMAT_RESILIENT

        # Pick out two daemons to use
        mds_a, mds_b = sorted(self.mds_cluster.mds_ids[0:2])

        self.mount_a.umount_wait()
        self.fs.mds_stop()

        # Enable standby replay, to cover the bug case #8811 where
        # a standby replay might mistakenly end up trying to rewrite
        # the journal at the same time as an active daemon.
        self.fs.set_ceph_conf('mds', 'mds standby replay', "true")
        self.fs.set_ceph_conf('mds', 'mds standby for rank', "0")

        # Create a filesystem using the older journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
        self.fs.recreate()
        self.fs.mds_restart(mds_id=mds_a)
        self.fs.wait_for_daemons()
        self.assertEqual(self.fs.get_active_names(), [mds_a])

        def replay_names():
            return [s['name']
                    for s in self.fs.status().get_replays(fscid=self.fs.id)]

        # Start the standby and wait for it to come up
        self.fs.mds_restart(mds_id=mds_b)
        self.wait_until_equal(
            replay_names,
            [mds_b],
            timeout=30)

        # Do some client work so that the log is populated with something.
        with self.mount_a.mounted():
            self.mount_a.create_files()
            self.mount_a.check_files()  # sanity, this should always pass

            # Run a more substantial workunit so that the length of the log
            # to be converted spans at least a few segments
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
                },
                "timeout": "3h"
            })

        # Modify the ceph.conf to ask the MDS to use the new journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

        # Restart the MDS.
        self.fs.mds_fail_restart(mds_id=mds_a)
        self.fs.mds_fail_restart(mds_id=mds_b)

        # This ensures that all daemons come up into a valid state
        self.fs.wait_for_daemons()

        # Check that files created in the initial client workload are still
        # visible in a client mount.
        with self.mount_a.mounted():
            self.mount_a.check_files()

        # Verify that the journal really has been rewritten.
        journal_version = self.fs.get_journal_version()
        if journal_version != new_journal_version:
            raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
                new_journal_version, journal_version
            ))

        # Verify that cephfs-journal-tool can now read the rewritten journal
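        # (a healthy journal is assumed to end the inspect output with a
        # line such as "Overall journal integrity: OK", which is what the
        # endswith check below relies on)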
        inspect_out = self.fs.journal_tool(["journal", "inspect"])
        if not inspect_out.endswith(": OK"):
            raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
                inspect_out
            ))

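        # Dump the journal events to a JSON file on the remote host and
        # count them there, to confirm that the rewritten journal still
        # holds the events generated by the earlier fsstress run.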
        self.fs.journal_tool(["event", "get", "json", "--path", "/tmp/journal.json"])
        p = self.fs.tool_remote.run(
            args=[
                "python",
                "-c",
                "import json; print len(json.load(open('/tmp/journal.json')))"
            ],
            stdout=StringIO())
        event_count = int(p.stdout.getvalue().strip())
        if event_count < 1000:
            # Approximate value of "lots", expected from having run fsstress
            raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

        # Do some client work to check that writing the log is still working
        with self.mount_a.mounted():
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
                },
                "timeout": "3h"
            })

        # Check that both an active and a standby replay are still up
        self.assertEqual(len(replay_names()), 1)
        self.assertEqual(len(self.fs.get_active_names()), 1)
        self.assertTrue(self.mds_cluster.mds_daemons[mds_a].running())
        self.assertTrue(self.mds_cluster.mds_daemons[mds_b].running())