]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | |
2 | from StringIO import StringIO | |
3 | from tasks.cephfs.cephfs_test_case import CephFSTestCase | |
4 | from tasks.workunit import task as workunit | |
5 | ||
# On-disk journal format identifiers, as accepted by the
# 'mds journal format' MDS config option used below.
JOURNAL_FORMAT_LEGACY = 0
JOURNAL_FORMAT_RESILIENT = 1
8 | ||
9 | ||
class TestJournalMigration(CephFSTestCase):
    """
    Exercise the MDS journal format migration path: create a filesystem
    whose journal is written in the legacy format, populate it with client
    work, then restart the MDS with 'mds journal format' set to the
    resilient version and verify that the journal was rewritten, remains
    readable by cephfs-journal-tool, and that clients and the
    standby-replay daemon survive the migration.
    """
    CLIENTS_REQUIRED = 1
    MDSS_REQUIRED = 2

    def test_journal_migration(self):
        old_journal_version = JOURNAL_FORMAT_LEGACY
        new_journal_version = JOURNAL_FORMAT_RESILIENT

        # Pick out two daemons to use
        mds_a, mds_b = sorted(self.mds_cluster.mds_ids[0:2])

        self.mount_a.umount_wait()
        self.fs.mds_stop()

        # Enable standby replay, to cover the bug case #8811 where
        # a standby replay might mistakenly end up trying to rewrite
        # the journal at the same time as an active daemon.
        self.fs.set_ceph_conf('mds', 'mds standby replay', "true")
        self.fs.set_ceph_conf('mds', 'mds standby for rank', "0")

        # Create a filesystem using the older journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
        self.fs.recreate()
        self.fs.mds_restart(mds_id=mds_a)
        self.fs.wait_for_daemons()
        self.assertEqual(self.fs.get_active_names(), [mds_a])

        def replay_names():
            # Names of the daemons currently in standby-replay for this fs
            return [s['name']
                    for s in self.fs.status().get_replays(fscid = self.fs.id)]

        # Start the standby and wait for it to come up
        self.fs.mds_restart(mds_id=mds_b)
        self.wait_until_equal(
            replay_names,
            [mds_b],
            timeout = 30)

        # Do some client work so that the log is populated with something.
        with self.mount_a.mounted():
            self.mount_a.create_files()
            self.mount_a.check_files()  # sanity, this should always pass

            # Run a more substantial workunit so that the length of the log
            # to be converted is going to span at least a few segments
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
                },
                "timeout": "3h"
            })

        # Modify the ceph.conf to ask the MDS to use the new journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

        # Restart the MDS.
        self.fs.mds_fail_restart(mds_id=mds_a)
        self.fs.mds_fail_restart(mds_id=mds_b)

        # This ensures that all daemons come up into a valid state
        self.fs.wait_for_daemons()

        # Check that files created in the initial client workload are still visible
        # in a client mount.
        with self.mount_a.mounted():
            self.mount_a.check_files()

        # Verify that the journal really has been rewritten.
        journal_version = self.fs.get_journal_version()
        if journal_version != new_journal_version:
            # FIX: journal_version is an int, not a callable -- the original
            # code did journal_version() here, which would have raised
            # TypeError instead of the intended RuntimeError.
            raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
                new_journal_version, journal_version
            ))

        # Verify that cephfs-journal-tool can now read the rewritten journal
        inspect_out = self.fs.journal_tool(["journal", "inspect"])
        if not inspect_out.endswith(": OK"):
            raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
                inspect_out
            ))

        # Dump all journal events to JSON and count them remotely; fsstress
        # should have generated well over a thousand.
        self.fs.journal_tool(["event", "get", "json", "--path", "/tmp/journal.json"])
        p = self.fs.tool_remote.run(
            args=[
                "python",
                "-c",
                "import json; print len(json.load(open('/tmp/journal.json')))"
            ],
            stdout=StringIO())
        event_count = int(p.stdout.getvalue().strip())
        if event_count < 1000:
            # Approximate value of "lots", expected from having run fsstress
            raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

        # Do some client work to check that writing the log is still working
        with self.mount_a.mounted():
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
                },
                "timeout": "3h"
            })

        # Check that both an active and a standby replay are still up
        self.assertEqual(len(replay_names()), 1)
        self.assertEqual(len(self.fs.get_active_names()), 1)
        self.assertTrue(self.mds_cluster.mds_daemons[mds_a].running())
        self.assertTrue(self.mds_cluster.mds_daemons[mds_b].running())