]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | |
2 | from StringIO import StringIO | |
3 | from tasks.cephfs.cephfs_test_case import CephFSTestCase | |
4 | from tasks.workunit import task as workunit | |
5 | ||
# Values written to the 'mds journal format' config option by the test below.
JOURNAL_FORMAT_LEGACY = 0
JOURNAL_FORMAT_RESILIENT = 1
8 | ||
9 | ||
class TestJournalMigration(CephFSTestCase):
    """Check that an MDS journal written in the legacy format is rewritten
    in the resilient format once the daemons restart with the new setting.
    """

    # Resource requirements; presumably read by the CephFSTestCase harness
    # (TODO confirm). Two MDS daemons are needed because the test expects
    # one active daemon and one standby replay daemon.
    CLIENTS_REQUIRED = 1
    MDSS_REQUIRED = 2

    def test_journal_migration(self):
        """Populate a legacy-format journal, switch formats, and verify.

        Raises:
            RuntimeError: if the journal version was not upgraded, if
                ``journal inspect`` does not end with ": OK", or if fewer
                than 1000 journal events are found after the rewrite.
        """
        old_journal_version = JOURNAL_FORMAT_LEGACY
        new_journal_version = JOURNAL_FORMAT_RESILIENT

        self.mount_a.umount_wait()
        self.fs.mds_stop()

        # Create a filesystem using the older journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
        self.fs.mds_restart()
        self.fs.recreate()

        # Enable standby replay, to cover the bug case #8811 where
        # a standby replay might mistakenly end up trying to rewrite
        # the journal at the same time as an active daemon.
        self.fs.set_allow_standby_replay(True)

        status = self.fs.wait_for_daemons()

        self.assertIsNotNone(self.fs.get_replay(status=status))

        # Do some client work so that the log is populated with something.
        with self.mount_a.mounted():
            self.mount_a.create_files()
            self.mount_a.check_files()  # sanity, this should always pass

            # Run a more substantial workunit so that the length of the log
            # to be converted is going to span at least a few segments.
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
                },
                "timeout": "3h"
            })

        # Modify the ceph.conf to ask the MDS to use the new journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

        # Restart the MDS.
        self.fs.mds_fail_restart()

        # This ensures that all daemons come up into a valid state.
        self.fs.wait_for_daemons()

        # Check that files created in the initial client workload are still
        # visible in a client mount.
        with self.mount_a.mounted():
            self.mount_a.check_files()

        # Verify that the journal really has been rewritten.
        journal_version = self.fs.get_journal_version()
        if journal_version != new_journal_version:
            # journal_version is a plain value, not a callable: format it
            # directly so the failure surfaces as this RuntimeError.
            raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
                new_journal_version, journal_version
            ))

        # Verify that cephfs-journal-tool can now read the rewritten journal.
        # NOTE(review): the trailing 0 is presumably the MDS rank - confirm.
        inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
        if not inspect_out.endswith(": OK"):
            raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
                inspect_out
            ))

        # Dump the journal events as JSON, then count them on the remote host.
        self.fs.journal_tool(["event", "get", "json",
                              "--path", "/tmp/journal.json"], 0)
        p = self.fs.tool_remote.run(
            args=[
                "python3",
                "-c",
                "import json; print(len(json.load(open('/tmp/journal.json'))))"
            ],
            stdout=StringIO())
        event_count = int(p.stdout.getvalue().strip())
        if event_count < 1000:
            # Approximate value of "lots", expected from having run fsstress
            raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

        # Do some client work to check that writing the log is still working
        with self.mount_a.mounted():
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
                },
                "timeout": "3h"
            })

        # Check that both an active and a standby replay are still up
        status = self.fs.status()
        self.assertEqual(len(list(self.fs.get_replays(status=status))), 1)
        self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1)