]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | |
7c673cae FG |
2 | from tasks.cephfs.cephfs_test_case import CephFSTestCase |
3 | from tasks.workunit import task as workunit | |
4 | ||
# Journal format identifiers as written by the MDS ("mds journal format"
# config option): 0 is the pre-resilient on-disk layout, 1 is the current
# resilient layout that the migration under test upgrades to.
JOURNAL_FORMAT_LEGACY = 0
JOURNAL_FORMAT_RESILIENT = 1
8 | ||
class TestJournalMigration(CephFSTestCase):
    """Verify that an MDS journal written in the legacy format is rewritten
    to the resilient format when the MDS is restarted with the new
    'mds journal format' setting, without losing client data or events.
    """
    CLIENTS_REQUIRED = 1
    MDSS_REQUIRED = 2  # one active + one standby-replay daemon

    def test_journal_migration(self):
        old_journal_version = JOURNAL_FORMAT_LEGACY
        new_journal_version = JOURNAL_FORMAT_RESILIENT

        # Start from a clean slate: no client mounted, no MDS running.
        self.mount_a.umount_wait()
        self.fs.mds_stop()

        # Create a filesystem using the older journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
        self.fs.mds_restart()
        self.fs.recreate()

        # Enable standby replay, to cover the bug case #8811 where
        # a standby replay might mistakenly end up trying to rewrite
        # the journal at the same time as an active daemon.
        self.fs.set_allow_standby_replay(True)

        status = self.fs.wait_for_daemons()

        # Sanity: the standby-replay daemon must actually be present.
        self.assertIsNotNone(self.fs.get_replay(status=status))

        # Do some client work so that the log is populated with something.
        with self.mount_a.mounted():
            self.mount_a.create_files()
            self.mount_a.check_files()  # sanity, this should always pass

            # Run a more substantial workunit so that the length of the
            # log to be converted spans at least a few segments.
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
                },
                "timeout": "3h"
            })

        # Modify the ceph.conf to ask the MDS to use the new journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

        # Restart the MDS so that it picks up the new setting and
        # performs the on-disk journal rewrite.
        self.fs.mds_fail_restart()

        # This ensures that all daemons come up into a valid state.
        status = self.fs.wait_for_daemons()

        # Check that files created in the initial client workload are still
        # visible in a client mount.
        with self.mount_a.mounted():
            self.mount_a.check_files()

        # Verify that the journal really has been rewritten.
        journal_version = self.fs.get_journal_version()
        if journal_version != new_journal_version:
            # BUGFIX: previously formatted `journal_version()` — calling an
            # int — which raised TypeError instead of this RuntimeError.
            raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
                new_journal_version, journal_version
            ))

        # Verify that cephfs-journal-tool can now read the rewritten journal.
        inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
        if not inspect_out.endswith(": OK"):
            raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
                inspect_out
            ))

        # Dump the events as JSON and count them remotely; fsstress should
        # have generated a large number of journal events.
        self.fs.journal_tool(["event", "get", "json",
                              "--path", "/tmp/journal.json"], 0)
        p = self.fs.tool_remote.sh([
            "python3",
            "-c",
            "import json; print(len(json.load(open('/tmp/journal.json'))))"
        ])
        event_count = int(p.strip())
        if event_count < 1000:
            # Approximate value of "lots", expected from having run fsstress
            raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

        # Do some client work to check that writing the log is still working.
        with self.mount_a.mounted():
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
                },
                "timeout": "3h"
            })

        # Check that both an active rank and a standby replay are still up.
        status = self.fs.status()
        self.assertEqual(len(list(self.fs.get_replays(status=status))), 1)
        self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1)