]>
Commit | Line | Data |
---|---|---|
94b18763 FG |
1 | """ |
2 | Test CephFS scrub (distinct from OSD scrub) functionality | |
3 | """ | |
f67539c2 TL |
4 | |
5 | from io import BytesIO | |
94b18763 | 6 | import logging |
94b18763 FG |
7 | from collections import namedtuple |
8 | ||
9f95a23c | 9 | from tasks.cephfs.cephfs_test_case import CephFSTestCase |
94b18763 FG |
10 | |
11 | log = logging.getLogger(__name__) | |
12 | ||
13 | ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) | |
14 | ||
15 | ||
class Workload(CephFSTestCase):
    """
    Base class for a scrub workload: subclasses write some files to the
    mount, damage the filesystem pools, and then validate what a repair
    scrub was able to recover.
    """
    def __init__(self, test, filesystem, mount):
        super().__init__()
        self._test = test
        self._mount = mount
        self._filesystem = filesystem
        self._initial_state = None
        # Accumulate backtraces for every failed validation, and return them.
        # Backtraces are rather verbose, but we only see them when something
        # breaks, and they let us see which check failed without having to
        # decorate each check with a string.
        self._errors = []

    def write(self):
        """
        Write the workload files to the mount.
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present
        (i.e. have survived or been reconstructed from the test scenario).
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to
        recover from.  By default just wipe everything in the metadata pool.
        """
        # Delete every object in the metadata pool
        metadata_pool = self._filesystem.get_metadata_pool_name()
        self._filesystem.rados(["purge", metadata_pool, '--yes-i-really-really-mean-it'])

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want.
        """
        self._filesystem.mds_asok(["flush", "journal"])
57 | ||
58 | ||
class BacktraceWorkload(Workload):
    """
    Single file in a single directory: wipe the file's backtrace xattr and
    check that scrub restores it.
    """
    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        self._mount.write_n_mb("subdir/sixmegs", 6)

    def validate(self):
        # Flush the journal so the backtrace is persisted, then check that
        # the innermost ancestor dentry of the inode is the file itself.
        file_stat = self._mount.stat("subdir/sixmegs")
        self._filesystem.mds_asok(["flush", "journal"])
        backtrace = self._filesystem.read_backtrace(file_stat['st_ino'])
        self.assertEqual(backtrace['ancestors'][0]['dname'], 'sixmegs')
        return self._errors

    def damage(self):
        # Overwrite the on-disk "parent" xattr (the backtrace) with an
        # empty value.
        file_stat = self._mount.stat("subdir/sixmegs")
        self._filesystem.mds_asok(["flush", "journal"])
        self._filesystem._write_data_xattr(file_stat['st_ino'], "parent", "")

    def create_files(self, nfiles=1000):
        self._mount.create_n_files("scrub-new-files/file", nfiles)
82 | ||
94b18763 FG |
83 | |
class DupInodeWorkload(Workload):
    """
    Duplicate an inode and try scrubbing it twice.
    """

    def write(self):
        self._mount.run_shell(["mkdir", "parent"])
        self._mount.run_shell(["mkdir", "parent/child"])
        self._mount.write_n_mb("parent/parentfile", 6)
        self._mount.write_n_mb("parent/child/childfile", 6)

    def damage(self):
        # Take the MDS offline so we can edit the metadata pool directly.
        self._mount.umount_wait()
        self._filesystem.mds_asok(["flush", "journal"])
        self._filesystem.fail()
        # Copy the dentry omap value for parentfile into a second dentry
        # ("shadow") in the same dirfrag, creating a duplicate inode.
        dentry_value = self._filesystem.radosmo(["getomapval", "10000000000.00000000", "parentfile_head", "-"])
        self._filesystem.radosm(["setomapval", "10000000000.00000000", "shadow_head"], stdin=BytesIO(dentry_value))
        # The MDS would normally refuse to load such metadata; allow it for
        # the purposes of this test.
        self._test.config_set('mds', 'mds_hack_allow_loading_invalid_metadata', True)
        self._filesystem.set_joinable()
        self._filesystem.wait_for_daemons()

    def validate(self):
        # A recursive repair scrub must complete and leave the daemons healthy.
        scrub_status = self._filesystem.run_scrub(["start", "/", "recursive,repair"])
        self.assertNotEqual(scrub_status, None)
        self.assertEqual(scrub_status["return_code"], 0)
        self.assertEqual(self._filesystem.wait_until_scrub_complete(tag=scrub_status["scrub_tag"]), True)
        self.assertTrue(self._filesystem.are_daemons_healthy())
        return self._errors
112 | ||
113 | ||
class TestScrub(CephFSTestCase):
    MDSS_REQUIRED = 1

    def setUp(self):
        super().setUp()

    def _scrub(self, workload, workers=1):
        """
        Write the workload's files, let the workload damage the pools, then
        run a recursive repair scrub and check that the workload's files
        still validate cleanly afterwards.
        """
        # First, inject some files
        workload.write()

        # These options are off by default, but in QA we need to explicitly
        # disable them (QA turns them on).
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Kick off a recursive repair scrub from the root and wait for it.
        scrub_status = self.fs.run_scrub(["start", "/", "recursive,repair"])
        self.assertNotEqual(scrub_status, None)
        self.assertEqual(scrub_status["return_code"], 0)
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=scrub_status["scrub_tag"]), True)

        # See that the files are present and correct
        errors = workload.validate()
        if not errors:
            return
        log.error("Validation errors found: {0}".format(len(errors)))
        for error in errors:
            log.error(error.exception)
            log.error(error.backtrace)
        raise AssertionError("Validation failed, first error: {0}\n{1}".format(
            errors[0].exception, errors[0].backtrace
        ))

    def _get_damage_count(self, damage_type='backtrace'):
        """
        Count the entries of the given type in the MDS damage table.
        """
        damage_entries = self.fs.rank_tell(["damage", "ls"])
        self.assertNotEqual(damage_entries, None)
        return sum(1 for entry in damage_entries
                   if entry['damage_type'] == damage_type)

    def _scrub_new_files(self, workload):
        """
        That scrubbing new files does not lead to errors.
        """
        workload.create_files(1000)
        self.fs.wait_until_scrub_complete()
        self.assertEqual(self._get_damage_count(), 0)

    def test_scrub_backtrace_for_new_files(self):
        self._scrub_new_files(BacktraceWorkload(self, self.fs, self.mount_a))

    def test_scrub_backtrace(self):
        self._scrub(BacktraceWorkload(self, self.fs, self.mount_a))

    def test_scrub_dup_inode(self):
        self._scrub(DupInodeWorkload(self, self.fs, self.mount_a))