]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | import logging |
2 | import errno | |
3 | from tasks.cephfs.cephfs_test_case import CephFSTestCase | |
4 | from teuthology.contextutil import safe_while | |
5 | from teuthology.orchestra.run import CommandFailedError | |
6 | ||
7 | log = logging.getLogger(__name__) | |
8 | ||
class TestScrub2(CephFSTestCase):
    """Tests for forward scrub across multiple active MDS ranks."""
    # three active MDS daemons so subtrees can be pinned to ranks 0, 1 and 2
    MDSS_REQUIRED = 3
    # a single client mount creates the tree and resolves inode numbers
    CLIENTS_REQUIRED = 1
13 | def _check_scrub_status(self, result=None, reverse=False): | |
14 | self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=1, | |
15 | sleep=5, timeout=30, | |
16 | reverse=reverse), True) | |
17 | self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=2, | |
18 | sleep=5, timeout=30, | |
19 | reverse=reverse), True) | |
20 | self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=0, | |
21 | sleep=5, timeout=30, | |
22 | reverse=reverse), True) | |
23 | ||
24 | def _check_task_status_na(self, timo=120): | |
25 | """ check absence of scrub status in ceph status """ | |
26 | with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: | |
27 | while proceed(): | |
28 | active = self.fs.get_active_names() | |
29 | log.debug("current active={0}".format(active)) | |
30 | task_status = self.fs.get_task_status("scrub status") | |
31 | if not active[0] in task_status: | |
32 | return True | |
33 | ||
34 | def _check_task_status(self, expected_status, timo=120): | |
35 | """ check scrub status for current active mds in ceph status """ | |
36 | with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: | |
37 | while proceed(): | |
38 | active = self.fs.get_active_names() | |
39 | log.debug("current active={0}".format(active)) | |
40 | task_status = self.fs.get_task_status("scrub status") | |
41 | try: | |
42 | if task_status[active[0]].startswith(expected_status): | |
43 | return True | |
44 | except KeyError: | |
45 | pass | |
46 | ||
47 | def _find_path_inos(self, root_path): | |
48 | inos = [] | |
49 | p = self.mount_a.run_shell(["find", root_path]) | |
50 | paths = p.stdout.getvalue().strip().split() | |
51 | for path in paths: | |
52 | inos.append(self.mount_a.path_to_ino(path)) | |
53 | return inos | |
54 | ||
55 | def _setup_subtrees(self): | |
56 | self.fs.set_max_mds(3) | |
57 | self.fs.wait_for_daemons() | |
58 | status = self.fs.status() | |
59 | ||
60 | path = 'd1/d2/d3/d4/d5/d6/d7/d8' | |
61 | self.mount_a.run_shell(['mkdir', '-p', path]) | |
62 | self.mount_a.run_shell(['sync', path]) | |
63 | ||
64 | self.mount_a.setfattr("d1/d2", "ceph.dir.pin", "0") | |
65 | self.mount_a.setfattr("d1/d2/d3/d4", "ceph.dir.pin", "1") | |
66 | self.mount_a.setfattr("d1/d2/d3/d4/d5/d6", "ceph.dir.pin", "2") | |
67 | ||
68 | self._wait_subtrees([('/d1/d2', 0), ('/d1/d2/d3/d4', 1)], status, 0) | |
69 | self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 1) | |
70 | self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 2) | |
71 | ||
72 | for rank in range(3): | |
73 | self.fs.rank_tell(["flush", "journal"], rank) | |
74 | ||
75 | def test_apply_tag(self): | |
76 | self._setup_subtrees() | |
77 | inos = self._find_path_inos('d1/d2/d3/') | |
78 | ||
79 | tag = "tag123" | |
80 | out_json = self.fs.rank_tell(["tag", "path", "/d1/d2/d3", tag], 0) | |
81 | self.assertNotEqual(out_json, None) | |
82 | self.assertEqual(out_json["return_code"], 0) | |
83 | self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) | |
84 | ||
85 | def assertTagged(ino): | |
86 | file_obj_name = "{0:x}.00000000".format(ino) | |
87 | self.fs.radosm(["getxattr", file_obj_name, "scrub_tag"]) | |
88 | ||
89 | for ino in inos: | |
90 | assertTagged(ino) | |
91 | ||
92 | def test_scrub_backtrace(self): | |
93 | self._setup_subtrees() | |
94 | inos = self._find_path_inos('d1/d2/d3/') | |
95 | ||
96 | for ino in inos: | |
97 | file_obj_name = "{0:x}.00000000".format(ino) | |
98 | self.fs.radosm(["rmxattr", file_obj_name, "parent"]) | |
99 | ||
b3b6e05e | 100 | out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) |
f67539c2 TL |
101 | self.assertNotEqual(out_json, None) |
102 | self.assertEqual(out_json["return_code"], 0) | |
103 | self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) | |
104 | ||
105 | def _check_damage(mds_rank, inos): | |
106 | all_damage = self.fs.rank_tell(["damage", "ls"], mds_rank) | |
107 | damage = [d for d in all_damage if d['ino'] in inos and d['damage_type'] == "backtrace"] | |
108 | return len(damage) >= len(inos) | |
109 | ||
110 | self.assertTrue(_check_damage(0, inos[0:2])) | |
111 | self.assertTrue(_check_damage(1, inos[2:4])) | |
112 | self.assertTrue(_check_damage(2, inos[4:6])) | |
113 | ||
114 | def test_scrub_non_mds0(self): | |
115 | self._setup_subtrees() | |
116 | ||
117 | def expect_exdev(cmd, mds): | |
118 | try: | |
119 | self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(mds), *cmd) | |
120 | except CommandFailedError as e: | |
121 | if e.exitstatus == errno.EXDEV: | |
122 | pass | |
123 | else: | |
124 | raise | |
125 | else: | |
126 | raise RuntimeError("expected failure") | |
127 | ||
128 | rank1 = self.fs.get_rank(rank=1) | |
129 | expect_exdev(["scrub", "start", "/d1/d2/d3"], rank1["name"]) | |
130 | expect_exdev(["scrub", "abort"], rank1["name"]) | |
131 | expect_exdev(["scrub", "pause"], rank1["name"]) | |
132 | expect_exdev(["scrub", "resume"], rank1["name"]) | |
133 | ||
134 | def test_scrub_abort_mds0(self): | |
135 | self._setup_subtrees() | |
136 | ||
137 | inos = self._find_path_inos('d1/d2/d3/') | |
138 | ||
139 | for ino in inos: | |
140 | file_obj_name = "{0:x}.00000000".format(ino) | |
141 | self.fs.radosm(["rmxattr", file_obj_name, "parent"]) | |
142 | ||
b3b6e05e | 143 | out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) |
f67539c2 TL |
144 | self.assertNotEqual(out_json, None) |
145 | ||
146 | res = self.fs.run_scrub(["abort"]) | |
147 | self.assertEqual(res['return_code'], 0) | |
148 | ||
149 | # Abort and verify in both mdss. We also check the status in rank 0 mds because | |
150 | # it is supposed to gather the scrub status from other mdss. | |
151 | self._check_scrub_status() | |
152 | ||
153 | # sleep enough to fetch updated task status | |
154 | checked = self._check_task_status_na() | |
155 | self.assertTrue(checked) | |
156 | ||
157 | def test_scrub_pause_and_resume_mds0(self): | |
158 | self._setup_subtrees() | |
159 | ||
160 | inos = self._find_path_inos('d1/d2/d3/') | |
161 | ||
162 | for ino in inos: | |
163 | file_obj_name = "{0:x}.00000000".format(ino) | |
164 | self.fs.radosm(["rmxattr", file_obj_name, "parent"]) | |
165 | ||
b3b6e05e | 166 | out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) |
f67539c2 TL |
167 | self.assertNotEqual(out_json, None) |
168 | ||
169 | res = self.fs.run_scrub(["pause"]) | |
170 | self.assertEqual(res['return_code'], 0) | |
171 | ||
172 | self._check_scrub_status(result="PAUSED") | |
173 | ||
174 | checked = self._check_task_status("paused") | |
175 | self.assertTrue(checked) | |
176 | ||
177 | # resume and verify | |
178 | res = self.fs.run_scrub(["resume"]) | |
179 | self.assertEqual(res['return_code'], 0) | |
180 | ||
181 | self._check_scrub_status(result="PAUSED", reverse=True) | |
182 | ||
183 | checked = self._check_task_status_na() | |
184 | self.assertTrue(checked) | |
185 | ||
186 | def test_scrub_pause_and_resume_with_abort_mds0(self): | |
187 | self._setup_subtrees() | |
188 | ||
189 | inos = self._find_path_inos('d1/d2/d3/') | |
190 | ||
191 | for ino in inos: | |
192 | file_obj_name = "{0:x}.00000000".format(ino) | |
193 | self.fs.radosm(["rmxattr", file_obj_name, "parent"]) | |
194 | ||
b3b6e05e | 195 | out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) |
f67539c2 TL |
196 | self.assertNotEqual(out_json, None) |
197 | ||
198 | res = self.fs.run_scrub(["pause"]) | |
199 | self.assertEqual(res['return_code'], 0) | |
200 | ||
201 | self._check_scrub_status(result="PAUSED") | |
202 | ||
203 | checked = self._check_task_status("paused") | |
204 | self.assertTrue(checked) | |
205 | ||
206 | res = self.fs.run_scrub(["abort"]) | |
207 | self.assertEqual(res['return_code'], 0) | |
208 | ||
209 | self._check_scrub_status(result="PAUSED") | |
210 | self._check_scrub_status(result="0 inodes") | |
211 | ||
212 | # scrub status should still be paused... | |
213 | checked = self._check_task_status("paused") | |
214 | self.assertTrue(checked) | |
215 | ||
216 | # resume and verify | |
217 | res = self.fs.run_scrub(["resume"]) | |
218 | self.assertEqual(res['return_code'], 0) | |
219 | ||
220 | self._check_scrub_status(result="PAUSED", reverse=True) | |
221 | ||
222 | checked = self._check_task_status_na() | |
223 | self.assertTrue(checked) |