]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | import logging |
f6b5b4d7 | 2 | import random |
7c673cae FG |
3 | import time |
4 | from tasks.cephfs.fuse_mount import FuseMount | |
5 | from tasks.cephfs.cephfs_test_case import CephFSTestCase | |
20effc67 | 6 | from teuthology.exceptions import CommandFailedError |
7c673cae FG |
7 | |
8 | log = logging.getLogger(__name__) | |
9 | ||
class TestExports(CephFSTestCase):
    """
    Multi-MDS export/migration tests.
    """
    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 2

    def test_session_race(self):
        """
        Test session creation race.

        See: https://tracker.ceph.com/issues/24072#change-113056
        """

        self.fs.set_max_mds(2)
        fs_status = self.fs.wait_for_daemons()

        original_rank1 = self.fs.get_rank(rank=1, status=fs_status)

        # Pre-export a directory to rank 1 via an export pin.
        self.mount_a.run_shell(["mkdir", "-p", "a/aa"])
        self.mount_a.setfattr("a", "ceph.dir.pin", "1")
        self._wait_subtrees([('/a', 1)], status=fs_status, rank=1)

        # Enable the race injection on rank 1.
        self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "true"], rank=1)

        # Create another directory from the second client and pin it so a
        # second export towards rank 1 is attempted.
        self.mount_b.run_shell(["mkdir", "-p", "b/bb"])
        self.mount_b.setfattr("b", "ceph.dir.pin", "1")

        time.sleep(5)

        # Disable the injection again so the MDS does not stall a second time.
        self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "false"], rank=1)

        # Establish a session with rank 1 by listing a directory known to be
        # there; with the bug present this would crash rank 1.
        self.mount_b.run_shell(["ls", "a"])

        # Verify the same daemon still holds rank 1 (i.e. no standby takeover
        # happened because of a crash).
        current_rank1 = self.fs.get_rank(rank=1)
        self.assertEqual(original_rank1['gid'], current_rank1['gid'])
7c673cae | 50 | |
f6b5b4d7 TL |
class TestExportPin(CephFSTestCase):
    """
    Tests for explicit subtree pinning via the ceph.dir.pin extended
    attribute.
    """
    MDSS_REQUIRED = 3
    CLIENTS_REQUIRED = 1

    def setUp(self):
        CephFSTestCase.setUp(self)

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

        # A small nested directory hierarchy used by most tests below.
        self.mount_a.run_shell_payload("mkdir -p 1/2/3/4")

    def test_noop(self):
        """
        That pinning to -1 (the default/unset value) creates no subtree.
        """
        self.mount_a.setfattr("1", "ceph.dir.pin", "-1")
        time.sleep(30) # for something to not happen
        self._wait_subtrees([], status=self.status)

    def test_negative(self):
        """
        That pinning to an arbitrary negative rank creates no subtree.
        """
        self.mount_a.setfattr("1", "ceph.dir.pin", "-2341")
        time.sleep(30) # for something to not happen
        self._wait_subtrees([], status=self.status)

    def test_empty_pin(self):
        """
        That pinning a leaf (empty) directory creates no subtree.
        """
        self.mount_a.setfattr("1/2/3/4", "ceph.dir.pin", "1")
        time.sleep(30) # for something to not happen
        self._wait_subtrees([], status=self.status)

    def test_trivial(self):
        """
        That pinning a populated directory exports it to the target rank.
        """
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)

    def test_export_targets(self):
        """
        That rank 0 lists rank 1 in its export_targets after the export.
        """
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)
        self.status = self.fs.status()
        r0 = self.status.get_rank(self.fs.id, 0)
        # assertEqual gives a useful diff on failure, unlike assertTrue(x == y)
        self.assertEqual(sorted(r0['export_targets']), [1])

    def test_redundant(self):
        """
        That pinning a child to the rank its parent is already pinned to
        still creates a (redundant) subtree.
        """
        # redundant pin /1/2 to rank 1
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1), ('/1/2', 1)], status=self.status, rank=1)

    def test_reassignment(self):
        """
        That a pinned subtree can be reassigned to another rank.
        """
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1/2', 1)], status=self.status, rank=1)
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
        self._wait_subtrees([('/1/2', 0)], status=self.status, rank=0)

    def test_phantom_rank(self):
        """
        That pinning to a rank beyond max_mds has no effect.
        """
        self.mount_a.setfattr("1", "ceph.dir.pin", "0")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "10")
        time.sleep(30) # wait for nothing weird to happen
        self._wait_subtrees([('/1', 0)], status=self.status)

    def test_nested(self):
        """
        That nested pins create one subtree per pinned directory.
        """
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
        self.mount_a.setfattr("1/2/3", "ceph.dir.pin", "2")
        self._wait_subtrees([('/1', 1), ('/1/2', 0), ('/1/2/3', 2)], status=self.status, rank=2)

    def test_nested_unset(self):
        """
        That unsetting a nested pin (-1) merges the subtree back into its
        parent.
        """
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "2")
        self._wait_subtrees([('/1', 1), ('/1/2', 2)], status=self.status, rank=1)
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "-1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)

    def test_rename(self):
        """
        That a pinned directory keeps its pin when renamed into another
        subtree.
        """
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.run_shell_payload("mkdir -p 9/8/7")
        self.mount_a.setfattr("9/8", "ceph.dir.pin", "0")
        self._wait_subtrees([('/1', 1), ("/9/8", 0)], status=self.status, rank=0)
        self.mount_a.run_shell_payload("mv 9/8 1/2")
        self._wait_subtrees([('/1', 1), ("/1/2/8", 0)], status=self.status, rank=0)

    def test_getfattr(self):
        """
        That getfattr reports back the ceph.dir.pin values that were set.
        """
        # pin /1 to rank 1 and /1/2 to rank 0
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
        self._wait_subtrees([('/1', 1), ('/1/2', 0)], status=self.status, rank=1)

        if not isinstance(self.mount_a, FuseMount):
            p = self.mount_a.client_remote.sh('uname -r', wait=True)
            dir_pin = self.mount_a.getfattr("1", "ceph.dir.pin")
            log.debug("mount.getfattr('1','ceph.dir.pin'): %s " % dir_pin)
            # NOTE(review): lexicographic comparison of the kernel version
            # string; only valid while major versions are single digits —
            # confirm before kernels >= 10.
            if str(p) < "5" and not dir_pin:
                self.skipTest("Kernel does not support getting the extended attribute ceph.dir.pin")
        self.assertEqual(self.mount_a.getfattr("1", "ceph.dir.pin"), '1')
        self.assertEqual(self.mount_a.getfattr("1/2", "ceph.dir.pin"), '0')

    def test_export_pin_cache_drop(self):
        """
        That the export pin does not prevent empty (nothing in cache) subtree merging.
        """

        self.mount_a.setfattr("1", "ceph.dir.pin", "0")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 0), ('/1/2', 1)], status=self.status)
        self.mount_a.umount_wait() # release all caps
        def _drop():
            self.fs.ranks_tell(["cache", "drop"], status=self.status)
        # drop cache multiple times to clear replica pins
        self._wait_subtrees([], status=self.status, action=_drop)

    def test_open_file(self):
        """
        Test opening a file via a hard link that is not in the same mds as the inode.

        See https://tracker.ceph.com/issues/58411
        """

        self.mount_a.run_shell_payload("mkdir -p target link")
        self.mount_a.touch("target/test.txt")
        self.mount_a.run_shell_payload("ln target/test.txt link/test.txt")
        self.mount_a.setfattr("target", "ceph.dir.pin", "0")
        self.mount_a.setfattr("link", "ceph.dir.pin", "1")
        self._wait_subtrees([("/target", 0), ("/link", 1)], status=self.status)

        # Release client cache, otherwise the bug may not be triggered even if buggy.
        self.mount_a.remount()

        # Open the file with access mode(O_CREAT|O_WRONLY|O_TRUNC),
        # this should cause the rank 1 to crash if buggy.
        # It's OK to use 'truncate -s 0 link/test.txt' here,
        # its access mode is (O_CREAT|O_WRONLY), it can also trigger this bug.
        log.info("test open mode (O_CREAT|O_WRONLY|O_TRUNC)")
        proc = self.mount_a.open_for_writing("link/test.txt")
        time.sleep(1)
        success = proc.finished and self.fs.rank_is_running(rank=1)

        # Test other write modes too.
        if success:
            self.mount_a.remount()
            log.info("test open mode (O_WRONLY|O_TRUNC)")
            proc = self.mount_a.open_for_writing("link/test.txt", creat=False)
            time.sleep(1)
            success = proc.finished and self.fs.rank_is_running(rank=1)
        if success:
            self.mount_a.remount()
            log.info("test open mode (O_CREAT|O_WRONLY)")
            proc = self.mount_a.open_for_writing("link/test.txt", trunc=False)
            time.sleep(1)
            success = proc.finished and self.fs.rank_is_running(rank=1)

        # Test open modes too.
        if success:
            self.mount_a.remount()
            log.info("test open mode (O_RDONLY)")
            proc = self.mount_a.open_for_reading("link/test.txt")
            time.sleep(1)
            success = proc.finished and self.fs.rank_is_running(rank=1)

        if success:
            # All tests done, rank 1 didn't crash.
            return

        if not proc.finished:
            log.warning("open operation is blocked, kill it")
            proc.kill()

        if not self.fs.rank_is_running(rank=1):
            log.warning("rank 1 crashed")

        self.mount_a.umount_wait(force=True)

        self.assertTrue(success, "open operation failed")
220 | ||
f6b5b4d7 TL |
class TestEphemeralPins(CephFSTestCase):
    """
    Tests for ephemeral pin policies: ceph.dir.pin.distributed and
    ceph.dir.pin.random.
    """
    MDSS_REQUIRED = 3
    CLIENTS_REQUIRED = 1

    def setUp(self):
        CephFSTestCase.setUp(self)

        self.config_set('mds', 'mds_export_ephemeral_random', True)
        self.config_set('mds', 'mds_export_ephemeral_distributed', True)
        self.config_set('mds', 'mds_export_ephemeral_random_max', 1.0)

        self.mount_a.run_shell_payload("""
set -e

# Use up a random number of inode numbers so the ephemeral pinning is not the same every test.
mkdir .inode_number_thrash
count=$((RANDOM % 1024))
for ((i = 0; i < count; i++)); do touch .inode_number_thrash/$i; done
rm -rf .inode_number_thrash
""")

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

    def _setup_tree(self, path="tree", export=-1, distributed=False, random=0.0, count=100, wait=True):
        """
        Create a tree of `count` subdirectories under `path`, optionally
        applying an export pin, a distributed ephemeral pin, or a random
        ephemeral pin (probability `random`) to `path` first.
        """
        # NOTE: the `random` parameter shadows the `random` module inside this
        # function; the name is kept because callers pass it by keyword.
        return self.mount_a.run_shell_payload(f"""
set -ex
mkdir -p {path}
{f"setfattr -n ceph.dir.pin -v {export} {path}" if export >= 0 else ""}
{f"setfattr -n ceph.dir.pin.distributed -v 1 {path}" if distributed else ""}
{f"setfattr -n ceph.dir.pin.random -v {random} {path}" if random > 0.0 else ""}
for ((i = 0; i < {count}; i++)); do
    mkdir -p "{path}/$i"
    echo file > "{path}/$i/file"
done
""", wait=wait)

    def _count_migrations(self, subtrees_old, subtrees_new):
        """
        Count subtrees whose authoritative rank changed between the two
        subtree snapshots (matched by path).
        """
        count = 0
        for old_subtree in subtrees_old:
            for new_subtree in subtrees_new:
                if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']):
                    count += 1
                    break
        return count

    def test_ephemeral_pin_dist_override(self):
        """
        That an ephemeral distributed pin overrides a normal export pin.
        """

        self._setup_tree(distributed=True)
        subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        for s in subtrees:
            path = s['dir']['path']
            if path == '/tree':
                self.assertTrue(s['distributed_ephemeral_pin'])

    def test_ephemeral_pin_dist_override_pin(self):
        """
        That an export pin overrides an ephemerally pinned directory.
        """

        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        self.mount_a.setfattr("tree", "ceph.dir.pin", "0")
        time.sleep(15)
        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path == '/tree':
                self.assertEqual(s['auth_first'], 0)
                self.assertFalse(s['distributed_ephemeral_pin'])
        # it has been merged into /tree

    def test_ephemeral_pin_dist_off(self):
        """
        That turning off ephemeral distributed pin merges subtrees.
        """

        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "0")
        time.sleep(15)
        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path == '/tree':
                self.assertFalse(s['distributed_ephemeral_pin'])

    def test_ephemeral_pin_dist_conf_off(self):
        """
        That turning off ephemeral distributed pin config prevents distribution.
        """

        self._setup_tree()
        self.config_set('mds', 'mds_export_ephemeral_distributed', False)
        self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1")
        time.sleep(15)
        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path == '/tree':
                self.assertFalse(s['distributed_ephemeral_pin'])

    def _test_ephemeral_pin_dist_conf_off_merge(self):
        """
        That turning off ephemeral distributed pin config merges subtrees.
        FIXME: who triggers the merge?
        """

        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        self.config_set('mds', 'mds_export_ephemeral_distributed', False)
        self._wait_subtrees([('/tree', 0)], timeout=60, status=self.status)

    def test_ephemeral_pin_dist_override_before(self):
        """
        That a conventional export pin overrides the distributed policy _before_ distributed policy is set.
        """

        count = 10
        self._setup_tree(count=count)
        test = []
        for i in range(count):
            path = f"tree/{i}"
            self.mount_a.setfattr(path, "ceph.dir.pin", "1")
            test.append(("/"+path, 1))
        self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1")
        time.sleep(15) # for something to not happen...
        self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/")

    def test_ephemeral_pin_dist_override_after(self):
        """
        That a conventional export pin overrides the distributed policy _after_ distributed policy is set.
        """

        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        test = []
        for i in range(10):
            path = f"tree/{i}"
            self.mount_a.setfattr(path, "ceph.dir.pin", "1")
            test.append(("/"+path, 1))
        self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/")

    def test_ephemeral_pin_dist_failover(self):
        """
        That MDS failover does not cause unnecessary migrations.
        """

        # pin /tree so it does not export during failover
        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        before = self.fs.ranks_perf(lambda p: p['mds']['exported'])
        log.info(f"export stats: {before}")
        self.fs.rank_fail(rank=1)
        self.status = self.fs.wait_for_daemons()
        time.sleep(10) # waiting for something to not happen
        after = self.fs.ranks_perf(lambda p: p['mds']['exported'])
        log.info(f"export stats: {after}")
        self.assertEqual(before, after)

    def test_ephemeral_pin_distribution(self):
        """
        That ephemerally pinned subtrees are somewhat evenly distributed.
        """

        max_mds = 3
        frags = 128

        self.fs.set_max_mds(max_mds)
        self.status = self.fs.wait_for_daemons()

        self.config_set('mds', 'mds_export_ephemeral_distributed_factor', (frags-1) / max_mds)
        self._setup_tree(count=1000, distributed=True)

        subtrees = self._wait_distributed_subtrees(frags, status=self.status, rank="all")
        nsubtrees = len(subtrees)

        # Check if distribution is uniform
        rank0 = list(filter(lambda x: x['auth_first'] == 0, subtrees))
        rank1 = list(filter(lambda x: x['auth_first'] == 1, subtrees))
        rank2 = list(filter(lambda x: x['auth_first'] == 2, subtrees))
        self.assertGreaterEqual(len(rank0)/nsubtrees, 0.15)
        self.assertGreaterEqual(len(rank1)/nsubtrees, 0.15)
        self.assertGreaterEqual(len(rank2)/nsubtrees, 0.15)

    def test_ephemeral_random(self):
        """
        That 100% randomness causes all children to be pinned.
        """
        self._setup_tree(random=1.0)
        self._wait_random_subtrees(100, status=self.status, rank="all")

    def test_ephemeral_random_max(self):
        """
        That the config mds_export_ephemeral_random_max is not exceeded.
        """

        r = 0.5
        count = 1000
        self._setup_tree(count=count, random=r)
        subtrees = self._wait_random_subtrees(int(r*count*.75), status=self.status, rank="all")
        self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01)
        self._setup_tree(path="tree/new", count=count)
        time.sleep(30) # for something not to happen...
        subtrees = self._get_subtrees(status=self.status, rank="all", path="tree/new/")
        self.assertLessEqual(len(subtrees), int(.01*count*1.25))

    def test_ephemeral_random_max_config(self):
        """
        That the config mds_export_ephemeral_random_max config rejects new OOB policies.
        """

        self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01)
        try:
            p = self._setup_tree(count=1, random=0.02, wait=False)
            p.wait()
        except CommandFailedError as e:
            log.info(f"{e}")
            self.assertIn("Invalid", p.stderr.getvalue())
        else:
            raise RuntimeError("mds_export_ephemeral_random_max ignored!")

    def test_ephemeral_random_dist(self):
        """
        That ephemeral distributed pin overrides ephemeral random pin
        """

        self._setup_tree(random=1.0, distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status)

        time.sleep(15)
        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path.startswith('/tree'):
                self.assertFalse(s['random_ephemeral_pin'])

    def test_ephemeral_random_pin_override_before(self):
        """
        That a conventional export pin overrides the random policy before creating new directories.
        """

        self._setup_tree(count=0, random=1.0)
        self._setup_tree(path="tree/pin", count=10, export=1)
        self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin")

    def test_ephemeral_random_pin_override_after(self):
        """
        That a conventional export pin overrides the random policy after creating new directories.
        """

        count = 10
        self._setup_tree(count=0, random=1.0)
        self._setup_tree(path="tree/pin", count=count)
        self._wait_random_subtrees(count+1, status=self.status, rank="all")
        self.mount_a.setfattr("tree/pin", "ceph.dir.pin", "1")
        self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin")

    def test_ephemeral_randomness(self):
        """
        That the randomness is reasonable.
        """

        r = random.uniform(0.25, 0.75) # ratios don't work for small r!
        count = 1000
        self._setup_tree(count=count, random=r)
        subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all")
        time.sleep(30) # for max to not be exceeded
        subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all")
        self.assertLessEqual(len(subtrees), int(r*count*1.50))

    def test_ephemeral_random_cache_drop(self):
        """
        That the random ephemeral pin does not prevent empty (nothing in cache) subtree merging.
        """

        count = 100
        self._setup_tree(count=count, random=1.0)
        self._wait_random_subtrees(count, status=self.status, rank="all")
        self.mount_a.umount_wait() # release all caps
        def _drop():
            self.fs.ranks_tell(["cache", "drop"], status=self.status)
        self._wait_subtrees([], status=self.status, action=_drop)

    def test_ephemeral_random_failover(self):
        """
        That the random ephemeral pins stay pinned across MDS failover.
        """

        count = 100
        r = 0.5
        self._setup_tree(count=count, random=r)
        # wait for all random subtrees to be created, not a specific count
        time.sleep(30)
        subtrees = self._wait_random_subtrees(1, status=self.status, rank=1)
        before = [(s['dir']['path'], s['auth_first']) for s in subtrees]
        before.sort()

        self.fs.rank_fail(rank=1)
        self.status = self.fs.wait_for_daemons()

        time.sleep(30) # waiting for something to not happen
        subtrees = self._wait_random_subtrees(1, status=self.status, rank=1)
        after = [(s['dir']['path'], s['auth_first']) for s in subtrees]
        after.sort()
        log.info(f"subtrees before: {before}")
        log.info(f"subtrees after: {after}")

        self.assertEqual(before, after)

    def test_ephemeral_pin_grow_mds(self):
        """
        That consistent hashing works to reduce the number of migrations.
        """

        self.fs.set_max_mds(2)
        self.status = self.fs.wait_for_daemons()

        self._setup_tree(random=1.0)
        subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all")

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

        # Sleeping for a while to allow the ephemeral pin migrations to complete
        time.sleep(30)

        subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all")
        count = self._count_migrations(subtrees_old, subtrees_new)

        log.info("{0} migrations have occured due to the cluster resizing".format(count))
        # ~50% of subtrees from the two rank will migrate to another rank
        self.assertLessEqual((count/len(subtrees_old)), (0.5)*1.25) # with 25% overbudget

    def test_ephemeral_pin_shrink_mds(self):
        """
        That consistent hashing works to reduce the number of migrations.
        """

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

        self._setup_tree(random=1.0)
        subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all")

        self.fs.set_max_mds(2)
        self.status = self.fs.wait_for_daemons()
        time.sleep(30)

        subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all")
        count = self._count_migrations(subtrees_old, subtrees_new)

        log.info("{0} migrations have occured due to the cluster resizing".format(count))
        # rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2
        self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget