# qa/tasks/cephfs/test_exports.py (recovered from a mangled web scrape of
# git.proxmox.com's ceph mirror; reconstructed line breaks below)
import logging
import random
import time

from tasks.cephfs.cephfs_test_case import CephFSTestCase
from tasks.cephfs.fuse_mount import FuseMount
from teuthology.exceptions import CommandFailedError
8 log
= logging
.getLogger(__name__
)
class TestExports(CephFSTestCase):
    """
    Tests for subtree export behavior across multiple active MDS ranks.
    """
    # NOTE(review): the scrape dropped the class attributes here; the test
    # pins to rank 1 and uses both mount_a and mount_b, so two active MDS
    # ranks and two clients are required -- confirm against upstream.
    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 2

    def test_session_race(self):
        """
        Test session creation race.

        See: https://tracker.ceph.com/issues/24072#change-113056
        """

        self.fs.set_max_mds(2)
        status = self.fs.wait_for_daemons()

        rank1 = self.fs.get_rank(rank=1, status=status)

        # Create a directory that is pre-exported to rank 1
        self.mount_a.run_shell(["mkdir", "-p", "a/aa"])
        self.mount_a.setfattr("a", "ceph.dir.pin", "1")
        self._wait_subtrees([('/a', 1)], status=status, rank=1)

        # Now set the mds config to allow the race
        self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "true"], rank=1)

        # Now create another directory and try to export it
        self.mount_b.run_shell(["mkdir", "-p", "b/bb"])
        self.mount_b.setfattr("b", "ceph.dir.pin", "1")

        # NOTE(review): the scrape dropped the lines between the pin above and
        # the config reset below; presumably a short sleep lets the injected
        # race engage before it is disabled -- confirm against upstream.
        time.sleep(5)

        # Now turn off the race so that it doesn't wait again
        self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "false"], rank=1)

        # Now try to create a session with rank 1 by accessing a dir known to
        # be there, if buggy, this should cause the rank 1 to crash:
        self.mount_b.run_shell(["ls", "a"])

        # Check if rank1 changed (standby tookover?)
        new_rank1 = self.fs.get_rank(rank=1)
        self.assertEqual(rank1['gid'], new_rank1['gid'])
class TestExportPin(CephFSTestCase):
    """
    Tests for the ceph.dir.pin extended attribute (explicit export pinning).
    """
    # NOTE(review): class attributes lost in extraction; setUp drives three
    # active ranks, so three MDS daemons and one client are required -- confirm.
    MDSS_REQUIRED = 3
    CLIENTS_REQUIRED = 1

    def setUp(self):
        CephFSTestCase.setUp(self)

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

        # Every test pins within this pre-created hierarchy.
        self.mount_a.run_shell_payload("mkdir -p 1/2/3/4")

    def test_noop(self):
        # NOTE(review): this method's `def` line was lost in extraction; the
        # name is reconstructed. Pinning to -1 (unset) must change nothing.
        self.mount_a.setfattr("1", "ceph.dir.pin", "-1")
        time.sleep(30) # for something to not happen
        self._wait_subtrees([], status=self.status)

    def test_negative(self):
        # An out-of-range negative pin value must be ignored.
        self.mount_a.setfattr("1", "ceph.dir.pin", "-2341")
        time.sleep(30) # for something to not happen
        self._wait_subtrees([], status=self.status)

    def test_empty_pin(self):
        # Pinning an empty leaf directory must not create a subtree bound.
        self.mount_a.setfattr("1/2/3/4", "ceph.dir.pin", "1")
        time.sleep(30) # for something to not happen
        self._wait_subtrees([], status=self.status)

    def test_trivial(self):
        # The simplest case: pin /1 to rank 1 and wait for the export.
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)

    def test_export_targets(self):
        # After /1 is exported to rank 1, rank 0 must list rank 1 among its
        # export targets in the MDSMap.
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)
        self.status = self.fs.status()
        r0 = self.status.get_rank(self.fs.id, 0)
        self.assertTrue(sorted(r0['export_targets']) == [1])

    def test_redundant(self):
        # redundant pin /1/2 to rank 1
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1), ('/1/2', 1)], status=self.status, rank=1)

    def test_reassignment(self):
        # A pinned subtree can be re-pinned to a different rank.
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1/2', 1)], status=self.status, rank=1)
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
        self._wait_subtrees([('/1/2', 0)], status=self.status, rank=0)

    def test_phantom_rank(self):
        # Pinning to a rank that does not exist (10) must not migrate anything.
        self.mount_a.setfattr("1", "ceph.dir.pin", "0")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "10")
        time.sleep(30) # wait for nothing weird to happen
        self._wait_subtrees([('/1', 0)], status=self.status)

    def test_nested(self):
        # Pins nest: each level may be bound to a different rank.
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
        self.mount_a.setfattr("1/2/3", "ceph.dir.pin", "2")
        self._wait_subtrees([('/1', 1), ('/1/2', 0), ('/1/2/3', 2)], status=self.status, rank=2)

    def test_nested_unset(self):
        # Unsetting a nested pin (-1) merges it back into the parent subtree.
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "2")
        self._wait_subtrees([('/1', 1), ('/1/2', 2)], status=self.status, rank=1)
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "-1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)

    def test_rename(self):
        # A pinned directory keeps its pin when renamed under another pin.
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.run_shell_payload("mkdir -p 9/8/7")
        self.mount_a.setfattr("9/8", "ceph.dir.pin", "0")
        self._wait_subtrees([('/1', 1), ("/9/8", 0)], status=self.status, rank=0)
        self.mount_a.run_shell_payload("mv 9/8 1/2")
        self._wait_subtrees([('/1', 1), ("/1/2/8", 0)], status=self.status, rank=0)

    def test_getfattr(self):
        # Pin /1 to rank 1 and /1/2 to rank 0, then read the pins back.
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
        self._wait_subtrees([('/1', 1), ('/1/2', 0)], status=self.status, rank=1)

        if not isinstance(self.mount_a, FuseMount):
            p = self.mount_a.client_remote.sh('uname -r', wait=True)
            dir_pin = self.mount_a.getfattr("1", "ceph.dir.pin")
            log.debug("mount.getfattr('1','ceph.dir.pin'): %s " % dir_pin)
            # Kernel clients older than 5.x may not expose ceph.dir.pin via
            # getfattr; skip rather than fail on those kernels.
            if str(p) < "5" and not(dir_pin):
                self.skipTest("Kernel does not support getting the extended attribute ceph.dir.pin")
        self.assertEqual(self.mount_a.getfattr("1", "ceph.dir.pin"), '1')
        self.assertEqual(self.mount_a.getfattr("1/2", "ceph.dir.pin"), '0')

    def test_export_pin_cache_drop(self):
        """
        That the export pin does not prevent empty (nothing in cache) subtree merging.
        """

        self.mount_a.setfattr("1", "ceph.dir.pin", "0")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 0), ('/1/2', 1)], status=self.status)
        self.mount_a.umount_wait() # release all caps
        # NOTE(review): the `def _drop():` header was lost in extraction; the
        # ranks_tell call below is clearly its body (it is passed as `action`).
        def _drop():
            self.fs.ranks_tell(["cache", "drop"], status=self.status)
        # drop cache multiple times to clear replica pins
        self._wait_subtrees([], status=self.status, action=_drop)

    def test_open_file(self):
        """
        Test opening a file via a hard link that is not in the same mds as the inode.

        See https://tracker.ceph.com/issues/58411
        """

        # NOTE(review): the scrape dropped the glue between the open attempts
        # below (sleeps and the guards that stop after the first failure); the
        # `if success:` structure is reconstructed -- confirm against upstream.

        self.mount_a.run_shell_payload("mkdir -p target link")
        self.mount_a.touch("target/test.txt")
        self.mount_a.run_shell_payload("ln target/test.txt link/test.txt")
        self.mount_a.setfattr("target", "ceph.dir.pin", "0")
        self.mount_a.setfattr("link", "ceph.dir.pin", "1")
        self._wait_subtrees([("/target", 0), ("/link", 1)], status=self.status)

        # Release client cache, otherwise the bug may not be triggered even if buggy.
        self.mount_a.remount()

        # Open the file with access mode(O_CREAT|O_WRONLY|O_TRUNC),
        # this should cause the rank 1 to crash if buggy.
        # It's OK to use 'truncate -s 0 link/test.txt' here,
        # its access mode is (O_CREAT|O_WRONLY), it can also trigger this bug.
        log.info("test open mode (O_CREAT|O_WRONLY|O_TRUNC)")
        proc = self.mount_a.open_for_writing("link/test.txt")
        time.sleep(1)
        success = proc.finished and self.fs.rank_is_running(rank=1)

        # Test other write modes too.
        if success:
            self.mount_a.remount()
            log.info("test open mode (O_WRONLY|O_TRUNC)")
            proc = self.mount_a.open_for_writing("link/test.txt", creat=False)
            time.sleep(1)
            success = proc.finished and self.fs.rank_is_running(rank=1)

        if success:
            self.mount_a.remount()
            log.info("test open mode (O_CREAT|O_WRONLY)")
            proc = self.mount_a.open_for_writing("link/test.txt", trunc=False)
            time.sleep(1)
            success = proc.finished and self.fs.rank_is_running(rank=1)

        # Test open modes too.
        if success:
            self.mount_a.remount()
            log.info("test open mode (O_RDONLY)")
            proc = self.mount_a.open_for_reading("link/test.txt")
            time.sleep(1)
            success = proc.finished and self.fs.rank_is_running(rank=1)

        # All tests done, rank 1 didn't crash.

        if not proc.finished:
            log.warning("open operation is blocked, kill it")
            proc.kill()

        if not self.fs.rank_is_running(rank=1):
            log.warning("rank 1 crashed")

        self.mount_a.umount_wait(force=True)

        self.assertTrue(success, "open operation failed")
class TestEphemeralPins(CephFSTestCase):
    """
    Tests for ephemeral pinning policies: ceph.dir.pin.distributed and
    ceph.dir.pin.random.
    """
    # NOTE(review): class attributes lost in extraction; setUp drives three
    # active ranks, so three MDS daemons and one client are required -- confirm.
    MDSS_REQUIRED = 3
    CLIENTS_REQUIRED = 1

    def setUp(self):
        CephFSTestCase.setUp(self)

        self.config_set('mds', 'mds_export_ephemeral_random', True)
        self.config_set('mds', 'mds_export_ephemeral_distributed', True)
        self.config_set('mds', 'mds_export_ephemeral_random_max', 1.0)

        self.mount_a.run_shell_payload("""
set -e

# Use up a random number of inode numbers so the ephemeral pinning is not the same every test.
mkdir .inode_number_thrash
count=$((RANDOM % 1024))
for ((i = 0; i < count; i++)); do touch .inode_number_thrash/$i; done
rm -rf .inode_number_thrash
""")

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

    def _setup_tree(self, path="tree", export=-1, distributed=False, random=0.0, count=100, wait=True):
        """
        Create `path` with `count` child directories (each containing one
        file) and apply the requested pin policies via setfattr.
        NOTE(review): the parameter `random` shadows the random module inside
        this method; the shell payload head/tail was reconstructed from the
        surviving interior lines.
        """
        return self.mount_a.run_shell_payload(f"""
set -ex
mkdir -p {path}
{f"setfattr -n ceph.dir.pin -v {export} {path}" if export >= 0 else ""}
{f"setfattr -n ceph.dir.pin.distributed -v 1 {path}" if distributed else ""}
{f"setfattr -n ceph.dir.pin.random -v {random} {path}" if random > 0.0 else ""}
for ((i = 0; i < {count}; i++)); do
    mkdir "{path}/$i"
    echo file > "{path}/$i/file"
done
""", wait=wait)

    def test_ephemeral_pin_dist_override(self):
        """
        That an ephemeral distributed pin overrides a normal export pin.
        """

        self._setup_tree(distributed=True)
        subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        for s in subtrees:
            path = s['dir']['path']
            # NOTE(review): the loop header and path filter were lost in
            # extraction; only /tree subtrees should carry the flag -- confirm.
            if path.startswith('/tree'):
                self.assertTrue(s['distributed_ephemeral_pin'])

    def test_ephemeral_pin_dist_override_pin(self):
        """
        That an export pin overrides an ephemerally pinned directory.
        """

        self._setup_tree(distributed=True)
        subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        self.mount_a.setfattr("tree", "ceph.dir.pin", "0")
        time.sleep(15) # NOTE(review): sleep reconstructed; allow the merge
        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path == '/tree':
                self.assertEqual(s['auth_first'], 0)
                self.assertFalse(s['distributed_ephemeral_pin'])
        # it has been merged into /tree

    def test_ephemeral_pin_dist_off(self):
        """
        That turning off ephemeral distributed pin merges subtrees.
        """

        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "0")
        time.sleep(15) # NOTE(review): sleep reconstructed; allow the merge
        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path.startswith('/tree'):
                self.assertFalse(s['distributed_ephemeral_pin'])

    def test_ephemeral_pin_dist_conf_off(self):
        """
        That turning off ephemeral distributed pin config prevents distribution.
        """

        self._setup_tree()
        self.config_set('mds', 'mds_export_ephemeral_distributed', False)
        self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1")
        time.sleep(30) # NOTE(review): reconstructed; for something to not happen
        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path.startswith('/tree'):
                self.assertFalse(s['distributed_ephemeral_pin'])

    def _test_ephemeral_pin_dist_conf_off_merge(self):
        """
        That turning off ephemeral distributed pin config merges subtrees.
        FIXME: who triggers the merge?
        """

        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        self.config_set('mds', 'mds_export_ephemeral_distributed', False)
        self._wait_subtrees([('/tree', 0)], timeout=60, status=self.status)

    def test_ephemeral_pin_dist_override_before(self):
        """
        That a conventional export pin overrides the distributed policy _before_ distributed policy is set.
        """

        count = 10  # NOTE(review): constant lost in extraction -- confirm
        self._setup_tree(count=count)
        test = []
        for i in range(count):
            path = f"tree/{i}"
            self.mount_a.setfattr(path, "ceph.dir.pin", "1")
            test.append(("/"+path, 1))
        self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1")
        time.sleep(15) # for something to not happen...
        self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/")

    def test_ephemeral_pin_dist_override_after(self):
        """
        That a conventional export pin overrides the distributed policy _after_ distributed policy is set.
        """

        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        count = 10  # NOTE(review): constant lost in extraction -- confirm
        test = []
        for i in range(count):
            path = f"tree/{i}"
            self.mount_a.setfattr(path, "ceph.dir.pin", "1")
            test.append(("/"+path, 1))
        self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/")

    def test_ephemeral_pin_dist_failover(self):
        """
        That MDS failover does not cause unnecessary migrations.
        """

        # pin /tree so it does not export during failover
        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        #test = [(s['dir']['path'], s['auth_first']) for s in subtrees]
        before = self.fs.ranks_perf(lambda p: p['mds']['exported'])
        log.info(f"export stats: {before}")
        self.fs.rank_fail(rank=1)
        self.status = self.fs.wait_for_daemons()
        time.sleep(10) # waiting for something to not happen
        after = self.fs.ranks_perf(lambda p: p['mds']['exported'])
        log.info(f"export stats: {after}")
        self.assertEqual(before, after)

    def test_ephemeral_pin_distribution(self):
        """
        That ephemerally pinned subtrees are somewhat evenly distributed.
        """

        # NOTE(review): max_mds/frags constants lost in extraction -- confirm.
        max_mds = 3
        frags = 128

        self.fs.set_max_mds(max_mds)
        self.status = self.fs.wait_for_daemons()

        self.config_set('mds', 'mds_export_ephemeral_distributed_factor', (frags-1) / max_mds)
        self._setup_tree(count=1000, distributed=True)

        subtrees = self._wait_distributed_subtrees(frags, status=self.status, rank="all")
        nsubtrees = len(subtrees)

        # Check if distribution is uniform
        rank0 = list(filter(lambda x: x['auth_first'] == 0, subtrees))
        rank1 = list(filter(lambda x: x['auth_first'] == 1, subtrees))
        rank2 = list(filter(lambda x: x['auth_first'] == 2, subtrees))
        self.assertGreaterEqual(len(rank0)/nsubtrees, 0.15)
        self.assertGreaterEqual(len(rank1)/nsubtrees, 0.15)
        self.assertGreaterEqual(len(rank2)/nsubtrees, 0.15)

    def test_ephemeral_random(self):
        """
        That 100% randomness causes all children to be pinned.
        """

        self._setup_tree(random=1.0)
        self._wait_random_subtrees(100, status=self.status, rank="all")

    def test_ephemeral_random_max(self):
        """
        That the config mds_export_ephemeral_random_max is not exceeded.
        """

        # NOTE(review): r/count constants lost in extraction -- confirm.
        r = 0.5
        count = 1200

        self._setup_tree(count=count, random=r)
        subtrees = self._wait_random_subtrees(int(r*count*.75), status=self.status, rank="all")
        self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01)
        self._setup_tree(path="tree/new", count=count)
        time.sleep(30) # for something not to happen...
        subtrees = self._get_subtrees(status=self.status, rank="all", path="tree/new/")
        self.assertLessEqual(len(subtrees), int(.01*count*1.25))

    def test_ephemeral_random_max_config(self):
        """
        That the config mds_export_ephemeral_random_max config rejects new OOB policies.
        """

        self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01)
        # NOTE(review): the try/else structure was lost in extraction; setting
        # a policy above the max must fail with "Invalid" -- confirm.
        try:
            p = self._setup_tree(count=1, random=0.02, wait=False)
            p.wait()
        except CommandFailedError as e:
            log.info(f"{e}")
            self.assertIn("Invalid", p.stderr.getvalue())
        else:
            raise RuntimeError("mds_export_ephemeral_random_max ignored!")

    def test_ephemeral_random_dist(self):
        """
        That ephemeral distributed pin overrides ephemeral random pin
        """

        self._setup_tree(random=1.0, distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status)

        time.sleep(30) # NOTE(review): reconstructed; let pins settle

        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path.startswith('/tree'):
                self.assertFalse(s['random_ephemeral_pin'])

    def test_ephemeral_random_pin_override_before(self):
        """
        That a conventional export pin overrides the random policy before creating new directories.
        """

        self._setup_tree(count=0, random=1.0)
        self._setup_tree(path="tree/pin", count=10, export=1)
        self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin")

    def test_ephemeral_random_pin_override_after(self):
        """
        That a conventional export pin overrides the random policy after creating new directories.
        """

        count = 10  # NOTE(review): constant lost in extraction -- confirm
        self._setup_tree(count=0, random=1.0)
        self._setup_tree(path="tree/pin", count=count)
        self._wait_random_subtrees(count+1, status=self.status, rank="all")
        self.mount_a.setfattr("tree/pin", "ceph.dir.pin", "1")
        self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin")

    def test_ephemeral_randomness(self):
        """
        That the randomness is reasonable.
        """

        r = random.uniform(0.25, 0.75) # ratios don't work for small r!
        count = 1000  # NOTE(review): constant lost in extraction -- confirm
        self._setup_tree(count=count, random=r)
        subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all")
        time.sleep(30) # for max to not be exceeded
        subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all")
        self.assertLessEqual(len(subtrees), int(r*count*1.50))

    def test_ephemeral_random_cache_drop(self):
        """
        That the random ephemeral pin does not prevent empty (nothing in cache) subtree merging.
        """

        count = 100  # NOTE(review): constant lost in extraction -- confirm
        self._setup_tree(count=count, random=1.0)
        self._wait_random_subtrees(count, status=self.status, rank="all")
        self.mount_a.umount_wait() # release all caps
        # NOTE(review): `def _drop():` header reconstructed (passed as action).
        def _drop():
            self.fs.ranks_tell(["cache", "drop"], status=self.status)
        self._wait_subtrees([], status=self.status, action=_drop)

    def test_ephemeral_random_failover(self):
        """
        That the random ephemeral pins stay pinned across MDS failover.
        """

        # NOTE(review): count/r constants lost in extraction -- confirm.
        count = 100
        r = 0.5
        self._setup_tree(count=count, random=r)
        # wait for all random subtrees to be created, not a specific count
        time.sleep(30)
        subtrees = self._wait_random_subtrees(1, status=self.status, rank=1)
        before = [(s['dir']['path'], s['auth_first']) for s in subtrees]

        self.fs.rank_fail(rank=1)
        self.status = self.fs.wait_for_daemons()

        time.sleep(30) # waiting for something to not happen
        subtrees = self._wait_random_subtrees(1, status=self.status, rank=1)
        after = [(s['dir']['path'], s['auth_first']) for s in subtrees]

        log.info(f"subtrees before: {before}")
        log.info(f"subtrees after: {after}")

        self.assertEqual(before, after)

    def test_ephemeral_pin_grow_mds(self):
        """
        That consistent hashing works to reduce the number of migrations.
        """

        self.fs.set_max_mds(2)
        self.status = self.fs.wait_for_daemons()

        self._setup_tree(random=1.0)
        subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all")

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

        # Sleeping for a while to allow the ephemeral pin migrations to complete
        time.sleep(30)  # NOTE(review): sleep reconstructed per comment above

        subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all")

        # Count subtrees that moved rank after the resize.
        count = 0
        for old_subtree in subtrees_old:
            for new_subtree in subtrees_new:
                if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']):
                    count = count + 1
                    break

        log.info("{0} migrations have occured due to the cluster resizing".format(count))
        # ~50% of subtrees from the two rank will migrate to another rank
        self.assertLessEqual((count/len(subtrees_old)), (0.5)*1.25) # with 25% overbudget

    def test_ephemeral_pin_shrink_mds(self):
        """
        That consistent hashing works to reduce the number of migrations.
        """

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

        self._setup_tree(random=1.0)
        subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all")

        self.fs.set_max_mds(2)
        self.status = self.fs.wait_for_daemons()

        time.sleep(30)  # NOTE(review): reconstructed; allow migrations to complete

        subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all")

        # Count subtrees that moved rank after the resize.
        count = 0
        for old_subtree in subtrees_old:
            for new_subtree in subtrees_new:
                if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']):
                    count = count + 1
                    break

        log.info("{0} migrations have occured due to the cluster resizing".format(count))
        # rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2
        self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget
)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget