]> git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/cephfs/test_exports.py
import quincy beta 17.1.0
[ceph.git] / ceph / qa / tasks / cephfs / test_exports.py
1 import logging
2 import random
3 import time
4 from tasks.cephfs.fuse_mount import FuseMount
5 from tasks.cephfs.cephfs_test_case import CephFSTestCase
6 from teuthology.exceptions import CommandFailedError
7
8 log = logging.getLogger(__name__)
9
class TestExports(CephFSTestCase):
    """Export tests that require two MDS daemons and two client mounts."""

    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 2

    def test_session_race(self):
        """
        Test session creation race.

        See: https://tracker.ceph.com/issues/24072#change-113056
        """

        def inject_session_race(value):
            # Flip the race-injection option on rank 1 through its admin socket.
            self.fs.rank_asok(
                ["config", "set", "mds_inject_migrator_session_race", value],
                rank=1,
            )

        self.fs.set_max_mds(2)
        status = self.fs.wait_for_daemons()

        # Remember who holds rank 1 now; it is compared against the holder
        # at the end of the test.
        rank1_before = self.fs.get_rank(rank=1, status=status)

        # Directory "a" is exported to rank 1 up front via an export pin.
        self.mount_a.run_shell(["mkdir", "-p", "a/aa"])
        self.mount_a.setfattr("a", "ceph.dir.pin", "1")
        self._wait_subtrees([('/a', 1)], status=status, rank=1)

        # Enable the injected race on the MDS ...
        inject_session_race("true")

        # ... and pin a second directory to rank 1 from the other client.
        self.mount_b.run_shell(["mkdir", "-p", "b/bb"])
        self.mount_b.setfattr("b", "ceph.dir.pin", "1")

        time.sleep(5)

        # Disable the injection again so that it doesn't wait a second time.
        inject_session_race("false")

        # Touch a directory known to live on rank 1 to create a session with
        # it; with the bug present this should crash rank 1.
        self.mount_b.run_shell(["ls", "a"])

        # The gid must be unchanged, i.e. no standby took over rank 1.
        rank1_after = self.fs.get_rank(rank=1)
        self.assertEqual(rank1_before['gid'], rank1_after['gid'])
50
class TestExportPin(CephFSTestCase):
    """
    Tests for the ceph.dir.pin export pin with max_mds set to 3.

    Every test starts from the directory chain 1/2/3/4 created in setUp.
    """

    MDSS_REQUIRED = 3
    CLIENTS_REQUIRED = 1

    def setUp(self):
        """Set max_mds to 3, wait for the daemons and create 1/2/3/4."""
        CephFSTestCase.setUp(self)

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

        self.mount_a.run_shell_payload("mkdir -p 1/2/3/4")

    def test_noop(self):
        """That a pin of -1 on /1 leaves the expected subtree list empty."""
        self.mount_a.setfattr("1", "ceph.dir.pin", "-1")
        time.sleep(30) # for something to not happen
        self._wait_subtrees([], status=self.status)

    def test_negative(self):
        """That an arbitrary negative pin on /1 leaves the expected subtree list empty."""
        self.mount_a.setfattr("1", "ceph.dir.pin", "-2341")
        time.sleep(30) # for something to not happen
        self._wait_subtrees([], status=self.status)

    def test_empty_pin(self):
        """That pinning the empty leaf directory /1/2/3/4 leaves the expected subtree list empty."""
        self.mount_a.setfattr("1/2/3/4", "ceph.dir.pin", "1")
        time.sleep(30) # for something to not happen
        self._wait_subtrees([], status=self.status)

    def test_trivial(self):
        """That pinning /1 to rank 1 yields the subtree ('/1', 1)."""
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)

    def test_export_targets(self):
        """That rank 0 lists exactly rank 1 in its export_targets after /1 is pinned to rank 1."""
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)
        # Refresh the status so export_targets reflects the export.
        self.status = self.fs.status()
        r0 = self.status.get_rank(self.fs.id, 0)
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(sorted(r0['export_targets']), [1])

    def test_redundant(self):
        """That pinning /1/2 to the same rank as its parent /1 still yields both subtrees."""
        # redundant pin /1/2 to rank 1
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 1), ('/1/2', 1)], status=self.status, rank=1)

    def test_reassignment(self):
        """That changing the pin of /1/2 from rank 1 to rank 0 moves the subtree."""
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1/2', 1)], status=self.status, rank=1)
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
        self._wait_subtrees([('/1/2', 0)], status=self.status, rank=0)

    def test_phantom_rank(self):
        """That a pin to rank 10 (beyond max_mds=3) on /1/2 produces no extra subtree."""
        self.mount_a.setfattr("1", "ceph.dir.pin", "0")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "10")
        time.sleep(30) # wait for nothing weird to happen
        self._wait_subtrees([('/1', 0)], status=self.status)

    def test_nested(self):
        """That nested pins on /1, /1/2 and /1/2/3 each get their own rank."""
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
        self.mount_a.setfattr("1/2/3", "ceph.dir.pin", "2")
        self._wait_subtrees([('/1', 1), ('/1/2', 0), ('/1/2/3', 2)], status=self.status, rank=2)

    def test_nested_unset(self):
        """That resetting the nested pin on /1/2 to -1 leaves only the /1 subtree."""
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "2")
        self._wait_subtrees([('/1', 1), ('/1/2', 2)], status=self.status, rank=1)
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "-1")
        self._wait_subtrees([('/1', 1)], status=self.status, rank=1)

    def test_rename(self):
        """That a pinned directory keeps its pin when moved under another pinned tree."""
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.run_shell_payload("mkdir -p 9/8/7")
        self.mount_a.setfattr("9/8", "ceph.dir.pin", "0")
        self._wait_subtrees([('/1', 1), ("/9/8", 0)], status=self.status, rank=0)
        # 1/2 already exists, so the directory lands at 1/2/8.
        self.mount_a.run_shell_payload("mv 9/8 1/2")
        self._wait_subtrees([('/1', 1), ("/1/2/8", 0)], status=self.status, rank=0)

    def test_getfattr(self):
        """
        That ceph.dir.pin can be read back with getfattr.

        On non-FUSE mounts the test is skipped when the kernel major version
        is below 5 and the attribute reads back empty.
        """
        # pin /1 to rank 1 and /1/2 to rank 0
        self.mount_a.setfattr("1", "ceph.dir.pin", "1")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
        self._wait_subtrees([('/1', 1), ('/1/2', 0)], status=self.status, rank=1)

        if not isinstance(self.mount_a, FuseMount):
            release = str(self.mount_a.client_remote.sh('uname -r', wait=True)).strip()
            dir_pin = self.mount_a.getfattr("1", "ceph.dir.pin")
            log.debug("mount.getfattr('1','ceph.dir.pin'): %s ", dir_pin)
            # Compare the major version as a number: a plain string comparison
            # would sort a release such as "10.0" before "5". A release that
            # does not start with digits is treated as "not old", so the
            # assertions below report the real failure instead of skipping.
            major = release.split('.', 1)[0]
            kernel_is_pre_5 = major.isdigit() and int(major) < 5
            if kernel_is_pre_5 and not dir_pin:
                self.skipTest("Kernel does not support getting the extended attribute ceph.dir.pin")
        self.assertEqual(self.mount_a.getfattr("1", "ceph.dir.pin"), '1')
        self.assertEqual(self.mount_a.getfattr("1/2", "ceph.dir.pin"), '0')

    def test_export_pin_cache_drop(self):
        """
        That the export pin does not prevent empty (nothing in cache) subtree merging.
        """

        self.mount_a.setfattr("1", "ceph.dir.pin", "0")
        self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
        self._wait_subtrees([('/1', 0), ('/1/2', 1)], status=self.status)
        self.mount_a.umount_wait() # release all caps

        def _drop():
            self.fs.ranks_tell(["cache", "drop"], status=self.status)

        # drop cache multiple times to clear replica pins
        self._wait_subtrees([], status=self.status, action=_drop)
157
class TestEphemeralPins(CephFSTestCase):
    """
    Tests for the ephemeral pin policies ceph.dir.pin.distributed and
    ceph.dir.pin.random, run with max_mds set to 3 unless a test changes it.
    """

    MDSS_REQUIRED = 3
    CLIENTS_REQUIRED = 1

    def setUp(self):
        """Enable the ephemeral pin options, burn some inode numbers and set max_mds to 3."""
        CephFSTestCase.setUp(self)

        self.config_set('mds', 'mds_export_ephemeral_random', True)
        self.config_set('mds', 'mds_export_ephemeral_distributed', True)
        self.config_set('mds', 'mds_export_ephemeral_random_max', 1.0)

        self.mount_a.run_shell_payload("""
set -e

# Use up a random number of inode numbers so the ephemeral pinning is not the same every test.
mkdir .inode_number_thrash
count=$((RANDOM % 1024))
for ((i = 0; i < count; i++)); do touch .inode_number_thrash/$i; done
rm -rf .inode_number_thrash
""")

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

    def _setup_tree(self, path="tree", export=-1, distributed=False, random=0.0, count=100, wait=True):
        """
        Create `path` with `count` numbered child directories, each holding
        one file, and optionally set pin attributes on `path`.

        :param path: directory to create, relative to the mount
        :param export: value for ceph.dir.pin; only set when >= 0
        :param distributed: set ceph.dir.pin.distributed to 1 when true
        :param random: value for ceph.dir.pin.random; only set when > 0.0
                       (this parameter shadows the `random` module inside
                       this method)
        :param count: number of child directories to create
        :param wait: passed through to run_shell_payload
        :return: whatever run_shell_payload returns
        """
        return self.mount_a.run_shell_payload(f"""
set -ex
mkdir -p {path}
{f"setfattr -n ceph.dir.pin -v {export} {path}" if export >= 0 else ""}
{f"setfattr -n ceph.dir.pin.distributed -v 1 {path}" if distributed else ""}
{f"setfattr -n ceph.dir.pin.random -v {random} {path}" if random > 0.0 else ""}
for ((i = 0; i < {count}; i++)); do
mkdir -p "{path}/$i"
echo file > "{path}/$i/file"
done
""", wait=wait)

    @staticmethod
    def _count_migrations(subtrees_old, subtrees_new):
        """
        Count the entries of `subtrees_old` whose path also appears in
        `subtrees_new` with a different 'auth_first'.

        Each old entry is counted at most once.
        """
        # Index the new subtrees by path so each old entry is a single lookup
        # instead of a scan over the whole new list.
        new_auths = {}
        for s in subtrees_new:
            new_auths.setdefault(s['dir']['path'], set()).add(s['auth_first'])
        return sum(
            1 for s in subtrees_old
            if any(auth != s['auth_first'] for auth in new_auths.get(s['dir']['path'], ()))
        )

    def test_ephemeral_pin_dist_override(self):
        """
        That an ephemeral distributed pin overrides a normal export pin.
        """
        # NOTE(review): no conventional export pin is set here; the test only
        # checks that /tree, if reported, carries the distributed pin flag.

        self._setup_tree(distributed=True)
        subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        for s in subtrees:
            path = s['dir']['path']
            if path == '/tree':
                self.assertTrue(s['distributed_ephemeral_pin'])

    def test_ephemeral_pin_dist_override_pin(self):
        """
        That an export pin overrides an ephemerally pinned directory.
        """

        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        self.mount_a.setfattr("tree", "ceph.dir.pin", "0")
        time.sleep(15)
        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path == '/tree':
                self.assertEqual(s['auth_first'], 0)
                self.assertFalse(s['distributed_ephemeral_pin'])
        # it has been merged into /tree

    def test_ephemeral_pin_dist_off(self):
        """
        That turning off ephemeral distributed pin merges subtrees.
        """

        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "0")
        time.sleep(15)
        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path == '/tree':
                self.assertFalse(s['distributed_ephemeral_pin'])

    def test_ephemeral_pin_dist_conf_off(self):
        """
        That turning off ephemeral distributed pin config prevents distribution.
        """

        self._setup_tree()
        self.config_set('mds', 'mds_export_ephemeral_distributed', False)
        self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1")
        time.sleep(15)
        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path == '/tree':
                self.assertFalse(s['distributed_ephemeral_pin'])

    def _test_ephemeral_pin_dist_conf_off_merge(self):
        """
        That turning off ephemeral distributed pin config merges subtrees.
        FIXME: who triggers the merge?
        """
        # The leading underscore keeps this out of the test_* names.

        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        self.config_set('mds', 'mds_export_ephemeral_distributed', False)
        self._wait_subtrees([('/tree', 0)], timeout=60, status=self.status)

    def test_ephemeral_pin_dist_override_before(self):
        """
        That a conventional export pin overrides the distributed policy _before_ distributed policy is set.
        """

        count = 10
        self._setup_tree(count=count)
        test = []
        for i in range(count):
            path = f"tree/{i}"
            self.mount_a.setfattr(path, "ceph.dir.pin", "1")
            test.append(("/"+path, 1))
        self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1")
        time.sleep(15) # for something to not happen...
        self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/")

    def test_ephemeral_pin_dist_override_after(self):
        """
        That a conventional export pin overrides the distributed policy _after_ distributed policy is set.
        """

        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        test = []
        for i in range(10):
            path = f"tree/{i}"
            self.mount_a.setfattr(path, "ceph.dir.pin", "1")
            test.append(("/"+path, 1))
        self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/")

    def test_ephemeral_pin_dist_failover(self):
        """
        That MDS failover does not cause unnecessary migrations.
        """

        # pin /tree so it does not export during failover
        self._setup_tree(distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
        # Compare the 'exported' perf counter before and after failing rank 1.
        before = self.fs.ranks_perf(lambda p: p['mds']['exported'])
        log.info(f"export stats: {before}")
        self.fs.rank_fail(rank=1)
        self.status = self.fs.wait_for_daemons()
        time.sleep(10) # waiting for something to not happen
        after = self.fs.ranks_perf(lambda p: p['mds']['exported'])
        log.info(f"export stats: {after}")
        self.assertEqual(before, after)

    def test_ephemeral_pin_distribution(self):
        """
        That ephemerally pinned subtrees are somewhat evenly distributed.
        """

        max_mds = 3
        frags = 128

        self.fs.set_max_mds(max_mds)
        self.status = self.fs.wait_for_daemons()

        self.config_set('mds', 'mds_export_ephemeral_distributed_factor', (frags-1) / max_mds)
        self._setup_tree(count=1000, distributed=True)

        subtrees = self._wait_distributed_subtrees(frags, status=self.status, rank="all")
        nsubtrees = len(subtrees)

        # Check if distribution is uniform: every rank must be auth for at
        # least 15% of the subtrees.
        for rank in range(max_mds):
            owned = sum(1 for s in subtrees if s['auth_first'] == rank)
            self.assertGreaterEqual(owned/nsubtrees, 0.15)

    def test_ephemeral_random(self):
        """
        That 100% randomness causes all children to be pinned.
        """
        self._setup_tree(random=1.0)
        self._wait_random_subtrees(100, status=self.status, rank="all")

    def test_ephemeral_random_max(self):
        """
        That the config mds_export_ephemeral_random_max is not exceeded.
        """

        r = 0.5
        count = 1000
        self._setup_tree(count=count, random=r)
        self._wait_random_subtrees(int(r*count*.75), status=self.status, rank="all")
        self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01)
        self._setup_tree(path="tree/new", count=count)
        time.sleep(30) # for something not to happen...
        # The path filter is absolute like every other subtree path in this
        # file ("/tree/", "/tree/pin"); the previous relative "tree/new/"
        # presumably matched nothing, which made the check below vacuous.
        subtrees = self._get_subtrees(status=self.status, rank="all", path="/tree/new/")
        self.assertLessEqual(len(subtrees), int(.01*count*1.25))

    def test_ephemeral_random_max_config(self):
        """
        That the config mds_export_ephemeral_random_max config rejects new OOB policies.
        """

        self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01)
        # Start the command outside the try block so that `p` is always bound
        # when the except clause inspects its stderr.
        p = self._setup_tree(count=1, random=0.02, wait=False)
        try:
            p.wait()
        except CommandFailedError as e:
            log.info(f"{e}")
            self.assertIn("Invalid", p.stderr.getvalue())
        else:
            raise RuntimeError("mds_export_ephemeral_random_max ignored!")

    def test_ephemeral_random_dist(self):
        """
        That ephemeral distributed pin overrides ephemeral random pin
        """

        self._setup_tree(random=1.0, distributed=True)
        self._wait_distributed_subtrees(3 * 2, status=self.status)

        time.sleep(15)
        subtrees = self._get_subtrees(status=self.status, rank=0)
        for s in subtrees:
            path = s['dir']['path']
            if path.startswith('/tree'):
                self.assertFalse(s['random_ephemeral_pin'])

    def test_ephemeral_random_pin_override_before(self):
        """
        That a conventional export pin overrides the random policy before creating new directories.
        """

        self._setup_tree(count=0, random=1.0)
        self._setup_tree(path="tree/pin", count=10, export=1)
        self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin")

    def test_ephemeral_random_pin_override_after(self):
        """
        That a conventional export pin overrides the random policy after creating new directories.
        """

        count = 10
        self._setup_tree(count=0, random=1.0)
        self._setup_tree(path="tree/pin", count=count)
        self._wait_random_subtrees(count+1, status=self.status, rank="all")
        self.mount_a.setfattr("tree/pin", "ceph.dir.pin", "1")
        self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin")

    def test_ephemeral_randomness(self):
        """
        That the randomness is reasonable.
        """

        r = random.uniform(0.25, 0.75) # ratios don't work for small r!
        count = 1000
        self._setup_tree(count=count, random=r)
        self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all")
        time.sleep(30) # for max to not be exceeded
        subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all")
        self.assertLessEqual(len(subtrees), int(r*count*1.50))

    def test_ephemeral_random_cache_drop(self):
        """
        That the random ephemeral pin does not prevent empty (nothing in cache) subtree merging.
        """

        count = 100
        self._setup_tree(count=count, random=1.0)
        self._wait_random_subtrees(count, status=self.status, rank="all")
        self.mount_a.umount_wait() # release all caps

        def _drop():
            self.fs.ranks_tell(["cache", "drop"], status=self.status)

        self._wait_subtrees([], status=self.status, action=_drop)

    def test_ephemeral_random_failover(self):
        """
        That the random ephemeral pins stay pinned across MDS failover.
        """

        count = 100
        r = 0.5
        self._setup_tree(count=count, random=r)
        # wait for all random subtrees to be created, not a specific count
        time.sleep(30)
        subtrees = self._wait_random_subtrees(1, status=self.status, rank=1)
        before = sorted((s['dir']['path'], s['auth_first']) for s in subtrees)

        self.fs.rank_fail(rank=1)
        self.status = self.fs.wait_for_daemons()

        time.sleep(30) # waiting for something to not happen
        subtrees = self._wait_random_subtrees(1, status=self.status, rank=1)
        after = sorted((s['dir']['path'], s['auth_first']) for s in subtrees)
        log.info(f"subtrees before: {before}")
        log.info(f"subtrees after: {after}")

        self.assertEqual(before, after)

    def test_ephemeral_pin_grow_mds(self):
        """
        That consistent hashing works to reduce the number of migrations
        when max_mds grows from 2 to 3.
        """

        self.fs.set_max_mds(2)
        self.status = self.fs.wait_for_daemons()

        self._setup_tree(random=1.0)
        subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all")

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

        # Sleeping for a while to allow the ephemeral pin migrations to complete
        time.sleep(30)

        subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all")
        count = self._count_migrations(subtrees_old, subtrees_new)

        log.info("{0} migrations have occured due to the cluster resizing".format(count))
        # ~50% of subtrees from the two ranks will migrate to another rank
        self.assertLessEqual((count/len(subtrees_old)), (0.5)*1.25) # with 25% overbudget

    def test_ephemeral_pin_shrink_mds(self):
        """
        That consistent hashing works to reduce the number of migrations
        when max_mds shrinks from 3 to 2.
        """

        self.fs.set_max_mds(3)
        self.status = self.fs.wait_for_daemons()

        self._setup_tree(random=1.0)
        subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all")

        self.fs.set_max_mds(2)
        self.status = self.fs.wait_for_daemons()
        time.sleep(30)

        subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all")
        count = self._count_migrations(subtrees_old, subtrees_new)

        log.info("{0} migrations have occured due to the cluster resizing".format(count))
        # rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2
        self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget