]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | |
2 | """ | |
3 | Teuthology task for exercising CephFS client recovery | |
4 | """ | |
5 | ||
6 | import logging | |
7 | from textwrap import dedent | |
8 | import time | |
9 | import distutils.version as version | |
10 | import re | |
11 | import os | |
12 | ||
9f95a23c | 13 | from teuthology.orchestra import run |
20effc67 | 14 | from teuthology.exceptions import CommandFailedError |
28e407b8 | 15 | from tasks.cephfs.fuse_mount import FuseMount |
7c673cae FG |
16 | from tasks.cephfs.cephfs_test_case import CephFSTestCase |
17 | from teuthology.packaging import get_package_version | |
7c673cae FG |
18 | |
19 | log = logging.getLogger(__name__) | |
20 | ||
21 | ||
22 | # Arbitrary timeouts for operations involving restarting | |
23 | # an MDS or waiting for it to come up | |
24 | MDS_RESTART_GRACE = 60 | |
25 | ||
26 | ||
class TestClientNetworkRecovery(CephFSTestCase):
    # The client under test must live on its own remote so its network
    # can be blocked independently of the MDS.
    REQUIRE_ONE_CLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

    # Cluster config values fetched before the test runs (populated into
    # the same-named class attributes below by the test framework).
    LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references
    mds_reconnect_timeout = None
    ms_max_backoff = None

    def test_network_death(self):
        """
        Simulate software freeze or temporary network failure.

        Check that the client blocks I/O during failure, and completes
        I/O after failure.
        """

        session_timeout = self.fs.get_var("session_timeout")
        # Make the MDS mark unresponsive sessions stale promptly instead
        # of deferring while it has spare capacity.
        self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false'])

        # We only need one client
        self.mount_b.umount_wait()

        # Initially our one client session should be visible
        client_id = self.mount_a.get_global_id()
        ls_data = self._session_list()
        self.assert_session_count(1, ls_data)
        self.assertEqual(ls_data[0]['id'], client_id)
        self.assert_session_state(client_id, "open")

        # ...and capable of doing I/O without blocking
        self.mount_a.create_files()

        # ...but if we turn off the network
        self.fs.set_clients_block(True)

        # ...and try and start an I/O
        write_blocked = self.mount_a.write_background()

        # ...then it should block
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "open")
        time.sleep(session_timeout * 1.5)  # Long enough for MDS to consider session stale
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "stale")

        # ...until we re-enable I/O
        self.fs.set_clients_block(False)

        # ...when it should complete promptly
        a = time.time()
        self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2)
        write_blocked.wait()  # Already know we're finished, wait() to raise exception on errors
        recovery_time = time.time() - a
        log.info("recovery time: {0}".format(recovery_time))
        self.assert_session_state(client_id, "open")
84 | ||
85 | ||
class TestClientRecovery(CephFSTestCase):
    # Two clients: one to hold caps/locks, one to contend for them.
    CLIENTS_REQUIRED = 2

    # Cluster config values fetched before the test runs (populated into
    # the same-named class attributes below by the test framework).
    LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references
    mds_reconnect_timeout = None
    ms_max_backoff = None
94 | ||
95 | def test_basic(self): | |
96 | # Check that two clients come up healthy and see each others' files | |
97 | # ===================================================== | |
98 | self.mount_a.create_files() | |
99 | self.mount_a.check_files() | |
100 | self.mount_a.umount_wait() | |
101 | ||
102 | self.mount_b.check_files() | |
103 | ||
e306af50 | 104 | self.mount_a.mount_wait() |
7c673cae FG |
105 | |
106 | # Check that the admin socket interface is correctly reporting | |
107 | # two sessions | |
108 | # ===================================================== | |
109 | ls_data = self._session_list() | |
110 | self.assert_session_count(2, ls_data) | |
111 | ||
112 | self.assertSetEqual( | |
113 | set([l['id'] for l in ls_data]), | |
114 | {self.mount_a.get_global_id(), self.mount_b.get_global_id()} | |
115 | ) | |
116 | ||
117 | def test_restart(self): | |
118 | # Check that after an MDS restart both clients reconnect and continue | |
119 | # to handle I/O | |
120 | # ===================================================== | |
121 | self.fs.mds_fail_restart() | |
122 | self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) | |
123 | ||
124 | self.mount_a.create_destroy() | |
125 | self.mount_b.create_destroy() | |
126 | ||
127 | def _session_num_caps(self, client_id): | |
128 | ls_data = self.fs.mds_asok(['session', 'ls']) | |
129 | return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps']) | |
130 | ||
    def test_reconnect_timeout(self):
        """
        Check that if I stop an MDS and a client goes away, the MDS waits
        for the full reconnect period before going active.
        """

        mount_a_client_id = self.mount_a.get_global_id()

        # Take the MDS down while the client is still mounted
        self.fs.fail()

        # The client disappears while the MDS is offline
        self.mount_a.umount_wait(force=True)

        self.fs.set_joinable()

        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        # Check that the MDS locally reports its state correctly
        status = self.fs.mds_asok(['status'])
        self.assertIn("reconnect_status", status)

        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        # The session for the dead client should have the 'reconnect' flag set
        self.assertTrue(self.get_session(mount_a_client_id)['reconnecting'])

        # Wait for the reconnect state to clear, this should take the
        # reconnect timeout period.
        in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2)
        # Check that the period we waited to enter active is within a factor
        # of two of the reconnect timeout.
        self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout // 2,
                           "Should have been in reconnect phase for {0} but only took {1}".format(
                               self.mds_reconnect_timeout, in_reconnect_for
                           ))

        # The dead client's session should have been removed
        self.assert_session_count(1)

        # Check that the client that timed out during reconnect can
        # mount again and do I/O
        self.mount_a.mount_wait()
        self.mount_a.create_destroy()

        self.assert_session_count(2)
174 | ||
    def test_reconnect_eviction(self):
        """
        Evicting a dead client during the reconnect phase should let the
        MDS proceed to active without waiting out the reconnect timeout.
        """
        mount_a_client_id = self.mount_a.get_global_id()

        self.fs.fail()

        # The mount goes away while the MDS is offline
        self.mount_a.kill()

        # wait for it to die
        time.sleep(5)

        self.fs.set_joinable()

        # Enter reconnect phase
        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        self.assert_session_count(2)

        # Evict the stuck client
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
        self.assert_session_count(1)

        # Observe that we proceed to active phase without waiting full reconnect timeout
        evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
        # Once we evict the troublemaker, the reconnect phase should complete
        # in well under the reconnect timeout.
        self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5,
                        "reconnect did not complete soon enough after eviction, took {0}".format(
                            evict_til_active
                        ))

        # We killed earlier so must clean up before trying to use again
        self.mount_a.kill_cleanup()

        # Bring the client back
        self.mount_a.mount_wait()
        self.mount_a.create_destroy()
213 | ||
    def _test_stale_caps(self, write):
        """
        Capability release from a stale session: another client's blocked
        write should complete once the MDS marks the unresponsive holder's
        session stale.

        :param write: True to have the holder keep write caps open; False
            to have it hold read caps only (remount first so no dirty
            state is cached).
        """
        session_timeout = self.fs.get_var("session_timeout")

        # Capability release from stale session
        # =====================================
        if write:
            cap_holder = self.mount_a.open_background()
        else:
            self.mount_a.run_shell(["touch", "background_file"])
            self.mount_a.umount_wait()
            self.mount_a.mount_wait()
            cap_holder = self.mount_a.open_background(write=False)

        self.assert_session_count(2)
        mount_a_gid = self.mount_a.get_global_id()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.suspend_netns()

        # wait for it to die so it doesn't voluntarily release buffer cap
        time.sleep(5)

        try:
            # Now, after session_timeout seconds, the waiter should
            # complete their operation when the MDS marks the holder's
            # session stale.
            cap_waiter = self.mount_b.write_background()
            a = time.time()
            cap_waiter.wait()
            b = time.time()

            # Should have succeeded
            self.assertEqual(cap_waiter.exitstatus, 0)

            if write:
                # A stale writer gets evicted, so only one session remains
                self.assert_session_count(1)
            else:
                # A read-only holder is merely marked stale, not evicted
                self.assert_session_state(mount_a_gid, "stale")

            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            self.assertTrue(session_timeout / 2.0 <= cap_waited <= session_timeout * 2.0,
                            "Capability handover took {0}, expected approx {1}".format(
                                cap_waited, session_timeout
                            ))

            self.mount_a._kill_background(cap_holder)
        finally:
            # teardown() doesn't quite handle this case cleanly, so help it out
            self.mount_a.resume_netns()
7c673cae | 268 | |
494da23a TL |
    def test_stale_read_caps(self):
        """Stale-session cap handover where the holder has read caps only."""
        self._test_stale_caps(False)
271 | ||
    def test_stale_write_caps(self):
        """Stale-session cap handover where the holder has write caps open."""
        self._test_stale_caps(True)
274 | ||
7c673cae FG |
    def test_evicted_caps(self):
        """
        Eviction while holding a capability: evicting the (dead) holder
        should hand the cap over to a waiter immediately, well before the
        session timeout would have.
        """

        session_timeout = self.fs.get_var("session_timeout")

        # Take out a write capability on a file on client A,
        # and then immediately kill it.
        cap_holder = self.mount_a.open_background()
        mount_a_client_id = self.mount_a.get_global_id()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.suspend_netns()

        # wait for it to die so it doesn't voluntarily release buffer cap
        time.sleep(5)

        try:
            # The waiter should get stuck waiting for the capability
            # held on the MDS by the now-dead client A
            cap_waiter = self.mount_b.write_background()
            time.sleep(5)
            self.assertFalse(cap_waiter.finished)

            self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
            # Now, because I evicted the old holder of the capability, it should
            # immediately get handed over to the waiter
            a = time.time()
            cap_waiter.wait()
            b = time.time()
            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            # This is the check that it happened 'now' rather than waiting
            # for the session timeout
            self.assertLess(cap_waited, session_timeout / 2.0,
                            "Capability handover took {0}, expected less than {1}".format(
                                cap_waited, session_timeout / 2.0
                            ))

            self.mount_a._kill_background(cap_holder)
        finally:
            # teardown() doesn't quite handle this case cleanly, so help it out
            self.mount_a.resume_netns()
7c673cae FG |
321 | |
322 | def test_trim_caps(self): | |
323 | # Trim capability when reconnecting MDS | |
324 | # =================================== | |
325 | ||
326 | count = 500 | |
327 | # Create lots of files | |
328 | for i in range(count): | |
329 | self.mount_a.run_shell(["touch", "f{0}".format(i)]) | |
330 | ||
331 | # Populate mount_b's cache | |
31f18b77 | 332 | self.mount_b.run_shell(["ls", "-l"]) |
7c673cae FG |
333 | |
334 | client_id = self.mount_b.get_global_id() | |
335 | num_caps = self._session_num_caps(client_id) | |
336 | self.assertGreaterEqual(num_caps, count) | |
337 | ||
338 | # Restart MDS. client should trim its cache when reconnecting to the MDS | |
339 | self.fs.mds_fail_restart() | |
340 | self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) | |
341 | ||
342 | num_caps = self._session_num_caps(client_id) | |
343 | self.assertLess(num_caps, count, | |
344 | "should have less than {0} capabilities, have {1}".format( | |
345 | count, num_caps | |
346 | )) | |
347 | ||
31f18b77 | 348 | def _is_flockable(self): |
7c673cae FG |
349 | a_version_str = get_package_version(self.mount_a.client_remote, "fuse") |
350 | b_version_str = get_package_version(self.mount_b.client_remote, "fuse") | |
351 | flock_version_str = "2.9" | |
352 | ||
353 | version_regex = re.compile(r"[0-9\.]+") | |
354 | a_result = version_regex.match(a_version_str) | |
355 | self.assertTrue(a_result) | |
356 | b_result = version_regex.match(b_version_str) | |
357 | self.assertTrue(b_result) | |
358 | a_version = version.StrictVersion(a_result.group()) | |
359 | b_version = version.StrictVersion(b_result.group()) | |
360 | flock_version=version.StrictVersion(flock_version_str) | |
361 | ||
7c673cae | 362 | if (a_version >= flock_version and b_version >= flock_version): |
31f18b77 FG |
363 | log.info("flock locks are available") |
364 | return True | |
7c673cae FG |
365 | else: |
366 | log.info("not testing flock locks, machines have versions {av} and {bv}".format( | |
367 | av=a_version_str,bv=b_version_str)) | |
31f18b77 FG |
368 | return False |
369 | ||
370 | def test_filelock(self): | |
371 | """ | |
372 | Check that file lock doesn't get lost after an MDS restart | |
373 | """ | |
7c673cae | 374 | |
31f18b77 | 375 | flockable = self._is_flockable() |
7c673cae FG |
376 | lock_holder = self.mount_a.lock_background(do_flock=flockable) |
377 | ||
378 | self.mount_b.wait_for_visible("background_file-2") | |
379 | self.mount_b.check_filelock(do_flock=flockable) | |
380 | ||
381 | self.fs.mds_fail_restart() | |
382 | self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) | |
383 | ||
384 | self.mount_b.check_filelock(do_flock=flockable) | |
385 | ||
386 | # Tear down the background process | |
f67539c2 | 387 | self.mount_a._kill_background(lock_holder) |
7c673cae | 388 | |
31f18b77 FG |
    def test_filelock_eviction(self):
        """
        Check that file lock held by evicted client is given to
        waiting client.
        """
        if not self._is_flockable():
            self.skipTest("flock is not available")

        lock_holder = self.mount_a.lock_background()
        self.mount_b.wait_for_visible("background_file-2")
        self.mount_b.check_filelock()

        lock_taker = self.mount_b.lock_and_release()
        # Check the taker is waiting (doesn't get it immediately)
        time.sleep(2)
        self.assertFalse(lock_holder.finished)
        self.assertFalse(lock_taker.finished)

        try:
            mount_a_client_id = self.mount_a.get_global_id()
            self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])

            # Evicting mount_a should let mount_b's attempt to take the lock
            # succeed
            self.wait_until_true(lambda: lock_taker.finished, timeout=10)
        finally:
            # Tear down the background process
            self.mount_a._kill_background(lock_holder)

            # teardown() doesn't quite handle this case cleanly, so help it out
            self.mount_a.kill()
            self.mount_a.kill_cleanup()

        # Bring the client back
        self.mount_a.mount_wait()
31f18b77 | 424 | |
7c673cae | 425 | def test_dir_fsync(self): |
9f95a23c | 426 | self._test_fsync(True); |
7c673cae FG |
427 | |
428 | def test_create_fsync(self): | |
9f95a23c | 429 | self._test_fsync(False); |
7c673cae FG |
430 | |
    def _test_fsync(self, dirfsync):
        """
        That calls to fsync guarantee visibility of metadata to another
        client immediately after the fsyncing client dies.

        :param dirfsync: True to fsync the directory fd, False to fsync
            the newly created file instead.
        """

        # Leave this guy out until he's needed
        self.mount_b.umount_wait()

        # Create dir + child dentry on client A, and fsync the dir
        path = os.path.join(self.mount_a.mountpoint, "subdir")
        self.mount_a.run_python(
            dedent("""
                import os
                import time

                path = "{path}"

                print("Starting creation...")
                start = time.time()

                os.mkdir(path)
                dfd = os.open(path, os.O_DIRECTORY)

                fd = open(os.path.join(path, "childfile"), "w")
                print("Finished creation in {{0}}s".format(time.time() - start))

                print("Starting fsync...")
                start = time.time()
                if {dirfsync}:
                    os.fsync(dfd)
                else:
                    os.fsync(fd)
                print("Finished fsync in {{0}}s".format(time.time() - start))
            """.format(path=path,dirfsync=str(dirfsync)))
        )

        # Immediately kill the MDS and then client A
        self.fs.fail()
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        # Restart the MDS. Wait for it to come up, it'll have to time out in clientreplay
        self.fs.set_joinable()
        log.info("Waiting for reconnect...")
        self.fs.wait_for_state("up:reconnect")
        log.info("Waiting for active...")
        self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout)
        log.info("Reached active...")

        # Is the child dentry visible from mount B?
        self.mount_b.mount_wait()
        self.mount_b.run_shell(["ls", "subdir/childfile"])
28e407b8 | 484 | |
11fdf7f2 TL |
485 | def test_unmount_for_evicted_client(self): |
486 | """Test if client hangs on unmount after evicting the client.""" | |
487 | mount_a_client_id = self.mount_a.get_global_id() | |
488 | self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) | |
489 | ||
490 | self.mount_a.umount_wait(require_clean=True, timeout=30) | |
491 | ||
20effc67 TL |
    def test_mount_after_evicted_client(self):
        """Test if a new mount of same fs works after client eviction."""

        # trash this : we need it to use same remote as mount_a
        self.mount_b.umount_wait()

        cl = self.mount_a.__class__

        # create a new instance of mount_a's class with most of the
        # same settings, but mounted on mount_b's mountpoint.
        m = cl(ctx=self.mount_a.ctx,
               client_config=self.mount_a.client_config,
               test_dir=self.mount_a.test_dir,
               client_id=self.mount_a.client_id,
               client_remote=self.mount_a.client_remote,
               client_keyring_path=self.mount_a.client_keyring_path,
               cephfs_name=self.mount_a.cephfs_name,
               cephfs_mntpt= self.mount_a.cephfs_mntpt,
               hostfs_mntpt=self.mount_b.hostfs_mntpt,
               brxnet=self.mount_a.ceph_brx_net)

        # evict mount_a
        mount_a_client_id = self.mount_a.get_global_id()
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])

        # The new mount (same client id, fresh session) should come up
        # and be fully usable despite the eviction of the old session.
        m.mount_wait()
        m.create_files()
        m.check_files()
        m.umount_wait(require_clean=True)
521 | ||
28e407b8 AA |
    def test_stale_renew(self):
        """
        Caps become invalid once a session goes stale: changes made by
        another client while this client was unresponsive must be visible
        after it resumes.
        """
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Require FUSE client to handle signal STOP/CONT")

        session_timeout = self.fs.get_var("session_timeout")

        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["touch", "testdir/file1"])
        # populate readdir cache
        self.mount_a.run_shell(["ls", "testdir"])
        self.mount_b.run_shell(["ls", "testdir"])

        # check if readdir cache is effective: a cached listing should not
        # bump the MDS readdir counter
        initial_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
        self.mount_b.run_shell(["ls", "testdir"])
        current_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
        self.assertEqual(current_readdirs, initial_readdirs);

        mount_b_gid = self.mount_b.get_global_id()
        # stop ceph-fuse process of mount_b
        self.mount_b.suspend_netns()

        self.assert_session_state(mount_b_gid, "open")
        time.sleep(session_timeout * 1.5)  # Long enough for MDS to consider session stale

        self.mount_a.run_shell(["touch", "testdir/file2"])
        self.assert_session_state(mount_b_gid, "stale")

        # resume ceph-fuse process of mount_b
        self.mount_b.resume_netns()
        # Is the new file visible from mount_b? (caps become invalid after session stale)
        self.mount_b.run_shell(["ls", "testdir/file2"])
554 | ||
11fdf7f2 TL |
    def test_abort_conn(self):
        """
        Check that abort_conn() skips closing mds sessions.
        """
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Testing libcephfs function")

        self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false'])
        session_timeout = self.fs.get_var("session_timeout")

        self.mount_a.umount_wait()
        self.mount_b.umount_wait()

        # Mount via libcephfs on the remote and abort the connection
        # without closing the session; print the instance id so we can
        # find the orphaned session afterwards.
        gid_str = self.mount_a.run_python(dedent("""
            import cephfs as libcephfs
            cephfs = libcephfs.LibCephFS(conffile='')
            cephfs.mount()
            client_id = cephfs.get_instance_id()
            cephfs.abort_conn()
            print(client_id)
            """)
        )
        gid = int(gid_str);

        # Because abort_conn() did not close the session, the MDS still
        # sees it: open at first, then stale after the timeout.
        self.assert_session_state(gid, "open")
        time.sleep(session_timeout * 1.5)  # Long enough for MDS to consider session stale
        self.assert_session_state(gid, "stale")
eafe8130 TL |
582 | |
    def test_dont_mark_unresponsive_client_stale(self):
        """
        Test that an unresponsive client holding caps is not marked stale or
        evicted unless another client wants its caps.
        """
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Require FUSE client to handle signal STOP/CONT")

        # XXX: To conduct this test we need at least two clients since a
        # single client is never evicted by MDS.
        SESSION_TIMEOUT = 30
        SESSION_AUTOCLOSE = 50
        time_at_beg = time.time()
        mount_a_gid = self.mount_a.get_global_id()
        _ = self.mount_a.client_pid
        self.fs.set_var('session_timeout', SESSION_TIMEOUT)
        self.fs.set_var('session_autoclose', SESSION_AUTOCLOSE)
        self.assert_session_count(2, self.fs.mds_asok(['session', 'ls']))

        # test that client holding cap not required by any other client is not
        # marked stale when it becomes unresponsive.
        self.mount_a.run_shell(['mkdir', 'dir'])
        self.mount_a.send_signal('sigstop')
        time.sleep(SESSION_TIMEOUT + 2)
        self.assert_session_state(mount_a_gid, "open")

        # test that other clients have to wait to get the caps from
        # unresponsive client until session_autoclose.
        self.mount_b.run_shell(['stat', 'dir'])
        self.assert_session_count(1, self.fs.mds_asok(['session', 'ls']))
        self.assertLess(time.time(), time_at_beg + SESSION_AUTOCLOSE)

        # Un-freeze the client so teardown can proceed normally
        self.mount_a.send_signal('sigcont')
92f5a8d4 TL |
616 | |
    def test_config_session_timeout(self):
        """
        A per-session timeout override ('session config ... timeout')
        should delay removal of that session beyond the default timeout.
        """
        self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false'])
        session_timeout = self.fs.get_var("session_timeout")
        mount_a_gid = self.mount_a.get_global_id()

        # Double the timeout for mount_a's session only
        self.fs.mds_asok(['session', 'config', '%s' % mount_a_gid, 'timeout', '%s' % (session_timeout * 2)])

        self.mount_a.kill();

        self.assert_session_count(2)

        # After 1.5x the default timeout the overridden session survives...
        time.sleep(session_timeout * 1.5)
        self.assert_session_state(mount_a_gid, "open")

        # ...but after a further default timeout it is removed
        time.sleep(session_timeout)
        self.assert_session_count(1)

        self.mount_a.kill_cleanup()
9f95a23c | 635 | |
    def test_reconnect_after_blocklisted(self):
        """
        Test reconnect after blocklisted.
        - writing to a fd that was opened before blocklist should return -EBADF
        - reading/writing to a file with lost file locks should return -EIO
        - readonly fd should continue to work
        """

        self.mount_a.umount_wait()

        # Remount with auto-reconnect-after-stale enabled (FUSE) or the
        # kernel's recover_session=clean option; skip if the kernel does
        # not support the latter.
        if isinstance(self.mount_a, FuseMount):
            self.mount_a.mount_wait(mntargs=['--client_reconnect_stale=1', '--fuse_disable_pagecache=1'])
        else:
            try:
                self.mount_a.mount_wait(mntopts=['recover_session=clean'])
            except CommandFailedError:
                self.mount_a.kill_cleanup()
                self.skipTest("Not implemented in current kernel")

        self.mount_a.wait_until_mounted()

        path = os.path.join(self.mount_a.mountpoint, 'testfile_reconnect_after_blocklisted')
        pyscript = dedent("""
            import os
            import sys
            import fcntl
            import errno
            import time

            fd1 = os.open("{path}.1", os.O_RDWR | os.O_CREAT, 0O666)
            fd2 = os.open("{path}.1", os.O_RDONLY)
            fd3 = os.open("{path}.2", os.O_RDWR | os.O_CREAT, 0O666)
            fd4 = os.open("{path}.2", os.O_RDONLY)

            os.write(fd1, b'content')
            os.read(fd2, 1);

            os.write(fd3, b'content')
            os.read(fd4, 1);
            fcntl.flock(fd4, fcntl.LOCK_SH | fcntl.LOCK_NB)

            print("blocklist")
            sys.stdout.flush()

            sys.stdin.readline()

            # wait for mds to close session
            time.sleep(10);

            # trigger 'open session' message. kclient relies on 'session reject' message
            # to detect if itself is blocklisted
            try:
                os.stat("{path}.1")
            except:
                pass

            # wait for auto reconnect
            time.sleep(10);

            try:
                os.write(fd1, b'content')
            except OSError as e:
                if e.errno != errno.EBADF:
                    raise
            else:
                raise RuntimeError("write() failed to raise error")

            os.read(fd2, 1);

            try:
                os.read(fd4, 1)
            except OSError as e:
                if e.errno != errno.EIO:
                    raise
            else:
                raise RuntimeError("read() failed to raise error")
            """).format(path=path)
        rproc = self.mount_a.client_remote.run(
                   args=['python3', '-c', pyscript],
                   wait=False, stdin=run.PIPE, stdout=run.PIPE)

        # Wait for the helper to announce it is ready to be blocklisted
        rproc.stdout.readline()

        # Evict (blocklist) the client while the helper waits on stdin
        mount_a_client_id = self.mount_a.get_global_id()
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])

        rproc.stdin.writelines(['done\n'])
        rproc.stdin.flush()

        # The helper script performs all the post-reconnect assertions;
        # exit status 0 means every expected errno was observed.
        rproc.wait()
        self.assertEqual(rproc.exitstatus, 0)
1e59de90 TL |
727 | |
    def test_refuse_client_session(self):
        """
        Test that client cannot start session when file system flag
        refuse_client_session is set
        """

        self.mount_a.umount_wait()
        self.fs.set_refuse_client_session(True)
        # With the flag set, a fresh mount attempt must be rejected
        with self.assertRaises(CommandFailedError):
            self.mount_a.mount_wait()
738 | ||
    def test_refuse_client_session_on_reconnect(self):
        """
        Test that client cannot reconnect when filesystem comes online and
        file system flag refuse_client_session is set
        """

        self.mount_a.create_files()
        self.mount_a.check_files()

        # Fail the MDS, set the flag, and let it come back: the existing
        # client must be refused during reconnect.
        self.fs.fail()
        self.fs.set_refuse_client_session(True)
        self.fs.set_joinable()
        with self.assert_cluster_log('client could not reconnect as'
                                     ' file system flag'
                                     ' refuse_client_session is set'):
            time.sleep(self.fs.get_var("session_timeout") * 1.5)
            self.assertEqual(len(self.fs.mds_tell(["session", "ls"])), 0)
        self.mount_a.umount_wait(force=True)
757 |