"""
Teuthology task for exercising CephFS client recovery
"""

import logging
from textwrap import dedent
import time
import distutils.version as version
import re
import os

from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase
from teuthology.packaging import get_package_version
from unittest import SkipTest

log = logging.getLogger(__name__)


# Arbitrary timeouts for operations involving restarting
# an MDS or waiting for it to come up
MDS_RESTART_GRACE = 60


class TestClientNetworkRecovery(CephFSTestCase):
    REQUIRE_KCLIENT_REMOTE = True
    REQUIRE_ONE_CLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references
    mds_session_timeout = None
    mds_reconnect_timeout = None
    ms_max_backoff = None

    def test_network_death(self):
        """
        Simulate software freeze or temporary network failure.

        Check that the client blocks I/O during failure, and completes
        I/O after failure.
        """

        # We only need one client
        self.mount_b.umount_wait()

        # Initially our one client session should be visible
        client_id = self.mount_a.get_global_id()
        ls_data = self._session_list()
        self.assert_session_count(1, ls_data)
        self.assertEqual(ls_data[0]['id'], client_id)
        self.assert_session_state(client_id, "open")

        # ...and capable of doing I/O without blocking
        self.mount_a.create_files()

        # ...but if we turn off the network
        self.fs.set_clients_block(True)

        # ...and try and start an I/O
        write_blocked = self.mount_a.write_background()

        # ...then it should block
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "open")
        time.sleep(self.mds_session_timeout * 1.5)  # Long enough for MDS to consider session stale
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "stale")

        # ...until we re-enable I/O
        self.fs.set_clients_block(False)

        # ...when it should complete promptly
        a = time.time()
        self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2)
        write_blocked.wait()  # Already know we're finished, wait() to raise exception on errors
        recovery_time = time.time() - a
        log.info("recovery time: {0}".format(recovery_time))
        self.assert_session_state(client_id, "open")


class TestClientRecovery(CephFSTestCase):
    REQUIRE_KCLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references
    mds_session_timeout = None
    mds_reconnect_timeout = None
    ms_max_backoff = None

    def test_basic(self):
        # Check that two clients come up healthy and see each others' files
        # =====================================================
        self.mount_a.create_files()
        self.mount_a.check_files()
        self.mount_a.umount_wait()

        self.mount_b.check_files()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

        # Check that the admin socket interface is correctly reporting
        # two sessions
        # =====================================================
        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        self.assertSetEqual(
            set([l['id'] for l in ls_data]),
            {self.mount_a.get_global_id(), self.mount_b.get_global_id()}
        )

    def test_restart(self):
        # Check that after an MDS restart both clients reconnect and continue
        # to do I/O
        # =====================================================
        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        self.mount_a.create_destroy()
        self.mount_b.create_destroy()

    def _session_num_caps(self, client_id):
        ls_data = self.fs.mds_asok(['session', 'ls'])
        return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps'])

    def test_reconnect_timeout(self):
        # Reconnect timeout
        # =================
        # Check that if I stop an MDS and a client goes away, the MDS waits
        # for the reconnect period
        self.fs.mds_stop()
        self.fs.mds_fail()

        mount_a_client_id = self.mount_a.get_global_id()
        self.mount_a.umount_wait(force=True)

        self.fs.mds_restart()

        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        # Check that the MDS locally reports its state correctly
        status = self.fs.mds_asok(['status'])
        self.assertIn("reconnect_status", status)

        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        # The session for the dead client should have the 'reconnect' flag set
        self.assertTrue(self.get_session(mount_a_client_id)['reconnecting'])

        # Wait for the reconnect state to clear, this should take the
        # reconnect timeout period.
        in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2)
        # Check that the period we waited to enter active is within a factor
        # of two of the reconnect timeout.
        self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout / 2,
                           "Should have been in reconnect phase for {0} but only took {1}".format(
                               self.mds_reconnect_timeout, in_reconnect_for
                           ))

        self.assert_session_count(1)

        # Check that the client that timed out during reconnect can
        # mount again and do I/O
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        self.mount_a.create_destroy()

        self.assert_session_count(2)

    def test_reconnect_eviction(self):
        # Eviction during reconnect
        # =========================
        mount_a_client_id = self.mount_a.get_global_id()

        self.fs.mds_stop()
        self.fs.mds_fail()

        # The mount goes away while the MDS is offline
        self.mount_a.kill()

        self.fs.mds_restart()

        # Enter reconnect phase
        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        self.assert_session_count(2)

        # Evict the stuck client
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
        self.assert_session_count(1)

        # Observe that we proceed to active phase without waiting full reconnect timeout
        evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
        # Once we evict the troublemaker, the reconnect phase should complete
        # in well under the reconnect timeout.
        self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5,
                        "reconnect did not complete soon enough after eviction, took {0}".format(
                            evict_til_active
                        ))

        # We killed earlier so must clean up before trying to use again
        self.mount_a.kill_cleanup()

        # Bring the client back
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        self.mount_a.create_destroy()

    def test_stale_caps(self):
        # Capability release from stale session
        # =====================================
        cap_holder = self.mount_a.open_background()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # Now, after mds_session_timeout seconds, the waiter should
            # complete their operation when the MDS marks the holder's
            # session stale.
            cap_waiter = self.mount_b.write_background()
            a = time.time()
            cap_waiter.wait()
            b = time.time()

            # Should have succeeded
            self.assertEqual(cap_waiter.exitstatus, 0)

            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0,
                            "Capability handover took {0}, expected approx {1}".format(
                                cap_waited, self.mds_session_timeout
                            ))

            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            # teardown() doesn't quite handle this case cleanly, so help it out
            self.mount_a.kill_cleanup()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_evicted_caps(self):
        # Eviction while holding a capability
        # ===================================

        # Take out a write capability on a file on client A,
        # and then immediately kill it.
        cap_holder = self.mount_a.open_background()
        mount_a_client_id = self.mount_a.get_global_id()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # The waiter should get stuck waiting for the capability
            # held on the MDS by the now-dead client A
            cap_waiter = self.mount_b.write_background()

            self.assertFalse(cap_waiter.finished)

            self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
            # Now, because I evicted the old holder of the capability, it should
            # immediately get handed over to the waiter
            a = time.time()
            cap_waiter.wait()
            b = time.time()
            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            # This is the check that it happened 'now' rather than waiting
            # for the session timeout
            self.assertLess(cap_waited, self.mds_session_timeout / 2.0,
                            "Capability handover took {0}, expected less than {1}".format(
                                cap_waited, self.mds_session_timeout / 2.0
                            ))

            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            self.mount_a.kill_cleanup()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_trim_caps(self):
        # Trim capability when reconnecting MDS
        # ===================================

        count = 500
        # Create lots of files
        for i in range(count):
            self.mount_a.run_shell(["touch", "f{0}".format(i)])

        # Populate mount_b's cache
        self.mount_b.run_shell(["ls", "-l"])

        client_id = self.mount_b.get_global_id()
        num_caps = self._session_num_caps(client_id)
        self.assertGreaterEqual(num_caps, count)

        # Restart MDS. client should trim its cache when reconnecting to the MDS
        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        num_caps = self._session_num_caps(client_id)
        self.assertLess(num_caps, count,
                        "should have less than {0} capabilities, have {1}".format(
                            count, num_caps
                        ))

    def _is_flockable(self):
        a_version_str = get_package_version(self.mount_a.client_remote, "fuse")
        b_version_str = get_package_version(self.mount_b.client_remote, "fuse")
        flock_version_str = "2.9"

        version_regex = re.compile(r"[0-9\.]+")
        a_result = version_regex.match(a_version_str)
        self.assertTrue(a_result)
        b_result = version_regex.match(b_version_str)
        self.assertTrue(b_result)
        a_version = version.StrictVersion(a_result.group())
        b_version = version.StrictVersion(b_result.group())
        flock_version = version.StrictVersion(flock_version_str)

        if a_version >= flock_version and b_version >= flock_version:
            log.info("flock locks are available")
            return True
        else:
            log.info("not testing flock locks, machines have versions {av} and {bv}".format(
                av=a_version_str, bv=b_version_str))
            return False

    def test_filelock(self):
        """
        Check that file lock doesn't get lost after an MDS restart
        """
        flockable = self._is_flockable()
        lock_holder = self.mount_a.lock_background(do_flock=flockable)

        self.mount_b.wait_for_visible("background_file-2")
        self.mount_b.check_filelock(do_flock=flockable)

        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        self.mount_b.check_filelock(do_flock=flockable)

        # Tear down the background process
        lock_holder.stdin.close()
        try:
            lock_holder.wait()
        except (CommandFailedError, ConnectionLostError):
            # We killed it, so it raises an error
            pass

    def test_filelock_eviction(self):
        """
        Check that file lock held by evicted client is given to
        waiting client.
        """
        if not self._is_flockable():
            self.skipTest("flock is not available")

        lock_holder = self.mount_a.lock_background()
        self.mount_b.wait_for_visible("background_file-2")
        self.mount_b.check_filelock()

        lock_taker = self.mount_b.lock_and_release()
        # Check the taker is waiting (doesn't get it immediately)
        time.sleep(2)
        self.assertFalse(lock_holder.finished)
        self.assertFalse(lock_taker.finished)

        mount_a_client_id = self.mount_a.get_global_id()
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])

        # Evicting mount_a should let mount_b's attempt to take the lock
        # succeed
        self.wait_until_true(lambda: lock_taker.finished, timeout=10)

        # teardown() doesn't quite handle this case cleanly, so help it out
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        # Bring the client back
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_dir_fsync(self):
        self._test_fsync(True)

    def test_create_fsync(self):
        self._test_fsync(False)

    def _test_fsync(self, dirfsync):
        """
        That calls to fsync guarantee visibility of metadata to another
        client immediately after the fsyncing client dies.
        """

        # Leave this guy out until he's needed
        self.mount_b.umount_wait()

        # Create dir + child dentry on client A, and fsync the dir
        path = os.path.join(self.mount_a.mountpoint, "subdir")
        self.mount_a.run_python(dedent("""
            import os
            import time

            path = "{path}"

            print "Starting creation..."
            start = time.time()

            os.mkdir(path)
            dfd = os.open(path, os.O_DIRECTORY)

            fd = open(os.path.join(path, "childfile"), "w")
            print "Finished creation in {{0}}s".format(time.time() - start)

            print "Starting fsync..."
            start = time.time()
            if {dirfsync}:
                os.fsync(dfd)
            else:
                os.fsync(fd)
            print "Finished fsync in {{0}}s".format(time.time() - start)
        """.format(path=path, dirfsync=str(dirfsync))))

        # Immediately kill the MDS and then client A
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        # Restart the MDS. Wait for it to come up, it'll have to time out in clientreplay
        self.fs.mds_restart()
        log.info("Waiting for reconnect...")
        self.fs.wait_for_state("up:reconnect")
        log.info("Waiting for active...")
        self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout)
        log.info("Reached active...")

        # Is the child dentry visible from mount B?
        self.mount_b.mount()
        self.mount_b.wait_until_mounted()
        self.mount_b.run_shell(["ls", "subdir/childfile"])

    def test_stale_renew(self):
        if not isinstance(self.mount_a, FuseMount):
            raise SkipTest("Require FUSE client to handle signal STOP/CONT")

        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["touch", "testdir/file1"])
        # populate readdir cache
        self.mount_a.run_shell(["ls", "testdir"])
        self.mount_b.run_shell(["ls", "testdir"])

        # check if readdir cache is effective
        initial_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
        self.mount_b.run_shell(["ls", "testdir"])
        current_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
        self.assertEqual(current_readdirs, initial_readdirs)

        mount_b_gid = self.mount_b.get_global_id()
        mount_b_pid = self.mount_b.get_client_pid()
        # stop ceph-fuse process of mount_b
        self.mount_b.client_remote.run(args=["sudo", "kill", "-STOP", mount_b_pid])

        self.assert_session_state(mount_b_gid, "open")
        time.sleep(self.mds_session_timeout * 1.5)  # Long enough for MDS to consider session stale
        self.assert_session_state(mount_b_gid, "stale")

        self.mount_a.run_shell(["touch", "testdir/file2"])

        # resume ceph-fuse process of mount_b
        self.mount_b.client_remote.run(args=["sudo", "kill", "-CONT", mount_b_pid])
        # Is the new file visible from mount_b? (caps become invalid after session stale)
        self.mount_b.run_shell(["ls", "testdir/file2"])

    def test_unmount_for_evicted_client(self):
        """Test if client hangs on unmount after evicting the client."""
        mount_a_client_id = self.mount_a.get_global_id()
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])

        self.mount_a.umount_wait(require_clean=True, timeout=30)