"""
Teuthology task for exercising CephFS client recovery
"""

import logging
import os
import re
import time
from textwrap import dedent

import distutils.version as version

from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
from tasks.cephfs.cephfs_test_case import CephFSTestCase
from teuthology.packaging import get_package_version


log = logging.getLogger(__name__)


# Arbitrary timeouts (in seconds) for operations involving restarting
# an MDS or waiting for it to come up
MDS_RESTART_GRACE = 60
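
# These tests are typically driven by teuthology's cephfs_test_runner task; a
# minimal job fragment (a sketch, assuming earlier tasks have already deployed
# a cluster and mounted two clients) might look like:
#
#   tasks:
#     - cephfs_test_runner:
#         modules:
#           - tasks.cephfs.test_client_recovery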
class TestClientNetworkRecovery(CephFSTestCase):
    REQUIRE_KCLIENT_REMOTE = True
    REQUIRE_ONE_CLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references: populated by the test framework from the
    # cluster's configuration, according to LOAD_SETTINGS above
    mds_session_timeout = None
    mds_reconnect_timeout = None
    ms_max_backoff = None
    def test_network_death(self):
        """
        Simulate software freeze or temporary network failure.

        Check that the client blocks I/O during failure, and completes
        I/O after failure.
        """

        # We only need one client
        self.mount_b.umount_wait()
        # Initially our one client session should be visible
        client_id = self.mount_a.get_global_id()
        ls_data = self._session_list()
        self.assert_session_count(1, ls_data)
        self.assertEqual(ls_data[0]['id'], client_id)
        self.assert_session_state(client_id, "open")
        # ...and capable of doing I/O without blocking
        self.mount_a.create_files()

        # ...but if we turn off the network
        self.fs.set_clients_block(True)

        # ...and try to start an I/O
        write_blocked = self.mount_a.write_background()

        # ...then it should block
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "open")
        time.sleep(self.mds_session_timeout * 1.5)  # Long enough for MDS to consider session stale
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "stale")
        # ...until we re-enable I/O
        self.fs.set_clients_block(False)

        # ...when it should complete promptly
        a = time.time()
        self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2)
        write_blocked.wait()  # Already know we're finished, wait() to raise exception on errors
        recovery_time = time.time() - a
        log.info("recovery time: {0}".format(recovery_time))
        self.assert_session_state(client_id, "open")
class TestClientRecovery(CephFSTestCase):
    REQUIRE_KCLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references: populated by the test framework from the
    # cluster's configuration, according to LOAD_SETTINGS above
    mds_session_timeout = None
    mds_reconnect_timeout = None
    ms_max_backoff = None
    def test_basic(self):
        # Check that two clients come up healthy and see each others' files
        # =====================================================
        self.mount_a.create_files()
        self.mount_a.check_files()
        self.mount_a.umount_wait()

        self.mount_b.check_files()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

        # Check that the admin socket interface is correctly reporting
        # the two sessions
        # =====================================================
        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        self.assertSetEqual(
            set([l['id'] for l in ls_data]),
            {self.mount_a.get_global_id(), self.mount_b.get_global_id()}
        )
    def test_restart(self):
        # Check that after an MDS restart both clients reconnect and continue
        # to handle I/O
        # =====================================================
        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        self.mount_a.create_destroy()
        self.mount_b.create_destroy()
    def _session_num_caps(self, client_id):
        """
        Return the number of capabilities held by the given client, as
        reported by the MDS admin socket's 'session ls' output.
        """
        ls_data = self.fs.mds_asok(['session', 'ls'])
        return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps'])
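
    # For illustration, a trimmed (not exhaustive) sketch of the 'session ls'
    # entries consulted by these tests; the field names are the ones actually
    # read in this class, the values are hypothetical:
    #
    #   [{"id": 4116, "num_caps": 508, "reconnecting": False, ...}, ...]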
    def test_reconnect_timeout(self):
        # Reconnect timeout
        # =================
        # Check that if I stop an MDS and a client goes away, the MDS waits
        # for the reconnect period
        self.fs.mds_stop()
        self.fs.mds_fail()

        mount_a_client_id = self.mount_a.get_global_id()
        self.mount_a.umount_wait(force=True)

        self.fs.mds_restart()

        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        # Check that the MDS locally reports its state correctly
        status = self.fs.mds_asok(['status'])
        self.assertIn("reconnect_status", status)

        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        # The session for the dead client should have the 'reconnect' flag set
        self.assertTrue(self.get_session(mount_a_client_id)['reconnecting'])
        # Wait for the reconnect state to clear; this should take the
        # reconnect timeout period.
        in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2)
        # Check that the period we waited to enter active is within a factor
        # of two of the reconnect timeout.
        self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout / 2,
                           "Should have been in reconnect phase for {0} but only took {1}".format(
                               self.mds_reconnect_timeout, in_reconnect_for
                           ))

        self.assert_session_count(1)

        # Check that the client that timed out during reconnect can
        # mount again and do I/O
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        self.mount_a.create_destroy()

        self.assert_session_count(2)
    def test_reconnect_eviction(self):
        # Eviction during reconnect
        # =========================
        mount_a_client_id = self.mount_a.get_global_id()

        self.fs.mds_stop()
        self.fs.mds_fail()

        # The mount goes away while the MDS is offline
        self.mount_a.kill()

        self.fs.mds_restart()

        # Enter reconnect phase
        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        self.assert_session_count(2)

        # Evict the stuck client
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
        self.assert_session_count(1)

        # Observe that we proceed to active phase without waiting for the full reconnect timeout
        evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
        # Once we evict the troublemaker, the reconnect phase should complete
        # in well under the reconnect timeout.
        self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5,
                        "reconnect did not complete soon enough after eviction, took {0}".format(
                            evict_til_active
                        ))

        # We killed earlier, so we must clean up before trying to use the mount again
        self.mount_a.kill_cleanup()

        # Bring the client back
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        self.mount_a.create_destroy()
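
    # The 'session evict' asok call used above (and in later tests) is roughly
    # what an operator would run by hand on the MDS host, e.g.:
    #
    #   ceph daemon mds.<name> session evict <client_global_id>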
    def test_stale_caps(self):
        # Capability release from stale session
        # =====================================
        cap_holder = self.mount_a.open_background()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # Now, after mds_session_timeout seconds, the waiter should
            # complete their operation when the MDS marks the holder's
            # session stale.
            cap_waiter = self.mount_b.write_background()
            a = time.time()
            cap_waiter.wait()
            b = time.time()

            # Should have succeeded
            self.assertEqual(cap_waiter.exitstatus, 0)

            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0,
                            "Capability handover took {0}, expected approx {1}".format(
                                cap_waited, self.mds_session_timeout
                            ))

            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            # teardown() doesn't quite handle this case cleanly, so help it out
            self.mount_a.kill_cleanup()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
    def test_evicted_caps(self):
        # Eviction while holding a capability
        # ===================================

        # Take out a write capability on a file on client A,
        # and then immediately kill it.
        cap_holder = self.mount_a.open_background()
        mount_a_client_id = self.mount_a.get_global_id()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # The waiter should get stuck waiting for the capability
            # held on the MDS by the now-dead client A
            cap_waiter = self.mount_b.write_background()
            time.sleep(5)
            self.assertFalse(cap_waiter.finished)

            self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
            # Now, because I evicted the old holder of the capability, it should
            # immediately get handed over to the waiter
            a = time.time()
            cap_waiter.wait()
            b = time.time()
            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            # This is the check that it happened 'now' rather than waiting
            # for the session timeout
            self.assertLess(cap_waited, self.mds_session_timeout / 2.0,
                            "Capability handover took {0}, expected less than {1}".format(
                                cap_waited, self.mds_session_timeout / 2.0
                            ))

            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            # teardown() doesn't quite handle this case cleanly, so help it out
            self.mount_a.kill_cleanup()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
    def test_trim_caps(self):
        # Trim capabilities when reconnecting to the MDS
        # ==============================================

        count = 500
        # Create lots of files
        for i in range(count):
            self.mount_a.run_shell(["touch", "f{0}".format(i)])

        # Populate mount_b's cache
        self.mount_b.run_shell(["ls", "-l"])

        client_id = self.mount_b.get_global_id()
        num_caps = self._session_num_caps(client_id)
        self.assertGreaterEqual(num_caps, count)

        # Restart the MDS; the client should trim its cache when reconnecting
        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        num_caps = self._session_num_caps(client_id)
        self.assertLess(num_caps, count,
                        "should have fewer than {0} capabilities, have {1}".format(
                            count, num_caps
                        ))
    def _is_flockable(self):
        a_version_str = get_package_version(self.mount_a.client_remote, "fuse")
        b_version_str = get_package_version(self.mount_b.client_remote, "fuse")
        flock_version_str = "2.9"

        version_regex = re.compile(r"[0-9\.]+")
        a_result = version_regex.match(a_version_str)
        self.assertTrue(a_result)
        b_result = version_regex.match(b_version_str)
        self.assertTrue(b_result)
        a_version = version.StrictVersion(a_result.group())
        b_version = version.StrictVersion(b_result.group())
        flock_version = version.StrictVersion(flock_version_str)

        if a_version >= flock_version and b_version >= flock_version:
            log.info("flock locks are available")
            return True
        else:
            log.info("not testing flock locks, machines have versions {av} and {bv}".format(
                av=a_version_str, bv=b_version_str))
            return False
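
    # For illustration: get_package_version() may return a distro-decorated
    # string such as "2.9.2-6.el7" (hypothetical), which StrictVersion cannot
    # parse directly; the regex match above keeps only the leading "2.9.2".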
    def test_filelock(self):
        """
        Check that a file lock doesn't get lost after an MDS restart
        """
        flockable = self._is_flockable()
        lock_holder = self.mount_a.lock_background(do_flock=flockable)

        self.mount_b.wait_for_visible("background_file-2")
        self.mount_b.check_filelock(do_flock=flockable)

        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        self.mount_b.check_filelock(do_flock=flockable)

        # Tear down the background process
        lock_holder.stdin.close()
        try:
            lock_holder.wait()
        except (CommandFailedError, ConnectionLostError):
            # We killed it, so it raises an error
            pass
    def test_filelock_eviction(self):
        """
        Check that a file lock held by an evicted client is handed over to a
        waiting client.
        """
        if not self._is_flockable():
            self.skipTest("flock is not available")

        lock_holder = self.mount_a.lock_background()
        self.mount_b.wait_for_visible("background_file-2")
        self.mount_b.check_filelock()

        lock_taker = self.mount_b.lock_and_release()
        # Check the taker is waiting (doesn't get it immediately)
        time.sleep(5)
        self.assertFalse(lock_holder.finished)
        self.assertFalse(lock_taker.finished)

        # Evict the lock holder
        mount_a_client_id = self.mount_a.get_global_id()
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])

        # Evicting mount_a should let mount_b's attempt to take the lock
        # succeed
        self.wait_until_true(lambda: lock_taker.finished, timeout=10)

        # teardown() doesn't quite handle this case cleanly, so help it out
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        # Bring the client back
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
    def test_dir_fsync(self):
        self._test_fsync(True)

    def test_create_fsync(self):
        self._test_fsync(False)
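
    # Background for _test_fsync below: fsync on a directory fd persists the
    # directory's entries (i.e. the new dentry), while fsync on a file fd
    # covers the file itself. A minimal local sketch of the pattern the remote
    # script exercises:
    #
    #   dfd = os.open("subdir", os.O_DIRECTORY)
    #   fd = open(os.path.join("subdir", "childfile"), "w")
    #   os.fsync(fd.fileno())  # dirfsync=False variant
    #   os.fsync(dfd)          # dirfsync=True variant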
    def _test_fsync(self, dirfsync):
        """
        That calls to fsync guarantee visibility of metadata to another
        client immediately after the fsyncing client dies.
        """

        # Leave this client out until it's needed
        self.mount_b.umount_wait()

        # Create dir + child dentry on client A, and fsync the dir
        path = os.path.join(self.mount_a.mountpoint, "subdir")
        self.mount_a.run_python(
            dedent("""
                import os
                import time

                path = "{path}"

                print("Starting creation...")
                start = time.time()

                os.mkdir(path)
                dfd = os.open(path, os.O_DIRECTORY)

                fd = open(os.path.join(path, "childfile"), "w")
                print("Finished creation in {{0}}s".format(time.time() - start))

                print("Starting fsync...")
                start = time.time()
                if {dirfsync}:
                    os.fsync(dfd)
                else:
                    os.fsync(fd)
                print("Finished fsync in {{0}}s".format(time.time() - start))
            """.format(path=path, dirfsync=str(dirfsync))))
        # Immediately kill the MDS and then client A
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        # Restart the MDS.  Wait for it to come up: it'll have to time out in clientreplay
        self.fs.mds_restart()
        log.info("Waiting for reconnect...")
        self.fs.wait_for_state("up:reconnect")
        log.info("Waiting for active...")
        self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout)
        log.info("Reached active...")

        # Is the child dentry visible from mount B?
        self.mount_b.mount()
        self.mount_b.wait_until_mounted()
        self.mount_b.run_shell(["ls", "subdir/childfile"])