update sources to 12.2.10

diff --git a/ceph/qa/tasks/cephfs/test_misc.py b/ceph/qa/tasks/cephfs/test_misc.py
index d857cfddf01a47f972957c963318ebf4c5105c1f..4158538fd41d25f3d17c15171300f28ba8fb6751 100644
--- a/ceph/qa/tasks/cephfs/test_misc.py
+++ b/ceph/qa/tasks/cephfs/test_misc.py
@@ -2,11 +2,13 @@
 from unittest import SkipTest
 from tasks.cephfs.fuse_mount import FuseMount
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
-from teuthology.orchestra.run import CommandFailedError
+from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
 import errno
 import time
 import json
+import logging
 
+log = logging.getLogger(__name__)
 
 class TestMisc(CephFSTestCase):
     CLIENTS_REQUIRED = 2
@@ -130,6 +132,59 @@ class TestMisc(CephFSTestCase):
         ls_data = self.fs.mds_asok(['session', 'ls'])
         self.assert_session_count(1, ls_data)
 
+    def test_cap_revoke_nonresponder(self):
+        """
+        Check that a client is evicted if it does not respond to a cap
+        revoke request within the configured number of seconds.
+        """
+        session_timeout = self.fs.get_var("session_timeout")
+        eviction_timeout = session_timeout / 2.0
+
+        self.fs.mds_asok(['config', 'set', 'mds_cap_revoke_eviction_timeout',
+                          str(eviction_timeout)])
+
+        cap_holder = self.mount_a.open_background()
+
+        # Wait for the file to be visible from another client, indicating
+        # that mount_a has completed its network ops
+        self.mount_b.wait_for_visible()
+
+        # Simulate client death
+        self.mount_a.kill()
+
+        try:
+            # The waiter should get stuck waiting for the capability
+            # held on the MDS by the now-dead client A
+            cap_waiter = self.mount_b.write_background()
+
+            a = time.time()
+            time.sleep(eviction_timeout)
+            cap_waiter.wait()
+            b = time.time()
+            cap_waited = b - a
+            log.info("cap_waiter waited {0}s".format(cap_waited))
+
+            # Check that the cap is transferred before the session timeout
+            # kicks in. This is a good enough check to ensure that the client
+            # was evicted by the cap auto-evicter rather than transitioning to
+            # the stale state and then getting evicted.
+            self.assertLess(cap_waited, session_timeout,
+                            "Capability handover took {0}, expected less than {1}".format(
+                                cap_waited, session_timeout
+                            ))
+
+            cap_holder.stdin.close()
+            try:
+                cap_holder.wait()
+            except (CommandFailedError, ConnectionLostError):
+                # We killed it (and possibly its node), so it raises an error
+                pass
+        finally:
+            self.mount_a.kill_cleanup()
+
+        self.mount_a.mount()
+        self.mount_a.wait_until_mounted()
+
     def test_filtered_df(self):
         pool_name = self.fs.get_data_pool_name()
         raw_df = self.fs.get_pool_df(pool_name)
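
For reference, the new test drives the MDS admin socket through Filesystem.mds_asok(). The standalone sketch below issues the same config set / config get / session ls admin-socket commands with the plain ceph CLI, outside of teuthology. It is illustrative only: the daemon name "mds.a" and the 30-second timeout are assumptions, while the option name mds_cap_revoke_eviction_timeout is taken from the diff above.

#!/usr/bin/env python
# Illustrative sketch, not part of the change above.
import json
import subprocess


def mds_daemon_command(daemon, *args):
    # Rough equivalent of mds_asok() in the test: talk to the MDS through
    # its admin socket via "ceph daemon".
    return subprocess.check_output(['ceph', 'daemon', daemon] + list(args))


def main():
    daemon = 'mds.a'  # assumption: substitute the local MDS id

    # Enable cap-revoke eviction with a 30 second grace period
    # (the test derives its value from session_timeout instead).
    mds_daemon_command(daemon, 'config', 'set',
                       'mds_cap_revoke_eviction_timeout', '30')

    # Read the value back to confirm it took effect.
    value = json.loads(mds_daemon_command(daemon, 'config', 'get',
                                          'mds_cap_revoke_eviction_timeout'))
    print(value)

    # "session ls" is what the test inspects (via assert_session_count)
    # to see whether the non-responding client has been evicted.
    sessions = json.loads(mds_daemon_command(daemon, 'session', 'ls'))
    print('active sessions: {0}'.format(len(sessions)))


if __name__ == '__main__':
    main()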