"""
Teuthology task for exercising CephFS client recovery
"""

import logging
from textwrap import dedent
import time
import distutils.version as version
import re
import os

from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase
from teuthology.packaging import get_package_version
from unittest import SkipTest


log = logging.getLogger(__name__)


# Arbitrary timeouts for operations involving restarting
# an MDS or waiting for it to come up
MDS_RESTART_GRACE = 60


class TestClientNetworkRecovery(CephFSTestCase):
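    """
    Check client and MDS behaviour across a temporary network outage
    between them: I/O should block while the client's session goes
    stale, then complete promptly once traffic is allowed again.
    """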
    REQUIRE_KCLIENT_REMOTE = True
    REQUIRE_ONE_CLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references
    mds_session_timeout = None
    mds_reconnect_timeout = None
    ms_max_backoff = None

    def test_network_death(self):
        """
        Simulate software freeze or temporary network failure.

        Check that the client blocks I/O during failure, and completes
        I/O after failure.
        """

        # We only need one client
        self.mount_b.umount_wait()

        # Initially our one client session should be visible
        client_id = self.mount_a.get_global_id()
        ls_data = self._session_list()
        self.assert_session_count(1, ls_data)
        self.assertEqual(ls_data[0]['id'], client_id)
        self.assert_session_state(client_id, "open")

        # ...and capable of doing I/O without blocking
        self.mount_a.create_files()

        # ...but if we turn off the network
        self.fs.set_clients_block(True)

        # ...and try to start an I/O
        write_blocked = self.mount_a.write_background()

        # ...then it should block
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "open")
        time.sleep(self.mds_session_timeout * 1.5)  # Long enough for the MDS to consider the session stale
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "stale")

        # ...until we re-enable I/O
        self.fs.set_clients_block(False)

        # ...when it should complete promptly
        a = time.time()
        self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2)
        write_blocked.wait()  # Already know we're finished, wait() to raise exception on errors
        recovery_time = time.time() - a
        log.info("recovery time: {0}".format(recovery_time))
        self.assert_session_state(client_id, "open")


class TestClientRecovery(CephFSTestCase):
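    """
    Exercise CephFS client recovery paths: MDS restart and reconnect,
    session eviction, capability handover and trimming, file locking
    across restarts, fsync durability, and stale session renewal.
    """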
    REQUIRE_KCLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references
    mds_session_timeout = None
    mds_reconnect_timeout = None
    ms_max_backoff = None

    def test_basic(self):
        # Check that two clients come up healthy and see each other's files
        # =====================================================
        self.mount_a.create_files()
        self.mount_a.check_files()
        self.mount_a.umount_wait()

        self.mount_b.check_files()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

        # Check that the admin socket interface is correctly reporting
        # two sessions
        # =====================================================
        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        self.assertSetEqual(
            set([l['id'] for l in ls_data]),
            {self.mount_a.get_global_id(), self.mount_b.get_global_id()}
        )

    def test_restart(self):
        # Check that after an MDS restart both clients reconnect and continue
        # to handle I/O
        # =====================================================
        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        self.mount_a.create_destroy()
        self.mount_b.create_destroy()

    def _session_num_caps(self, client_id):
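        """
        Return the number of capabilities held by the given client's
        session, as reported by the MDS admin socket 'session ls' output.
        """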
        ls_data = self.fs.mds_asok(['session', 'ls'])
        return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps'])

    def test_reconnect_timeout(self):
        # Reconnect timeout
        # =================
        # Check that if I stop an MDS and a client goes away, the MDS waits
        # for the reconnect period
        self.fs.mds_stop()
        self.fs.mds_fail()

        mount_a_client_id = self.mount_a.get_global_id()
        self.mount_a.umount_wait(force=True)

        self.fs.mds_restart()

        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        # Check that the MDS locally reports its state correctly
        status = self.fs.mds_asok(['status'])
        self.assertIn("reconnect_status", status)

        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        # The session for the dead client should have the 'reconnect' flag set
        self.assertTrue(self.get_session(mount_a_client_id)['reconnecting'])

        # Wait for the reconnect state to clear; this should take the
        # reconnect timeout period.
        in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2)
        # Check that the period we waited to enter active is within a factor
        # of two of the reconnect timeout.
        self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout / 2,
                           "Should have been in reconnect phase for {0} but only took {1}".format(
                               self.mds_reconnect_timeout, in_reconnect_for
                           ))

        self.assert_session_count(1)

        # Check that the client that timed out during reconnect can
        # mount again and do I/O
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        self.mount_a.create_destroy()

        self.assert_session_count(2)

    def test_reconnect_eviction(self):
        # Eviction during reconnect
        # =========================
        mount_a_client_id = self.mount_a.get_global_id()

        self.fs.mds_stop()
        self.fs.mds_fail()

        # The mount goes away while the MDS is offline
        self.mount_a.kill()

        self.fs.mds_restart()

        # Enter reconnect phase
        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        self.assert_session_count(2)

        # Evict the stuck client
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
        self.assert_session_count(1)

        # Observe that we proceed to the active phase without waiting for the full reconnect timeout
        evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
        # Once we evict the troublemaker, the reconnect phase should complete
        # in well under the reconnect timeout.
        self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5,
                        "reconnect did not complete soon enough after eviction, took {0}".format(
                            evict_til_active
                        ))

        # We killed the mount earlier, so we must clean it up before using it again
        self.mount_a.kill_cleanup()

        # Bring the client back
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        self.mount_a.create_destroy()

    def test_stale_caps(self):
        # Capability release from stale session
        # =====================================
        cap_holder = self.mount_a.open_background()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # Now, after mds_session_timeout seconds, the waiter should
            # complete its operation when the MDS marks the holder's
            # session stale.
            cap_waiter = self.mount_b.write_background()
            a = time.time()
            cap_waiter.wait()
            b = time.time()

            # Should have succeeded
            self.assertEqual(cap_waiter.exitstatus, 0)

            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0,
                            "Capability handover took {0}, expected approx {1}".format(
                                cap_waited, self.mds_session_timeout
                            ))

            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            # teardown() doesn't quite handle this case cleanly, so help it out
            self.mount_a.kill_cleanup()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_evicted_caps(self):
        # Eviction while holding a capability
        # ===================================

        # Take out a write capability on a file on client A,
        # and then immediately kill it.
        cap_holder = self.mount_a.open_background()
        mount_a_client_id = self.mount_a.get_global_id()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # The waiter should get stuck waiting for the capability
            # held on the MDS by the now-dead client A
            cap_waiter = self.mount_b.write_background()
            time.sleep(5)
            self.assertFalse(cap_waiter.finished)

            self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
            # Now, because I evicted the old holder of the capability, it should
            # immediately get handed over to the waiter
            a = time.time()
            cap_waiter.wait()
            b = time.time()
            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            # This is the check that it happened 'now' rather than waiting
            # for the session timeout
            self.assertLess(cap_waited, self.mds_session_timeout / 2.0,
                            "Capability handover took {0}, expected less than {1}".format(
                                cap_waited, self.mds_session_timeout / 2.0
                            ))

            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            self.mount_a.kill_cleanup()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_trim_caps(self):
        # Trim capabilities when reconnecting to the MDS
        # ==============================================

        count = 500
        # Create lots of files
        for i in range(count):
            self.mount_a.run_shell(["touch", "f{0}".format(i)])

        # Populate mount_b's cache
        self.mount_b.run_shell(["ls", "-l"])

        client_id = self.mount_b.get_global_id()
        num_caps = self._session_num_caps(client_id)
        self.assertGreaterEqual(num_caps, count)

        # Restart the MDS. The client should trim its cache when reconnecting to the MDS
        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        num_caps = self._session_num_caps(client_id)
        self.assertLess(num_caps, count,
                        "should have less than {0} capabilities, have {1}".format(
                            count, num_caps
                        ))

    def _is_flockable(self):
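        """
        Return True only if the fuse package on both client remotes is
        recent enough (>= 2.9) to support flock locks; otherwise log why
        and return False.
        """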
        a_version_str = get_package_version(self.mount_a.client_remote, "fuse")
        b_version_str = get_package_version(self.mount_b.client_remote, "fuse")
        flock_version_str = "2.9"

        version_regex = re.compile(r"[0-9\.]+")
        a_result = version_regex.match(a_version_str)
        self.assertTrue(a_result)
        b_result = version_regex.match(b_version_str)
        self.assertTrue(b_result)
        a_version = version.StrictVersion(a_result.group())
        b_version = version.StrictVersion(b_result.group())
        flock_version = version.StrictVersion(flock_version_str)

        if a_version >= flock_version and b_version >= flock_version:
            log.info("flock locks are available")
            return True
        else:
            log.info("not testing flock locks, machines have versions {av} and {bv}".format(
                av=a_version_str, bv=b_version_str))
            return False

    def test_filelock(self):
        """
        Check that a file lock doesn't get lost after an MDS restart
        """

        flockable = self._is_flockable()
        lock_holder = self.mount_a.lock_background(do_flock=flockable)

        self.mount_b.wait_for_visible("background_file-2")
        self.mount_b.check_filelock(do_flock=flockable)

        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        self.mount_b.check_filelock(do_flock=flockable)

        # Tear down the background process
        lock_holder.stdin.close()
        try:
            lock_holder.wait()
        except (CommandFailedError, ConnectionLostError):
            # We killed it, so it raises an error
            pass

    def test_filelock_eviction(self):
        """
        Check that a file lock held by an evicted client is handed to a
        waiting client.
        """
        if not self._is_flockable():
            self.skipTest("flock is not available")

        lock_holder = self.mount_a.lock_background()
        self.mount_b.wait_for_visible("background_file-2")
        self.mount_b.check_filelock()

        lock_taker = self.mount_b.lock_and_release()
        # Check the taker is waiting (doesn't get it immediately)
        time.sleep(2)
        self.assertFalse(lock_holder.finished)
        self.assertFalse(lock_taker.finished)

        try:
            mount_a_client_id = self.mount_a.get_global_id()
            self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])

            # Evicting mount_a should let mount_b's attempt to take the lock
            # succeed
            self.wait_until_true(lambda: lock_taker.finished, timeout=10)
        finally:
            # teardown() doesn't quite handle this case cleanly, so help it out
            self.mount_a.kill()
            self.mount_a.kill_cleanup()

        # Bring the client back
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_dir_fsync(self):
        self._test_fsync(True)

    def test_create_fsync(self):
        self._test_fsync(False)

    def _test_fsync(self, dirfsync):
        """
        That calls to fsync guarantee visibility of metadata to another
        client immediately after the fsyncing client dies.
        """

        # Leave this guy out until he's needed
        self.mount_b.umount_wait()

        # Create dir + child dentry on client A, and fsync the dir
        path = os.path.join(self.mount_a.mountpoint, "subdir")
        self.mount_a.run_python(
            dedent("""
                import os
                import time

                path = "{path}"

                print("Starting creation...")
                start = time.time()

                os.mkdir(path)
                dfd = os.open(path, os.O_DIRECTORY)

                fd = open(os.path.join(path, "childfile"), "w")
                print("Finished creation in {{0}}s".format(time.time() - start))

                print("Starting fsync...")
                start = time.time()
                if {dirfsync}:
                    os.fsync(dfd)
                else:
                    os.fsync(fd)
                print("Finished fsync in {{0}}s".format(time.time() - start))
            """.format(path=path, dirfsync=str(dirfsync)))
        )

        # Immediately kill the MDS and then client A
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        # Restart the MDS. Wait for it to come up; it'll have to time out in clientreplay
        self.fs.mds_restart()
        log.info("Waiting for reconnect...")
        self.fs.wait_for_state("up:reconnect")
        log.info("Waiting for active...")
        self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout)
        log.info("Reached active...")

        # Is the child dentry visible from mount B?
        self.mount_b.mount()
        self.mount_b.wait_until_mounted()
        self.mount_b.run_shell(["ls", "subdir/childfile"])

    def test_stale_renew(self):
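        """
        Check that a client whose session has gone stale (simulated here by
        SIGSTOPping the ceph-fuse process past the session timeout) does not
        trust its stale caps after resuming: a file created by the other
        client in the meantime must be visible to it.
        """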
        if not isinstance(self.mount_a, FuseMount):
            raise SkipTest("Require FUSE client to handle signal STOP/CONT")

        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["touch", "testdir/file1"])
        # populate readdir cache
        self.mount_a.run_shell(["ls", "testdir"])
        self.mount_b.run_shell(["ls", "testdir"])

        # check if readdir cache is effective
        initial_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
        self.mount_b.run_shell(["ls", "testdir"])
        current_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
        self.assertEqual(current_readdirs, initial_readdirs)

        mount_b_gid = self.mount_b.get_global_id()
        mount_b_pid = self.mount_b.get_client_pid()
        # stop the ceph-fuse process of mount_b
        self.mount_b.client_remote.run(args=["sudo", "kill", "-STOP", mount_b_pid])

        self.assert_session_state(mount_b_gid, "open")
        time.sleep(self.mds_session_timeout * 1.5)  # Long enough for the MDS to consider the session stale
        self.assert_session_state(mount_b_gid, "stale")

        self.mount_a.run_shell(["touch", "testdir/file2"])

        # resume the ceph-fuse process of mount_b
        self.mount_b.client_remote.run(args=["sudo", "kill", "-CONT", mount_b_pid])
        # Is the new file visible from mount_b? (caps become invalid after the session goes stale)
        self.mount_b.run_shell(["ls", "testdir/file2"])

    def test_unmount_for_evicted_client(self):
        """Check that the client does not hang on unmount after it has been evicted."""
        mount_a_client_id = self.mount_a.get_global_id()
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])

        self.mount_a.umount_wait(require_clean=True, timeout=30)