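"""
Exercise CephFS behaviour when the cluster or a data pool becomes full:
OSD epoch barriers, ENOSPC propagation to clients, and recovery once
space is freed.
"""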
import json
import logging
import os
from textwrap import dedent
from typing import Optional
from teuthology.exceptions import CommandFailedError
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase


log = logging.getLogger(__name__)


class FullnessTestCase(CephFSTestCase):
    CLIENTS_REQUIRED = 2

    # Subclasses define whether they're filling whole cluster or just data pool
    data_only = False

    # Subclasses define how many bytes should be written to achieve fullness
    pool_capacity: Optional[int] = None
    fill_mb = None

    def is_full(self):
        return self.fs.is_full()

    def setUp(self):
        CephFSTestCase.setUp(self)

        mds_status = self.fs.rank_asok(["status"])

        # Capture the initial OSD map epoch for later use
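        # ('osdmap_epoch_barrier' is the OSD map epoch that the MDS requires
        # clients to have seen before using newly issued capabilities; it serves
        # as the baseline for the barrier assertions in test_barrier below)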
        self.initial_osd_epoch = mds_status['osdmap_epoch_barrier']

    def test_barrier(self):
        """
        That when an OSD epoch barrier is set on an MDS, subsequently
        issued capabilities cause clients to update their OSD map to that
        epoch.
        """

        # Script that syncs the client up with the MDS OSD map barrier.  The
        # barrier should be updated by the cap flush ack message.
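        # (Creating the file and changing its mode dirties capabilities; the
        # fsync forces a cap flush, and the MDS's ack is expected to carry its
        # current epoch barrier back to the client.)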
        pyscript = dedent("""
            import os
            fd = os.open("{path}", os.O_CREAT | os.O_RDWR, 0o600)
            os.fchmod(fd, 0o666)
            os.fsync(fd)
            os.close(fd)
            """)

        # Sync up client with initial MDS OSD map barrier.
        path = os.path.join(self.mount_a.mountpoint, "foo")
        self.mount_a.run_python(pyscript.format(path=path))

        # Grab the mount's initial OSD epoch: later we will check that
        # it hasn't advanced beyond this point.
        mount_a_initial_epoch, mount_a_initial_barrier = self.mount_a.get_osd_epoch()

        # Freshly mounted at start of test, should be up to date with OSD map
        self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch)

        # Set and unset a flag to cause OSD epoch to increment
        self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause")
        self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause")

        out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip()
        new_epoch = json.loads(out)['epoch']
        self.assertNotEqual(self.initial_osd_epoch, new_epoch)

        # Do a metadata operation on clients, witness that they end up with
        # the old OSD map from startup time (nothing has prompted client
        # to update its map)
        path = os.path.join(self.mount_a.mountpoint, "foo")
        self.mount_a.run_python(pyscript.format(path=path))
        mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
        self.assertEqual(mount_a_epoch, mount_a_initial_epoch)
        self.assertEqual(mount_a_barrier, mount_a_initial_barrier)

        # Set a barrier on the MDS
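        # (capabilities issued after this point should carry the barrier,
        # prompting clients to fetch at least this OSD map epoch)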
        self.fs.rank_asok(["osdmap", "barrier", str(new_epoch)])

        # Sync up client with new MDS OSD map barrier
        path = os.path.join(self.mount_a.mountpoint, "baz")
        self.mount_a.run_python(pyscript.format(path=path))
        mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
        self.assertEqual(mount_a_barrier, new_epoch)

        # Some time passes here because the metadata part of the operation
        # completes immediately, while the resulting OSD map update happens
        # asynchronously (it's an Objecter::_maybe_request_map) as a result
        # of seeing the new epoch barrier.
        self.wait_until_true(
            lambda: self.mount_a.get_osd_epoch()[0] >= new_epoch,
            timeout=30)

    def _data_pool_name(self):
        data_pool_names = self.fs.get_data_pool_names()
        if len(data_pool_names) > 1:
            raise RuntimeError("This test can't handle multiple data pools")
        else:
            return data_pool_names[0]

    def _test_full(self, easy_case):
        """
        - That a client trying to write data to a file is prevented
          from doing so with an -EFULL result
        - That they are also prevented from creating new files by the MDS.
        - That they may delete another file to get the system healthy again

        :param easy_case: if true, delete a successfully written file to
                          free up space.  else, delete the file that experienced
                          the failed write.
        """

        osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))

        log.info("Writing {0}MB should fill this cluster".format(self.fill_mb))

        # Fill up the cluster.  This dd may or may not fail, as it depends on
        # how soon the cluster recognises its own fullness
        self.mount_a.write_n_mb("large_file_a", self.fill_mb // 2)
        try:
            self.mount_a.write_n_mb("large_file_b", (self.fill_mb * 1.1) // 2)
        except CommandFailedError:
            log.info("Writing file B failed (full status happened already)")
            assert self.is_full()
        else:
            log.info("Writing file B succeeded (full status will happen soon)")
            self.wait_until_true(lambda: self.is_full(),
                                 timeout=osd_mon_report_interval * 120)

        # Attempting to write more data should give me ENOSPC
        with self.assertRaises(CommandFailedError) as ar:
            self.mount_a.write_n_mb("large_file_b", 50, seek=self.fill_mb // 2)
        self.assertEqual(ar.exception.exitstatus, 1)  # dd returns 1 on "No space"

        # Wait for the MDS to see the latest OSD map so that it will reliably
        # be applying the policy of rejecting non-deletion metadata operations
        # while in the full state.
        osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
        self.wait_until_true(
            lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
            timeout=10)

        if not self.data_only:
            with self.assertRaises(CommandFailedError):
                self.mount_a.write_n_mb("small_file_1", 0)

        # Clear out some space
        if easy_case:
            self.mount_a.run_shell(['rm', '-f', 'large_file_a'])
            self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
        else:
            # In the hard case it is the file that filled the system.
            # Before the new #7317 (ENOSPC, epoch barrier) changes, this
            # would fail because the last objects written would be
            # stuck in the client cache as objecter operations.
            self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
            self.mount_a.run_shell(['rm', '-f', 'large_file_a'])

        # Here we are waiting for two things to happen:
        # * The MDS to purge the stray folder and execute object deletions
        # * The OSDs to inform the mon that they are no longer full
        self.wait_until_true(lambda: not self.is_full(),
                             timeout=osd_mon_report_interval * 120)

        # Wait for the MDS to see the latest OSD map so that it will reliably
        # be applying the free space policy
        osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
        self.wait_until_true(
            lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
            timeout=10)

        # Now I should be able to write again
        self.mount_a.write_n_mb("large_file", 50, seek=0)

        # Ensure that the MDS keeps its OSD epoch barrier across a restart

    def test_full_different_file(self):
        self._test_full(True)

    def test_full_same_file(self):
        self._test_full(False)

    def _remote_write_test(self, template):
        """
        Run some remote python in a way that's useful for
        testing free space behaviour (see test_* methods using this)
        """
        file_path = os.path.join(self.mount_a.mountpoint, "full_test_file")

        # Enough to trip the full flag
        osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))
        mon_tick_interval = int(self.fs.get_config("mon_tick_interval", service_type="mon"))

        # Sufficient data to cause RADOS cluster to go 'full'
        log.info("pool capacity {0}, {1}MB should be enough to fill it".format(self.pool_capacity, self.fill_mb))

        # Long enough for RADOS cluster to notice it is full and set flag on mons
        # (report_interval for mon to learn PG stats, tick interval for it to update OSD map,
        # factor of 1.5 for I/O + network latency in committing OSD map and distributing it
        # to the OSDs)
        full_wait = (osd_mon_report_interval + mon_tick_interval) * 1.5
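        # (e.g. with osd_mon_report_interval=5 and mon_tick_interval=5 this
        # allows (5 + 5) * 1.5 = 15 seconds for the full flag to appear)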

        # Configs for this test should bring this setting down in order to
        # run reasonably quickly
        if osd_mon_report_interval > 10:
            log.warning("This test may run rather slowly unless you decrease "
                        "osd_mon_report_interval (5 is a good setting)!")

        # Set the object_size to 1MB to distribute the objects more evenly
        # among the OSDs (see Tracker #45434)
        file_layout = "stripe_unit=1048576 stripe_count=1 object_size=1048576"
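        # (with stripe_count=1 and a 1MiB object size, each megabyte written
        # maps to its own RADOS object)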
        self.mount_a.run_python(template.format(
            fill_mb=self.fill_mb,
            file_path=file_path,
            file_layout=file_layout,
            full_wait=full_wait,
            is_fuse=isinstance(self.mount_a, FuseMount)
        ))

    def test_full_fclose(self):
        # A remote script which opens a file handle, fills up the filesystem, and then
        # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
        remote_script = dedent("""
            import time
            import datetime
            import subprocess
            import os

            # Write some buffered data through before going full, all should be well
            print("writing some data through which we expect to succeed")
            bytes = 0
            f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
            os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}')
            bytes += os.write(f, b'a' * 512 * 1024)
            os.fsync(f)
            print("fsync'ed data successfully, will now attempt to fill fs")

            # Okay, now we're going to fill up the filesystem, and then keep
            # writing until we see an error from fsync.  As long as we're doing
            # buffered IO, the error should always only appear from fsync and not
            # from write
            full = False

            for n in range(0, int({fill_mb} * 0.9)):
                bytes += os.write(f, b'x' * 1024 * 1024)
                print("wrote {{0}} bytes via buffered write, may repeat".format(bytes))
            print("done writing {{0}} bytes".format(bytes))

            # OK, now we should sneak in under the full condition
            # due to the time it takes the OSDs to report to the
            # mons, and get a successful fsync on our full-making data
            os.fsync(f)
            print("successfully fsync'ed prior to getting full state reported")

            # buffered write, add more dirty data to the buffer
            print("starting buffered write")
            try:
                for n in range(0, int({fill_mb} * 0.2)):
                    bytes += os.write(f, b'x' * 1024 * 1024)
                    print("sleeping a bit as we've exceeded 90% of our expected full ratio")
                    time.sleep({full_wait})
            except OSError:
                pass

            print("wrote, now waiting 30s and then doing a close we expect to fail")

            # Wait long enough for a background flush that should fail
            time.sleep(30)

            if {is_fuse}:
                # ...and check that the failed background flush is reflected in fclose
                try:
                    os.close(f)
                except OSError:
                    print("close() returned an error as expected")
                else:
                    raise RuntimeError("close() failed to raise error")
            else:
                # The kernel cephfs client does not raise errors on fclose
                os.close(f)

            os.unlink("{file_path}")
            """)
        self._remote_write_test(remote_script)

    def test_full_fsync(self):
        """
        That when the full flag is encountered during asynchronous
        flushes, an fwrite() succeeds but the subsequent fsync()/fclose()
        returns the ENOSPC error.
        """

        # A remote script which opens a file handle, fills up the filesystem, and then
        # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
        remote_script = dedent("""
            import time
            import datetime
            import subprocess
            import os

            # Write some buffered data through before going full, all should be well
            print("writing some data through which we expect to succeed")
            bytes = 0
            f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
            os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}')
            bytes += os.write(f, b'a' * 4096)
            os.fsync(f)
            print("fsync'ed data successfully, will now attempt to fill fs")

            # Okay, now we're going to fill up the filesystem, and then keep
            # writing until we see an error from fsync.  As long as we're doing
            # buffered IO, the error should always only appear from fsync and not
            # from write
            full = False

            for n in range(0, int({fill_mb} * 1.1)):
                try:
                    bytes += os.write(f, b'x' * 1024 * 1024)
                    print("wrote bytes via buffered write, moving on to fsync")
                except OSError as e:
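                    # (an error from write() is unexpected for the FUSE client,
                    # whose writes are buffered; the kernel client may return
                    # ENOSPC from write() itself, which also counts as reaching
                    # fullness)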
                    if {is_fuse}:
                        print("Unexpected error %s from write() instead of fsync()" % e)
                        raise
                    else:
                        print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)))
                        full = True
                        break

                try:
                    os.fsync(f)
                    print("fsync'ed successfully")
                except OSError as e:
                    print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)))
                    full = True
                    break
                else:
                    print("Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0)))

                if n > {fill_mb} * 0.9:
                    # Be cautious in the last region where we expect to hit
                    # the full condition, so that we don't overshoot too dramatically
                    print("sleeping a bit as we've exceeded 90% of our expected full ratio")
                    time.sleep({full_wait})

            if not full:
                raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes)

            # close() should not raise an error because we already caught it in
            # fsync.  There shouldn't have been any more writeback errors
            # since then because all IOs got cancelled on the full flag.
            print("calling close")
            os.close(f)
            print("close() did not raise error")

            os.unlink("{file_path}")
            """)

        self._remote_write_test(remote_script)


class TestQuotaFull(FullnessTestCase):
    """
    Test per-pool fullness, which indicates quota limits exceeded
    """
    pool_capacity = 1024 * 1024 * 32   # arbitrary low-ish limit
    fill_mb = pool_capacity // (1024 * 1024)  # type: ignore
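    # (32MiB of capacity, so fill_mb works out to 32)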

    # We are only testing quota handling on the data pool, not the metadata
    # pool.
    data_only = True

    def setUp(self):
        super(TestQuotaFull, self).setUp()

        pool_name = self.fs.get_data_pool_name()
        self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name,
                                            "max_bytes", "{0}".format(self.pool_capacity))


class TestClusterFull(FullnessTestCase):
    """
    Test data pool fullness, which indicates that an OSD has become too full
    """
    pool_capacity = None
    REQUIRE_MEMSTORE = True
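    # (memstore-backed OSDs keep the cluster capacity small, presumably so that
    # it can actually be filled within the test's runtime)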

    def setUp(self):
        super(TestClusterFull, self).setUp()

        if self.pool_capacity is None:
            TestClusterFull.pool_capacity = self.fs.get_pool_df(self._data_pool_name())['max_avail']
            TestClusterFull.fill_mb = (self.pool_capacity // (1024 * 1024))

# Hide the parent class so that unittest.loader doesn't try to run it.
del globals()['FullnessTestCase']