# ceph/qa/tasks/cephfs/test_full.py
import json
import logging
import os
from textwrap import dedent
try:
    from typing import Optional
except ImportError:
    # make it work for python2
    pass
from teuthology.exceptions import CommandFailedError
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase

log = logging.getLogger(__name__)


class FullnessTestCase(CephFSTestCase):
    # Subclasses define whether they're filling whole cluster or just data pool
    data_only = False

    # Subclasses define how many bytes should be written to achieve fullness
    pool_capacity = None  # type: Optional[int]
    fill_mb = None

    def is_full(self):
        return self.fs.is_full()

    def setUp(self):
        CephFSTestCase.setUp(self)

        mds_status = self.fs.rank_asok(["status"])

        # Capture the initial OSD map epoch for later use
        self.initial_osd_epoch = mds_status['osdmap_epoch_barrier']

    def test_barrier(self):
        """
        That when an OSD epoch barrier is set on an MDS, subsequently
        issued capabilities cause clients to update their OSD map to that
        epoch.
        """

        # Script that syncs the client up with the MDS OSD map barrier; the
        # barrier should be updated by the cap flush ack message.
        pyscript = dedent("""
            import os
            fd = os.open("{path}", os.O_CREAT | os.O_RDWR, 0O600)
            os.fchmod(fd, 0O600)
            os.close(fd)
            """)

        # Sync up client with initial MDS OSD map barrier.
        path = os.path.join(self.mount_a.mountpoint, "foo")
        self.mount_a.run_python(pyscript.format(path=path))

        # Grab the mount's initial OSD epoch: later we will check that
        # it hasn't advanced beyond this point.
        mount_a_initial_epoch, mount_a_initial_barrier = self.mount_a.get_osd_epoch()
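        # (get_osd_epoch() on the mount object returns an (epoch, barrier)
        # pair as currently seen by the client.)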

        # Freshly mounted at start of test, should be up to date with OSD map
        self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch)

        # Set and unset a flag to cause OSD epoch to increment
        self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause")
        self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause")

        out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip()
        new_epoch = json.loads(out)['epoch']
        self.assertNotEqual(self.initial_osd_epoch, new_epoch)

        # Do a metadata operation on the client, and witness that it ends up
        # with the old OSD map from startup time (nothing has prompted the
        # client to update its map)
        path = os.path.join(self.mount_a.mountpoint, "foo")
        self.mount_a.run_python(pyscript.format(path=path))
        mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
        self.assertEqual(mount_a_epoch, mount_a_initial_epoch)
        self.assertEqual(mount_a_barrier, mount_a_initial_barrier)

        # Set a barrier on the MDS
        self.fs.rank_asok(["osdmap", "barrier", new_epoch.__str__()])
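        # From this point the MDS should require clients to have reached
        # new_epoch before acting on newly issued capabilities.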

        # Sync up client with new MDS OSD map barrier
        path = os.path.join(self.mount_a.mountpoint, "baz")
        self.mount_a.run_python(pyscript.format(path=path))
        mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
        self.assertEqual(mount_a_barrier, new_epoch)

        # Some time passes here because the metadata part of the operation
        # completes immediately, while the resulting OSD map update happens
        # asynchronously (it's an Objecter::_maybe_request_map) as a result
        # of seeing the new epoch barrier.
        self.wait_until_true(
            lambda: self.mount_a.get_osd_epoch()[0] >= new_epoch,
            timeout=30)

    def _data_pool_name(self):
        data_pool_names = self.fs.get_data_pool_names()
        if len(data_pool_names) > 1:
            raise RuntimeError("This test can't handle multiple data pools")
        else:
            return data_pool_names[0]

    def _test_full(self, easy_case):
        """
        Test the following:

        - That a client trying to write data to a file is prevented
          from doing so with an -EFULL result
        - That they are also prevented from creating new files by the MDS
        - That they may delete another file to get the system healthy again

        :param easy_case: if true, delete a successfully written file to
                          free up space.  else, delete the file that experienced
                          the failed write.
        """

        osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))

        log.info("Writing {0}MB should fill this cluster".format(self.fill_mb))

        # Fill up the cluster.  This dd may or may not fail, as it depends on
        # how soon the cluster recognises its own fullness
        self.mount_a.write_n_mb("large_file_a", self.fill_mb // 2)
        try:
            self.mount_a.write_n_mb("large_file_b", (self.fill_mb * 1.1) // 2)
        except CommandFailedError:
            log.info("Writing file B failed (full status happened already)")
            assert self.is_full()
        else:
            log.info("Writing file B succeeded (full status will happen soon)")
            self.wait_until_true(lambda: self.is_full(),
                                 timeout=osd_mon_report_interval * 120)
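        # (Between file A at fill_mb / 2 and file B at fill_mb * 1.1 / 2 we
        # attempt roughly 105% of the pool capacity, so fullness is reached
        # whichever way the race with the full flag goes.)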

        # Attempting to write more data should give me ENOSPC
        with self.assertRaises(CommandFailedError) as ar:
            self.mount_a.write_n_mb("large_file_b", 50, seek=self.fill_mb // 2)
        self.assertEqual(ar.exception.exitstatus, 1)  # dd returns 1 on "No space"
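        # (write_n_mb runs dd on the client, which is why ENOSPC surfaces as a
        # CommandFailedError carrying dd's exit status rather than a Python
        # exception.)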

        # Wait for the MDS to see the latest OSD map so that it will reliably
        # be applying the policy of rejecting non-deletion metadata operations
        # while in the full state.
        osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
        self.wait_until_true(
            lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
            timeout=10)

        if not self.data_only:
            with self.assertRaises(CommandFailedError):
                self.mount_a.write_n_mb("small_file_1", 0)

        # Clear out some space
        if easy_case:
            self.mount_a.run_shell(['rm', '-f', 'large_file_a'])
            self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
        else:
            # In the hard case it is the file that filled the system.
            # Before the new #7317 (ENOSPC, epoch barrier) changes, this
            # would fail because the last objects written would be
            # stuck in the client cache as objecter operations.
            self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
            self.mount_a.run_shell(['rm', '-f', 'large_file_a'])

        # Here we are waiting for two things to happen:
        # * The MDS to purge the stray folder and execute object deletions
        # * The OSDs to inform the mon that they are no longer full
        self.wait_until_true(lambda: not self.is_full(),
                             timeout=osd_mon_report_interval * 120)

        # Wait for the MDS to see the latest OSD map so that it will reliably
        # be applying the free space policy
        osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
        self.wait_until_true(
            lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
            timeout=10)

        # Now I should be able to write again
        self.mount_a.write_n_mb("large_file", 50, seek=0)

        # Ensure that the MDS keeps its OSD epoch barrier across a restart

    def test_full_different_file(self):
        self._test_full(True)

    def test_full_same_file(self):
        self._test_full(False)

    def _remote_write_test(self, template):
        """
        Run some remote python in a way that's useful for
        testing free space behaviour (see test_* methods using this)
        """
        file_path = os.path.join(self.mount_a.mountpoint, "full_test_file")

        # Enough to trip the full flag
        osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))
        mon_tick_interval = int(self.fs.get_config("mon_tick_interval", service_type="mon"))

        # Sufficient data to cause RADOS cluster to go 'full'
        log.info("pool capacity {0}, {1}MB should be enough to fill it".format(self.pool_capacity, self.fill_mb))

        # Long enough for RADOS cluster to notice it is full and set flag on mons
        # (report_interval for mon to learn PG stats, tick interval for it to update
        #  the OSD map, factor of 1.5 for I/O + network latency in committing the
        #  OSD map and distributing it to the OSDs)
        full_wait = (osd_mon_report_interval + mon_tick_interval) * 1.5
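        # For example, with the osd_mon_report_interval of 5 suggested below
        # and a mon_tick_interval of 5, full_wait = (5 + 5) * 1.5 = 15 seconds.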

        # Configs for this test should bring this setting down in order to
        # run reasonably quickly
        if osd_mon_report_interval > 10:
            log.warning("This test may run rather slowly unless you decrease "
                        "osd_mon_report_interval (5 is a good setting)!")

        # Set the object_size to 1MB to make the objects distributed more evenly
        # among the OSDs to fix Tracker#45434
        file_layout = "stripe_unit=1048576 stripe_count=1 object_size=1048576"
        self.mount_a.run_python(template.format(
            fill_mb=self.fill_mb,
            file_path=file_path,
            file_layout=file_layout,
            full_wait=full_wait,
            is_fuse=isinstance(self.mount_a, FuseMount)
        ))

    def test_full_fclose(self):
        # A remote script which opens a file handle, fills up the filesystem, and then
        # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
        remote_script = dedent("""
            import time
            import os

            # Write some buffered data through before going full, all should be well
            print("writing some data through which we expect to succeed")
            bytes = 0
            f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
            os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}')
            bytes += os.write(f, b'a' * 512 * 1024)
            os.fsync(f)
            print("fsync'ed data successfully, will now attempt to fill fs")

            # Okay, now we're going to fill up the filesystem, and then keep
            # writing until we see an error from fsync.  As long as we're doing
            # buffered IO, the error should always only appear from fsync and not
            # from write
            for n in range(0, int({fill_mb} * 0.9)):
                bytes += os.write(f, b'x' * 1024 * 1024)
                print("wrote {{0}} bytes via buffered write, may repeat".format(bytes))
            print("done writing {{0}} bytes".format(bytes))

            # OK, now we should sneak in under the full condition
            # due to the time it takes the OSDs to report to the
            # mons, and get a successful fsync on our full-making data
            os.fsync(f)
            print("successfully fsync'ed prior to getting full state reported")

            # Buffered write: add more dirty data to the buffer
            print("starting buffered write")
            try:
                for n in range(0, int({fill_mb} * 0.2)):
                    bytes += os.write(f, b'x' * 1024 * 1024)
                    print("sleeping a bit as we've exceeded 90% of our expected full ratio")
                    time.sleep({full_wait})
            except OSError:
                pass

            print("wrote, now waiting 30s and then doing a close we expect to fail")

            # Wait long enough for a background flush that should fail
            time.sleep(30)

            if {is_fuse}:
                # ...and check that the failed background flush is reflected in fclose
                try:
                    os.close(f)
                except OSError:
                    print("close() returned an error as expected")
                else:
                    raise RuntimeError("close() failed to raise error")
            else:
                # The kernel cephfs client does not raise errors on fclose
                os.close(f)

            os.unlink("{file_path}")
            """)
        self._remote_write_test(remote_script)

    def test_full_fsync(self):
        """
        That when the full flag is encountered during asynchronous
        flushes, an fwrite() succeeds but the subsequent fsync()/fclose()
        returns the ENOSPC error.
        """

        # A remote script which opens a file handle, fills up the filesystem, and then
        # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
        remote_script = dedent("""
            import time
            import os

            # Write some buffered data through before going full, all should be well
            print("writing some data through which we expect to succeed")
            bytes = 0
            f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
            os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}')
            bytes += os.write(f, b'a' * 4096)
            os.fsync(f)
            print("fsync'ed data successfully, will now attempt to fill fs")

            # Okay, now we're going to fill up the filesystem, and then keep
            # writing until we see an error from fsync.  As long as we're doing
            # buffered IO, the error should always only appear from fsync and not
            # from write
            full = False

            for n in range(0, int({fill_mb} * 1.1)):
                try:
                    bytes += os.write(f, b'x' * 1024 * 1024)
                    print("wrote bytes via buffered write, moving on to fsync")
                except OSError as e:
                    if {is_fuse}:
                        print("Unexpected error %s from write() instead of fsync()" % e)
                        raise
                    else:
                        print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)))
                        full = True
                        break

                try:
                    os.fsync(f)
                    print("fsync'ed successfully")
                except OSError:
                    print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)))
                    full = True
                    break
                else:
                    print("Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0)))

                if n > {fill_mb} * 0.9:
                    # Be cautious in the last region where we expect to hit
                    # the full condition, so that we don't overshoot too dramatically
                    print("sleeping a bit as we've exceeded 90% of our expected full ratio")
                    time.sleep({full_wait})

            if not full:
                raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes)

            # close() should not raise an error because we already caught it in
            # fsync.  There shouldn't have been any more writeback errors
            # since then because all IOs got cancelled on the full flag.
            print("calling close")
            os.close(f)
            print("close() did not raise error")

            os.unlink("{file_path}")
            """)

        self._remote_write_test(remote_script)


class TestQuotaFull(FullnessTestCase):
    """
    Test per-pool fullness, which indicates quota limits exceeded
    """
    pool_capacity = 1024 * 1024 * 32   # arbitrary low-ish limit
    fill_mb = pool_capacity // (1024 * 1024)  # type: ignore

    # We are only testing quota handling on the data pool, not the metadata
    # pool
    data_only = True

    def setUp(self):
        super(TestQuotaFull, self).setUp()

        pool_name = self.fs.get_data_pool_name()
        self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name,
                                            "max_bytes", "{0}".format(self.pool_capacity))
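        # (Equivalent to "ceph osd pool set-quota <pool> max_bytes 33554432",
        # i.e. the 32MB pool_capacity defined above.)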


class TestClusterFull(FullnessTestCase):
    """
    Test data pool fullness, which indicates that an OSD has become too full
    """
    pool_capacity = None
    REQUIRE_MEMSTORE = True

    def setUp(self):
        super(TestClusterFull, self).setUp()

        if self.pool_capacity is None:
            TestClusterFull.pool_capacity = self.fs.get_pool_df(self._data_pool_name())['max_avail']
            TestClusterFull.fill_mb = (self.pool_capacity // (1024 * 1024))
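            # Assigning on the class rather than the instance caches the
            # measured capacity for the other tests in this class.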


# Hide the parent class so that unittest.loader doesn't try to run it.
del globals()['FullnessTestCase']