import json
import logging
import os
from textwrap import dedent
from typing import Optional
from teuthology.exceptions import CommandFailedError
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase


log = logging.getLogger(__name__)


class FullnessTestCase(CephFSTestCase):
    CLIENTS_REQUIRED = 2

    # Subclasses define whether they're filling the whole cluster or just the data pool
    data_only = False

    # Subclasses define how many bytes should be written to achieve fullness
    pool_capacity: Optional[int] = None
    fill_mb = None

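    # is_full() delegates to Filesystem.is_full(); broadly, that reports whether
    # the relevant pool has been flagged full (see tasks/cephfs/filesystem.py).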
    def is_full(self):
        return self.fs.is_full()

    def setUp(self):
        CephFSTestCase.setUp(self)

        mds_status = self.fs.rank_asok(["status"])

        # Capture the initial OSD map epoch for later use
        self.initial_osd_epoch = mds_status['osdmap_epoch_barrier']

    def test_barrier(self):
        """
        That when an OSD epoch barrier is set on an MDS, subsequently
        issued capabilities cause clients to update their OSD map to that
        epoch.
        """

        # A script that syncs the client up with the MDS OSD map barrier.
        # The barrier should be updated by the cap flush ack message.
        pyscript = dedent("""
            import os
            fd = os.open("{path}", os.O_CREAT | os.O_RDWR, 0o600)
            os.fchmod(fd, 0o666)
            os.fsync(fd)
            os.close(fd)
            """)

        # Sync up client with initial MDS OSD map barrier.
        path = os.path.join(self.mount_a.mountpoint, "foo")
        self.mount_a.run_python(pyscript.format(path=path))

        # Grab the mount's initial OSD epoch: later we will check that
        # it hasn't advanced beyond this point.
        mount_a_initial_epoch, mount_a_initial_barrier = self.mount_a.get_osd_epoch()

        # Freshly mounted at start of test, should be up to date with OSD map
        self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch)

        # Set and unset a flag to cause OSD epoch to increment
        self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause")
        self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause")

        out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip()
        new_epoch = json.loads(out)['epoch']
        self.assertNotEqual(self.initial_osd_epoch, new_epoch)

        # Do a metadata operation on clients, witness that they end up with
        # the old OSD map from startup time (nothing has prompted client
        # to update its map)
        path = os.path.join(self.mount_a.mountpoint, "foo")
        self.mount_a.run_python(pyscript.format(path=path))
        mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
        self.assertEqual(mount_a_epoch, mount_a_initial_epoch)
        self.assertEqual(mount_a_barrier, mount_a_initial_barrier)

        # Set a barrier on the MDS
        self.fs.rank_asok(["osdmap", "barrier", str(new_epoch)])

        # Sync up client with new MDS OSD map barrier
        path = os.path.join(self.mount_a.mountpoint, "baz")
        self.mount_a.run_python(pyscript.format(path=path))
        mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
        self.assertEqual(mount_a_barrier, new_epoch)

        # Some time passes here because the metadata part of the operation
        # completes immediately, while the resulting OSD map update happens
        # asynchronously (it's an Objecter::_maybe_request_map) as a result
        # of seeing the new epoch barrier.
        self.wait_until_true(
            lambda: self.mount_a.get_osd_epoch()[0] >= new_epoch,
            timeout=30)

    def _data_pool_name(self):
        data_pool_names = self.fs.get_data_pool_names()
        if len(data_pool_names) > 1:
            raise RuntimeError("This test can't handle multiple data pools")
        else:
            return data_pool_names[0]

    def _test_full(self, easy_case):
        """
        - That a client trying to write data to a file is prevented
          from doing so with an -EFULL result
        - That they are also prevented from creating new files by the MDS.
        - That they may delete another file to get the system healthy again

        :param easy_case: if true, delete a successfully written file to
                          free up space.  Otherwise, delete the file that
                          experienced the failed write.
        """
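        # test_full_different_file() below exercises the easy case;
        # test_full_same_file() exercises the hard case.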

        osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))

        log.info("Writing {0}MB should fill this cluster".format(self.fill_mb))

        # Fill up the cluster.  This dd may or may not fail, as it depends on
        # how soon the cluster recognises its own fullness
        self.mount_a.write_n_mb("large_file_a", self.fill_mb // 2)
        try:
            self.mount_a.write_n_mb("large_file_b", (self.fill_mb * 1.1) // 2)
        except CommandFailedError:
            log.info("Writing file B failed (full status happened already)")
            assert self.is_full()
        else:
            log.info("Writing file B succeeded (full status will happen soon)")
            self.wait_until_true(lambda: self.is_full(),
                                 timeout=osd_mon_report_interval * 120)

        # Attempting to write more data should give me ENOSPC
        with self.assertRaises(CommandFailedError) as ar:
            self.mount_a.write_n_mb("large_file_b", 50, seek=self.fill_mb // 2)
        self.assertEqual(ar.exception.exitstatus, 1)  # dd returns 1 on "No space"

        # Wait for the MDS to see the latest OSD map so that it will reliably
        # be applying the policy of rejecting non-deletion metadata operations
        # while in the full state.
        osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
        self.wait_until_true(
            lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
            timeout=10)

        if not self.data_only:
            with self.assertRaises(CommandFailedError):
                self.mount_a.write_n_mb("small_file_1", 0)

        # Clear out some space
        if easy_case:
            self.mount_a.run_shell(['rm', '-f', 'large_file_a'])
            self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
        else:
            # In the hard case it is the file that filled the system.
            # Before the new #7317 (ENOSPC, epoch barrier) changes, this
            # would fail because the last objects written would be
            # stuck in the client cache as objecter operations.
            self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
            self.mount_a.run_shell(['rm', '-f', 'large_file_a'])

        # Here we are waiting for two things to happen:
        # * The MDS to purge the stray folder and execute object deletions
        # * The OSDs to inform the mon that they are no longer full
        self.wait_until_true(lambda: not self.is_full(),
                             timeout=osd_mon_report_interval * 120)

        # Wait for the MDS to see the latest OSD map so that it will reliably
        # be applying the free space policy
        osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
        self.wait_until_true(
            lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
            timeout=10)

        # Now I should be able to write again
        self.mount_a.write_n_mb("large_file", 50, seek=0)

        # Ensure that the MDS keeps its OSD epoch barrier across a restart

    def test_full_different_file(self):
        self._test_full(True)

    def test_full_same_file(self):
        self._test_full(False)

    def _remote_write_test(self, template):
        """
        Run some remote python in a way that's useful for
        testing free space behaviour (see test_* methods using this)
        """
        file_path = os.path.join(self.mount_a.mountpoint, "full_test_file")

        # Enough to trip the full flag
        osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))
        mon_tick_interval = int(self.fs.get_config("mon_tick_interval", service_type="mon"))

        # Sufficient data to cause RADOS cluster to go 'full'
        log.info("pool capacity {0}, {1}MB should be enough to fill it".format(self.pool_capacity, self.fill_mb))

        # Long enough for RADOS cluster to notice it is full and set flag on mons
        # (report_interval for mon to learn PG stats, tick interval for it to update OSD map,
        #  factor of 1.5 for I/O + network latency in committing OSD map and distributing it
        #  to the OSDs)
        full_wait = (osd_mon_report_interval + mon_tick_interval) * 1.5
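        # For example (hypothetical values): with osd_mon_report_interval=5, as
        # suggested by the warning below, and mon_tick_interval=5, full_wait
        # works out to (5 + 5) * 1.5 = 15 seconds.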

        # Configs for this test should bring this setting down in order to
        # run reasonably quickly
        if osd_mon_report_interval > 10:
            log.warning("This test may run rather slowly unless you decrease "
                        "osd_mon_report_interval (5 is a good setting)!")

        # Set the object_size to 1MB to make the objects distributed more evenly
        # among the OSDs to fix Tracker#45434
        file_layout = "stripe_unit=1048576 stripe_count=1 object_size=1048576"
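        # 1048576 bytes is 1MiB, so with stripe_count=1 and a 1MiB object_size each
        # megabyte written by the remote script lands in its own RADOS object
        # instead of growing one large object.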
        self.mount_a.run_python(template.format(
            fill_mb=self.fill_mb,
            file_path=file_path,
            file_layout=file_layout,
            full_wait=full_wait,
            is_fuse=isinstance(self.mount_a, FuseMount)
        ))

    def test_full_fclose(self):
        # A remote script which opens a file handle, fills up the filesystem, and then
        # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
        remote_script = dedent("""
            import time
            import datetime
            import subprocess
            import os

            # Write some buffered data through before going full, all should be well
            print("writing some data through which we expect to succeed")
            bytes = 0
            f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
            os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}')
            bytes += os.write(f, b'a' * 512 * 1024)
            os.fsync(f)
            print("fsync'ed data successfully, will now attempt to fill fs")

            # Okay, now we're going to fill up the filesystem, and then keep
            # writing until we see an error from fsync.  As long as we're doing
            # buffered IO, the error should always only appear from fsync and not
            # from write
            full = False

            for n in range(0, int({fill_mb} * 0.9)):
                bytes += os.write(f, b'x' * 1024 * 1024)
                print("wrote {{0}} bytes via buffered write, may repeat".format(bytes))
            print("done writing {{0}} bytes".format(bytes))

            # OK, now we should sneak in under the full condition
            # due to the time it takes the OSDs to report to the
            # mons, and get a successful fsync on our full-making data
            os.fsync(f)
            print("successfully fsync'ed prior to getting full state reported")

            # buffered write, add more dirty data to the buffer
            print("starting buffered write")
            try:
                for n in range(0, int({fill_mb} * 0.2)):
                    bytes += os.write(f, b'x' * 1024 * 1024)
                print("sleeping a bit as we've exceeded 90% of our expected full ratio")
                time.sleep({full_wait})
            except OSError:
                pass

            print("wrote, now waiting 30s and then doing a close we expect to fail")

            # Wait long enough for a background flush that should fail
            time.sleep(30)

            if {is_fuse}:
                # ...and check that the failed background flush is reflected in fclose
                try:
                    os.close(f)
                except OSError:
                    print("close() returned an error as expected")
                else:
                    raise RuntimeError("close() failed to raise error")
            else:
                # The kernel cephfs client does not raise errors on fclose
                os.close(f)

            os.unlink("{file_path}")
            """)
        self._remote_write_test(remote_script)

    def test_full_fsync(self):
        """
        That when the full flag is encountered during asynchronous
        flushes, an fwrite() succeeds but a subsequent fsync()/fclose()
        returns the ENOSPC error.
        """

        # A remote script which opens a file handle, fills up the filesystem, and then
        # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
        remote_script = dedent("""
            import time
            import datetime
            import subprocess
            import os

            # Write some buffered data through before going full, all should be well
            print("writing some data through which we expect to succeed")
            bytes = 0
            f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
            os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}')
            bytes += os.write(f, b'a' * 4096)
            os.fsync(f)
            print("fsync'ed data successfully, will now attempt to fill fs")

            # Okay, now we're going to fill up the filesystem, and then keep
            # writing until we see an error from fsync.  As long as we're doing
            # buffered IO, the error should always only appear from fsync and not
            # from write
            full = False

            for n in range(0, int({fill_mb} * 1.1)):
                try:
                    bytes += os.write(f, b'x' * 1024 * 1024)
                    print("wrote bytes via buffered write, moving on to fsync")
                except OSError as e:
                    if {is_fuse}:
                        print("Unexpected error %s from write() instead of fsync()" % e)
                        raise
                    else:
                        print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)))
                        full = True
                        break

                try:
                    os.fsync(f)
                    print("fsync'ed successfully")
                except OSError as e:
                    print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)))
                    full = True
                    break
                else:
                    print("Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0)))

                if n > {fill_mb} * 0.9:
                    # Be cautious in the last region where we expect to hit
                    # the full condition, so that we don't overshoot too dramatically
                    print("sleeping a bit as we've exceeded 90% of our expected full ratio")
                    time.sleep({full_wait})

            if not full:
                raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes)

            # close() should not raise an error because we already caught it in
            # fsync.  There shouldn't have been any more writeback errors
            # since then because all IOs got cancelled on the full flag.
            print("calling close")
            os.close(f)
            print("close() did not raise error")

            os.unlink("{file_path}")
            """)

        self._remote_write_test(remote_script)


class TestQuotaFull(FullnessTestCase):
    """
    Test per-pool fullness, which indicates quota limits exceeded
    """
    pool_capacity = 1024 * 1024 * 32   # arbitrary low-ish limit
    fill_mb = pool_capacity // (1024 * 1024)  # type: ignore
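    # i.e. a 32MiB quota, so fill_mb works out to 32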

    # We are only testing quota handling on the data pool, not the metadata
    # pool.
    data_only = True

    def setUp(self):
        super(TestQuotaFull, self).setUp()

        pool_name = self.fs.get_data_pool_name()
        self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name,
                                            "max_bytes", "{0}".format(self.pool_capacity))


class TestClusterFull(FullnessTestCase):
    """
    Test data pool fullness, which indicates that an OSD has become too full
    """
    pool_capacity = None
    REQUIRE_MEMSTORE = True

    def setUp(self):
        super(TestClusterFull, self).setUp()

        if self.pool_capacity is None:
            TestClusterFull.pool_capacity = self.fs.get_pool_df(self._data_pool_name())['max_avail']
            TestClusterFull.fill_mb = (self.pool_capacity // (1024 * 1024))
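            # The capacity is sampled once from the data pool's 'max_avail' and
            # cached on the class, so every test in this class fills toward the
            # same target.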

# Hide the parent class so that unittest.loader doesn't try to run it.
del globals()['FullnessTestCase']