import json
import logging
import os
from textwrap import dedent
import time
try:
    from typing import Optional
except ImportError:
    # make it work for python2
    pass
from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase


log = logging.getLogger(__name__)


class FullnessTestCase(CephFSTestCase):
    CLIENTS_REQUIRED = 2

    # Subclasses define whether they're filling whole cluster or just data pool
    data_only = False

    # Subclasses define how many bytes should be written to achieve fullness
    pool_capacity = None  # type: Optional[int]
    fill_mb = None

    # Subclasses define what fullness means to them
    def is_full(self):
        raise NotImplementedError()

    def setUp(self):
        CephFSTestCase.setUp(self)

        mds_status = self.fs.rank_asok(["status"])

        # Capture the initial OSD map epoch for later use
        self.initial_osd_epoch = mds_status['osdmap_epoch_barrier']

    def test_barrier(self):
        """
        That when an OSD epoch barrier is set on an MDS, subsequently
        issued capabilities cause clients to update their OSD map to that
        epoch.
        """

        # A script that syncs the client up with the MDS's OSD map barrier.
        # The barrier should be updated by the cap flush ack message.
        pyscript = dedent("""
            import os
            fd = os.open("{path}", os.O_CREAT | os.O_RDWR, 0o600)
            os.fchmod(fd, 0o666)
            os.fsync(fd)
            os.close(fd)
            """)

        # Sync up client with initial MDS OSD map barrier.
        path = os.path.join(self.mount_a.mountpoint, "foo")
        self.mount_a.run_python(pyscript.format(path=path))

        # Grab the mount's initial OSD epoch: later we will check that
        # it hasn't advanced beyond this point.
        mount_a_initial_epoch, mount_a_initial_barrier = self.mount_a.get_osd_epoch()

        # Freshly mounted at start of test, should be up to date with OSD map
        self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch)

        # Set and unset a flag to cause OSD epoch to increment
        self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause")
        self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause")

        out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip()
        new_epoch = json.loads(out)['epoch']
        self.assertNotEqual(self.initial_osd_epoch, new_epoch)

        # Do a metadata operation on clients, witness that they end up with
        # the old OSD map from startup time (nothing has prompted client
        # to update its map)
        path = os.path.join(self.mount_a.mountpoint, "foo")
        self.mount_a.run_python(pyscript.format(path=path))
        mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
        self.assertEqual(mount_a_epoch, mount_a_initial_epoch)
        self.assertEqual(mount_a_barrier, mount_a_initial_barrier)

        # Set a barrier on the MDS
        self.fs.rank_asok(["osdmap", "barrier", str(new_epoch)])

        # Sync up client with new MDS OSD map barrier
        path = os.path.join(self.mount_a.mountpoint, "baz")
        self.mount_a.run_python(pyscript.format(path=path))
        mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
        self.assertEqual(mount_a_barrier, new_epoch)

        # Some time passes here because the metadata part of the operation
        # completes immediately, while the resulting OSD map update happens
        # asynchronously (it's an Objecter::_maybe_request_map) as a result
        # of seeing the new epoch barrier.
        self.wait_until_true(
            lambda: self.mount_a.get_osd_epoch()[0] >= new_epoch,
            timeout=30)

    def _data_pool_name(self):
        data_pool_names = self.fs.get_data_pool_names()
        if len(data_pool_names) > 1:
            raise RuntimeError("This test can't handle multiple data pools")
        else:
            return data_pool_names[0]

    def _test_full(self, easy_case):
        """
        - That a client trying to write data to a file is prevented
          from doing so with an -EFULL result
        - That they are also prevented from creating new files by the MDS.
        - That they may delete another file to get the system healthy again

        :param easy_case: if true, delete a successfully written file to
                          free up space. else, delete the file that experienced
                          the failed write.
        """

        osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))

        log.info("Writing {0}MB should fill this cluster".format(self.fill_mb))

        # Fill up the cluster. This dd may or may not fail, as it depends on
        # how soon the cluster recognises its own fullness
        self.mount_a.write_n_mb("large_file_a", self.fill_mb // 2)
        try:
            self.mount_a.write_n_mb("large_file_b", self.fill_mb // 2)
        except CommandFailedError:
            log.info("Writing file B failed (full status happened already)")
            assert self.is_full()
        else:
            log.info("Writing file B succeeded (full status will happen soon)")
            self.wait_until_true(lambda: self.is_full(),
                                 timeout=osd_mon_report_interval * 5)

        # Attempting to write more data should give me ENOSPC
        with self.assertRaises(CommandFailedError) as ar:
            self.mount_a.write_n_mb("large_file_b", 50, seek=self.fill_mb // 2)
        self.assertEqual(ar.exception.exitstatus, 1)  # dd returns 1 on "No space"

        # Wait for the MDS to see the latest OSD map so that it will reliably
        # be applying the policy of rejecting non-deletion metadata operations
        # while in the full state.
        osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
        self.wait_until_true(
            lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
            timeout=10)

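        # Creating even an empty file is a non-deletion metadata operation, so
        # when we are filling the whole cluster (not just the data pool) the
        # MDS should refuse it; that is what the 0 MB write below checks.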
        if not self.data_only:
            with self.assertRaises(CommandFailedError):
                self.mount_a.write_n_mb("small_file_1", 0)

        # Clear out some space
        if easy_case:
            self.mount_a.run_shell(['rm', '-f', 'large_file_a'])
            self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
        else:
            # In the hard case it is the file that filled the system.
            # Before the new #7317 (ENOSPC, epoch barrier) changes, this
            # would fail because the last objects written would be
            # stuck in the client cache as objecter operations.
            self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
            self.mount_a.run_shell(['rm', '-f', 'large_file_a'])

        # Here we are waiting for two things to happen:
        # * The MDS to purge the stray folder and execute object deletions
        # * The OSDs to inform the mon that they are no longer full
        self.wait_until_true(lambda: not self.is_full(),
                             timeout=osd_mon_report_interval * 5)

        # Wait for the MDS to see the latest OSD map so that it will reliably
        # be applying the free space policy
        osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
        self.wait_until_true(
            lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
            timeout=10)

        # Now I should be able to write again
        self.mount_a.write_n_mb("large_file", 50, seek=0)

        # Ensure that the MDS keeps its OSD epoch barrier across a restart

    def test_full_different_file(self):
        self._test_full(True)

    def test_full_same_file(self):
        self._test_full(False)

    def _remote_write_test(self, template):
        """
        Run some remote python in a way that's useful for
        testing free space behaviour (see test_* methods using this)
        """
        file_path = os.path.join(self.mount_a.mountpoint, "full_test_file")

        # Enough to trip the full flag
        osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))
        mon_tick_interval = int(self.fs.get_config("mon_tick_interval", service_type="mon"))

        # Sufficient data to cause RADOS cluster to go 'full'
        log.info("pool capacity {0}, {1}MB should be enough to fill it".format(self.pool_capacity, self.fill_mb))

        # Long enough for RADOS cluster to notice it is full and set flag on mons
        # (report_interval for mon to learn PG stats, tick interval for it to update OSD map,
        #  factor of 1.5 for I/O + network latency in committing OSD map and distributing it
        #  to the OSDs)
        full_wait = (osd_mon_report_interval + mon_tick_interval) * 1.5
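        # For example, with osd_mon_report_interval=5 and mon_tick_interval=5
        # (the sort of low values this suite is meant to run with), full_wait
        # works out to (5 + 5) * 1.5 = 15 seconds.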

        # Configs for this test should bring this setting down in order to
        # run reasonably quickly
        if osd_mon_report_interval > 10:
            log.warning("This test may run rather slowly unless you decrease "
                        "osd_mon_report_interval (5 is a good setting)!")

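        # The remote script below is a str.format template: {fill_mb},
        # {file_path}, {full_wait} and {is_fuse} are substituted here, while
        # doubled braces such as {{0}} survive as literal braces in the
        # generated script.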
        self.mount_a.run_python(template.format(
            fill_mb=self.fill_mb,
            file_path=file_path,
            full_wait=full_wait,
            is_fuse=isinstance(self.mount_a, FuseMount)
        ))

    def test_full_fclose(self):
        # A remote script which opens a file handle, fills up the filesystem, and then
        # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
        remote_script = dedent("""
            import time
            import datetime
            import subprocess
            import os

            # Write some buffered data through before going full, all should be well
            print("writing some data through which we expect to succeed")
            bytes = 0
            f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
            bytes += os.write(f, b'a' * 512 * 1024)
            os.fsync(f)
            print("fsync'ed data successfully, will now attempt to fill fs")

            # Okay, now we're going to fill up the filesystem, and then keep
            # writing until we see an error from fsync. As long as we're doing
            # buffered IO, the error should always only appear from fsync and not
            # from write
            full = False

            for n in range(0, int({fill_mb} * 0.9)):
                bytes += os.write(f, b'x' * 1024 * 1024)
                print("wrote {{0}} bytes via buffered write, may repeat".format(bytes))
            print("done writing {{0}} bytes".format(bytes))

            # OK, now we should sneak in under the full condition
            # due to the time it takes the OSDs to report to the
            # mons, and get a successful fsync on our full-making data
            os.fsync(f)
            print("successfully fsync'ed prior to getting full state reported")

            # buffered write, add more dirty data to the buffer
            print("starting buffered write")
            try:
                for n in range(0, int({fill_mb} * 0.2)):
                    bytes += os.write(f, b'x' * 1024 * 1024)
                    print("sleeping a bit as we've exceeded 90% of our expected full ratio")
                    time.sleep({full_wait})
            except OSError:
                pass

            print("wrote, now waiting 30s and then doing a close we expect to fail")

            # Wait long enough for a background flush that should fail
            time.sleep(30)

            if {is_fuse}:
                # ...and check that the failed background flush is reflected in fclose
                try:
                    os.close(f)
                except OSError:
                    print("close() returned an error as expected")
                else:
                    raise RuntimeError("close() failed to raise error")
            else:
                # The kernel cephfs client does not raise errors on fclose
                os.close(f)

            os.unlink("{file_path}")
            """)
        self._remote_write_test(remote_script)

    def test_full_fsync(self):
        """
        That when the full flag is encountered during asynchronous
        flushes, an fwrite() succeeds but the subsequent fsync()/fclose()
        returns the ENOSPC error.
        """

        # A remote script which opens a file handle, fills up the filesystem, and then
        # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
        remote_script = dedent("""
            import time
            import datetime
            import subprocess
            import os

            # Write some buffered data through before going full, all should be well
            print("writing some data through which we expect to succeed")
            bytes = 0
            f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
            bytes += os.write(f, b'a' * 4096)
            os.fsync(f)
            print("fsync'ed data successfully, will now attempt to fill fs")

            # Okay, now we're going to fill up the filesystem, and then keep
            # writing until we see an error from fsync. As long as we're doing
            # buffered IO, the error should always only appear from fsync and not
            # from write
            full = False

            for n in range(0, int({fill_mb} * 1.1)):
                try:
                    bytes += os.write(f, b'x' * 1024 * 1024)
                    print("wrote bytes via buffered write, moving on to fsync")
                except OSError as e:
                    print("Unexpected error %s from write() instead of fsync()" % e)
                    raise

                try:
                    os.fsync(f)
                    print("fsync'ed successfully")
                except OSError as e:
                    print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)))
                    full = True
                    break
                else:
                    print("Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0)))

                if n > {fill_mb} * 0.9:
                    # Be cautious in the last region where we expect to hit
                    # the full condition, so that we don't overshoot too dramatically
                    print("sleeping a bit as we've exceeded 90% of our expected full ratio")
                    time.sleep({full_wait})

            if not full:
                raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes)

            # close() should not raise an error because we already caught it in
            # fsync. There shouldn't have been any more writeback errors
            # since then because all IOs got cancelled on the full flag.
            print("calling close")
            os.close(f)
            print("close() did not raise error")

            os.unlink("{file_path}")
            """)

        self._remote_write_test(remote_script)


class TestQuotaFull(FullnessTestCase):
    """
    Test per-pool fullness, which indicates quota limits exceeded
    """
    pool_capacity = 1024 * 1024 * 32  # arbitrary low-ish limit
    fill_mb = pool_capacity // (1024 * 1024)  # type: ignore

    # We are only testing quota handling on the data pool, not the metadata
    # pool.
    data_only = True

    def setUp(self):
        super(TestQuotaFull, self).setUp()

        pool_name = self.fs.get_data_pool_name()
        self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name,
                                            "max_bytes", "{0}".format(self.pool_capacity))

    def is_full(self):
        return self.fs.is_full()


class TestClusterFull(FullnessTestCase):
    """
    Test data pool fullness, which indicates that an OSD has become too full
    """
    pool_capacity = None
    REQUIRE_MEMSTORE = True

    def setUp(self):
        super(TestClusterFull, self).setUp()

        if self.pool_capacity is None:
            max_avail = self.fs.get_pool_df(self._data_pool_name())['max_avail']
            full_ratio = float(self.fs.get_config("mon_osd_full_ratio", service_type="mon"))
            TestClusterFull.pool_capacity = int(max_avail * full_ratio)
            TestClusterFull.fill_mb = (self.pool_capacity // (1024 * 1024))
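            # Note: these are cached on the class rather than the instance so
            # that every test in this case reuses the same capacity figure,
            # even though setUp() runs again before each test.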

    def is_full(self):
        return self.fs.is_full()

# Hide the parent class so that unittest.loader doesn't try to run it.
del globals()['FullnessTestCase']