]> git.proxmox.com Git - ceph.git/blame - ceph/src/seastar/src/core/file.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / seastar / src / core / file.cc
CommitLineData
9f95a23c
TL
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright 2019 ScyllaDB
20 */
21
22#define __user /* empty */ // for xfs includes, below
23
24#include <sys/syscall.h>
25#include <dirent.h>
26#include <linux/types.h> // for xfs, below
20effc67 27#include <linux/fs.h> // BLKBSZGET
9f95a23c 28#include <sys/ioctl.h>
20effc67
TL
29#include <unistd.h>
30#include <fcntl.h>
9f95a23c
TL
31#include <xfs/linux.h>
32#define min min /* prevent xfs.h from defining min() as a macro */
33#include <xfs/xfs.h>
34#undef min
35#include <boost/range/numeric.hpp>
36#include <boost/range/adaptor/transformed.hpp>
37#include <seastar/core/reactor.hh>
38#include <seastar/core/file.hh>
39#include <seastar/core/report_exception.hh>
40#include <seastar/core/linux-aio.hh>
f67539c2 41#include <seastar/util/later.hh>
20effc67 42#include <seastar/core/io_queue.hh>
9f95a23c
TL
43#include "core/file-impl.hh"
44#include "core/syscall_result.hh"
45#include "core/thread_pool.hh"
46#include "core/uname.hh"
20effc67 47#include "seastar/core/internal/read_state.hh"
9f95a23c
TL
48
49namespace seastar {
50
f67539c2
TL
51static_assert(std::is_nothrow_copy_constructible_v<io_priority_class>);
52static_assert(std::is_nothrow_move_constructible_v<io_priority_class>);
53
20effc67
TL
54namespace internal {
55
56struct fs_info {
57 uint32_t block_size;
58 bool append_challenged;
59 unsigned append_concurrency;
60 bool fsync_is_exclusive;
61 bool nowait_works;
62 std::optional<dioattr> dioinfo;
63};
64
65};
66
9f95a23c
TL
67using namespace internal;
68using namespace internal::linux_abi;
69
70file_handle::file_handle(const file_handle& x)
71 : _impl(x._impl ? x._impl->clone() : std::unique_ptr<file_handle_impl>()) {
72}
73
74file_handle::file_handle(file_handle&& x) noexcept = default;
75
76file_handle&
77file_handle::operator=(const file_handle& x) {
78 return operator=(file_handle(x));
79}
80
81file_handle&
82file_handle::operator=(file_handle&&) noexcept = default;
83
84file
85file_handle::to_file() const & {
86 return file_handle(*this).to_file();
87}
88
89file
90file_handle::to_file() && {
91 return file(std::move(*_impl).to_file());
92}
93
20effc67 94posix_file_impl::posix_file_impl(int fd, open_flags f, file_open_options options, dev_t device_id, bool nowait_works)
f67539c2 95 : _device_id(device_id)
20effc67
TL
96 , _nowait_works(nowait_works)
97 , _io_queue(engine().get_io_queue(_device_id))
9f95a23c
TL
98 , _open_flags(f)
99 , _fd(fd)
100{
20effc67
TL
101 configure_io_lengths();
102}
103
104posix_file_impl::posix_file_impl(int fd, open_flags f, file_open_options options, dev_t device_id, const internal::fs_info& fsi)
105 : posix_file_impl(fd, f, options, device_id, fsi.nowait_works)
106{
107 configure_dma_alignment(fsi);
9f95a23c
TL
108}
109
110posix_file_impl::~posix_file_impl() {
111 if (_refcount && _refcount->fetch_add(-1, std::memory_order_relaxed) != 1) {
112 return;
113 }
114 delete _refcount;
115 if (_fd != -1) {
116 // Note: close() can be a blocking operation on NFS
117 ::close(_fd);
118 }
119}
120
121void
20effc67
TL
122posix_file_impl::configure_dma_alignment(const internal::fs_info& fsi) {
123 if (fsi.dioinfo) {
124 const dioattr& da = *fsi.dioinfo;
9f95a23c
TL
125 _memory_dma_alignment = da.d_mem;
126 _disk_read_dma_alignment = da.d_miniosz;
127 // xfs wants at least the block size for writes
128 // FIXME: really read the block size
20effc67
TL
129 _disk_write_dma_alignment = std::max<unsigned>(da.d_miniosz, fsi.block_size);
130 static bool xfs_with_relaxed_overwrite_alignment = kernel_uname().whitelisted({"5.12"});
131 _disk_overwrite_dma_alignment = xfs_with_relaxed_overwrite_alignment ? da.d_miniosz : _disk_write_dma_alignment;
9f95a23c
TL
132 }
133}
134
20effc67
TL
135void posix_file_impl::configure_io_lengths() noexcept {
136 auto limits = _io_queue.get_request_limits();
137 _read_max_length = std::min<size_t>(_read_max_length, limits.max_read);
138 _write_max_length = std::min<size_t>(_write_max_length, limits.max_write);
139}
140
9f95a23c
TL
141std::unique_ptr<seastar::file_handle_impl>
142posix_file_impl::dup() {
143 if (!_refcount) {
144 _refcount = new std::atomic<unsigned>(1u);
145 }
20effc67
TL
146 auto ret = std::make_unique<posix_file_handle_impl>(_fd, _open_flags, _refcount, _device_id,
147 _memory_dma_alignment, _disk_read_dma_alignment, _disk_write_dma_alignment, _disk_overwrite_dma_alignment,
148 _nowait_works);
9f95a23c
TL
149 _refcount->fetch_add(1, std::memory_order_relaxed);
150 return ret;
151}
152
20effc67
TL
153posix_file_impl::posix_file_impl(int fd, open_flags f, std::atomic<unsigned>* refcount, dev_t device_id,
154 uint32_t memory_dma_alignment,
155 uint32_t disk_read_dma_alignment,
156 uint32_t disk_write_dma_alignment,
157 uint32_t disk_overwrite_dma_alignment,
158 bool nowait_works)
f67539c2
TL
159 : _refcount(refcount)
160 , _device_id(device_id)
20effc67
TL
161 , _nowait_works(nowait_works)
162 , _io_queue(engine().get_io_queue(_device_id))
f67539c2
TL
163 , _open_flags(f)
164 , _fd(fd) {
20effc67
TL
165 _memory_dma_alignment = memory_dma_alignment;
166 _disk_read_dma_alignment = disk_read_dma_alignment;
167 _disk_write_dma_alignment = disk_write_dma_alignment;
168 _disk_overwrite_dma_alignment = disk_overwrite_dma_alignment;
169 configure_io_lengths();
9f95a23c
TL
170}
171
172future<>
f67539c2 173posix_file_impl::flush(void) noexcept {
9f95a23c
TL
174 if ((_open_flags & open_flags::dsync) != open_flags{}) {
175 return make_ready_future<>();
176 }
177 return engine().fdatasync(_fd);
178}
179
180future<struct stat>
f67539c2
TL
181posix_file_impl::stat() noexcept {
182 return engine().fstat(_fd);
9f95a23c
TL
183}
184
185future<>
f67539c2 186posix_file_impl::truncate(uint64_t length) noexcept {
9f95a23c
TL
187 return engine()._thread_pool->submit<syscall_result<int>>([this, length] {
188 return wrap_syscall<int>(::ftruncate(_fd, length));
189 }).then([] (syscall_result<int> sr) {
190 sr.throw_if_error();
191 return make_ready_future<>();
192 });
193}
194
20effc67
TL
195future<int>
196posix_file_impl::ioctl(uint64_t cmd, void* argp) noexcept {
197 return engine()._thread_pool->submit<syscall_result<int>>([this, cmd, argp] () mutable {
198 return wrap_syscall<int>(::ioctl(_fd, cmd, argp));
199 }).then([] (syscall_result<int> sr) {
200 sr.throw_if_error();
201 // Some ioctls require to return a positive integer back.
202 return make_ready_future<int>(sr.result);
203 });
204}
205
206future<int>
207posix_file_impl::ioctl_short(uint64_t cmd, void* argp) noexcept {
208 int ret = ::ioctl(_fd, cmd, argp);
209 if (ret == -1) {
210 return make_exception_future<int>(
211 std::system_error(errno, std::system_category(), "ioctl failed"));
212 }
213 return make_ready_future<int>(ret);
214}
215
216future<int>
217posix_file_impl::fcntl(int op, uintptr_t arg) noexcept {
218 return engine()._thread_pool->submit<syscall_result<int>>([this, op, arg] () mutable {
219 return wrap_syscall<int>(::fcntl(_fd, op, arg));
220 }).then([] (syscall_result<int> sr) {
221 sr.throw_if_error();
222 // Some fcntls require to return a positive integer back.
223 return make_ready_future<int>(sr.result);
224 });
225}
226
227future<int>
228posix_file_impl::fcntl_short(int op, uintptr_t arg) noexcept {
229 int ret = ::fcntl(_fd, op, arg);
230 if (ret == -1) {
231 return make_exception_future<int>(
232 std::system_error(errno, std::system_category(), "fcntl failed"));
233 }
234 return make_ready_future<int>(ret);
235}
236
9f95a23c 237future<>
f67539c2 238posix_file_impl::discard(uint64_t offset, uint64_t length) noexcept {
9f95a23c
TL
239 return engine()._thread_pool->submit<syscall_result<int>>([this, offset, length] () mutable {
240 return wrap_syscall<int>(::fallocate(_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE,
241 offset, length));
242 }).then([] (syscall_result<int> sr) {
243 sr.throw_if_error();
244 return make_ready_future<>();
245 });
246}
247
248future<>
f67539c2 249posix_file_impl::allocate(uint64_t position, uint64_t length) noexcept {
9f95a23c
TL
250#ifdef FALLOC_FL_ZERO_RANGE
251 // FALLOC_FL_ZERO_RANGE is fairly new, so don't fail if it's not supported.
252 static bool supported = true;
253 if (!supported) {
254 return make_ready_future<>();
255 }
256 return engine()._thread_pool->submit<syscall_result<int>>([this, position, length] () mutable {
257 auto ret = ::fallocate(_fd, FALLOC_FL_ZERO_RANGE|FALLOC_FL_KEEP_SIZE, position, length);
258 if (ret == -1 && errno == EOPNOTSUPP) {
259 ret = 0;
260 supported = false; // Racy, but harmless. At most we issue an extra call or two.
261 }
262 return wrap_syscall<int>(ret);
263 }).then([] (syscall_result<int> sr) {
264 sr.throw_if_error();
265 return make_ready_future<>();
266 });
267#else
268 return make_ready_future<>();
269#endif
270}
271
272future<uint64_t>
f67539c2 273posix_file_impl::size() noexcept {
9f95a23c
TL
274 auto r = ::lseek(_fd, 0, SEEK_END);
275 if (r == -1) {
276 return make_exception_future<uint64_t>(std::system_error(errno, std::system_category()));
277 }
278 return make_ready_future<uint64_t>(r);
279}
280
281future<>
282posix_file_impl::close() noexcept {
283 if (_fd == -1) {
284 seastar_logger.warn("double close() detected, contact support");
285 return make_ready_future<>();
286 }
287 auto fd = _fd;
288 _fd = -1; // Prevent a concurrent close (which is illegal) from closing another file's fd
289 if (_refcount && _refcount->fetch_add(-1, std::memory_order_relaxed) != 1) {
290 _refcount = nullptr;
291 return make_ready_future<>();
292 }
293 delete _refcount;
294 _refcount = nullptr;
295 auto closed = [fd] () noexcept {
296 try {
297 return engine()._thread_pool->submit<syscall_result<int>>([fd] {
298 return wrap_syscall<int>(::close(fd));
299 });
300 } catch (...) {
301 report_exception("Running ::close() in reactor thread, submission failed with exception", std::current_exception());
302 return make_ready_future<syscall_result<int>>(wrap_syscall<int>(::close(fd)));
303 }
304 }();
305 return closed.then([] (syscall_result<int> sr) {
20effc67 306 try {
9f95a23c 307 sr.throw_if_error();
20effc67
TL
308 } catch (...) {
309 report_exception("close() syscall failed", std::current_exception());
310 }
9f95a23c
TL
311 });
312}
313
314future<uint64_t>
f67539c2 315blockdev_file_impl::size(void) noexcept {
9f95a23c
TL
316 return engine()._thread_pool->submit<syscall_result_extra<size_t>>([this] {
317 uint64_t size;
318 int ret = ::ioctl(_fd, BLKGETSIZE64, &size);
319 return wrap_syscall(ret, size);
320 }).then([] (syscall_result_extra<uint64_t> ret) {
321 ret.throw_if_error();
322 return make_ready_future<uint64_t>(ret.extra);
323 });
324}
325
326subscription<directory_entry>
327posix_file_impl::list_directory(std::function<future<> (directory_entry de)> next) {
328 static constexpr size_t buffer_size = 8192;
329 struct work {
330 stream<directory_entry> s;
331 unsigned current = 0;
332 unsigned total = 0;
333 bool eof = false;
334 int error = 0;
335 char buffer[buffer_size];
336 };
337
338 // While it would be natural to use fdopendir()/readdir(),
339 // our syscall thread pool doesn't support malloc(), which is
340 // required for this to work. So resort to using getdents()
341 // instead.
342
343 // From getdents(2):
344 struct linux_dirent64 {
345 ino64_t d_ino; /* 64-bit inode number */
346 off64_t d_off; /* 64-bit offset to next structure */
347 unsigned short d_reclen; /* Size of this dirent */
348 unsigned char d_type; /* File type */
349 char d_name[]; /* Filename (null-terminated) */
350 };
351
352 auto w = make_lw_shared<work>();
353 auto ret = w->s.listen(std::move(next));
354 // List the directory asynchronously in the background.
355 // Caller synchronizes using the returned subscription.
356 (void)w->s.started().then([w, this] {
357 auto eofcond = [w] { return w->eof; };
358 return do_until(eofcond, [w, this] {
359 if (w->current == w->total) {
360 return engine()._thread_pool->submit<syscall_result<long>>([w , this] () {
361 auto ret = ::syscall(__NR_getdents64, _fd, reinterpret_cast<linux_dirent64*>(w->buffer), buffer_size);
362 return wrap_syscall(ret);
363 }).then([w] (syscall_result<long> ret) {
364 ret.throw_if_error();
365 if (ret.result == 0) {
366 w->eof = true;
367 } else {
368 w->current = 0;
369 w->total = ret.result;
370 }
371 });
372 }
373 auto start = w->buffer + w->current;
374 auto de = reinterpret_cast<linux_dirent64*>(start);
f67539c2 375 std::optional<directory_entry_type> type;
9f95a23c
TL
376 switch (de->d_type) {
377 case DT_BLK:
378 type = directory_entry_type::block_device;
379 break;
380 case DT_CHR:
381 type = directory_entry_type::char_device;
382 break;
383 case DT_DIR:
384 type = directory_entry_type::directory;
385 break;
386 case DT_FIFO:
387 type = directory_entry_type::fifo;
388 break;
389 case DT_REG:
390 type = directory_entry_type::regular;
391 break;
392 case DT_LNK:
393 type = directory_entry_type::link;
394 break;
395 case DT_SOCK:
396 type = directory_entry_type::socket;
397 break;
398 default:
399 // unknown, ignore
400 ;
401 }
402 w->current += de->d_reclen;
403 sstring name = de->d_name;
404 if (name == "." || name == "..") {
405 return make_ready_future<>();
406 }
407 return w->s.produce({std::move(name), type});
408 });
409 }).then([w] {
410 w->s.close();
411 }).handle_exception([] (std::exception_ptr ignored) {});
412 return ret;
413}
414
415future<size_t>
20effc67
TL
416posix_file_impl::do_write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& io_priority_class, io_intent* intent) noexcept {
417 auto req = internal::io_request::make_write(_fd, pos, buffer, len, _nowait_works);
418 return engine().submit_io_write(&_io_queue, io_priority_class, len, std::move(req), intent);
9f95a23c
TL
419}
420
421future<size_t>
20effc67 422posix_file_impl::do_write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& io_priority_class, io_intent* intent) noexcept {
9f95a23c 423 auto len = internal::sanitize_iovecs(iov, _disk_write_dma_alignment);
20effc67
TL
424 auto req = internal::io_request::make_writev(_fd, pos, iov, _nowait_works);
425 return engine().submit_io_write(&_io_queue, io_priority_class, len, std::move(req), intent).finally([iov = std::move(iov)] () {});
9f95a23c
TL
426}
427
428future<size_t>
20effc67
TL
429posix_file_impl::do_read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& io_priority_class, io_intent* intent) noexcept {
430 auto req = internal::io_request::make_read(_fd, pos, buffer, len, _nowait_works);
431 return engine().submit_io_read(&_io_queue, io_priority_class, len, std::move(req), intent);
9f95a23c
TL
432}
433
434future<size_t>
20effc67 435posix_file_impl::do_read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& io_priority_class, io_intent* intent) noexcept {
9f95a23c 436 auto len = internal::sanitize_iovecs(iov, _disk_read_dma_alignment);
20effc67
TL
437 auto req = internal::io_request::make_readv(_fd, pos, iov, _nowait_works);
438 return engine().submit_io_read(&_io_queue, io_priority_class, len, std::move(req), intent).finally([iov = std::move(iov)] () {});
439}
440
441future<size_t>
442posix_file_real_impl::write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc, io_intent* intent) noexcept {
443 return posix_file_impl::do_write_dma(pos, buffer, len, pc, intent);
444}
445
446future<size_t>
447posix_file_real_impl::write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent* intent) noexcept {
448 return posix_file_impl::do_write_dma(pos, std::move(iov), pc, intent);
449}
450
451future<size_t>
452posix_file_real_impl::read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc, io_intent* intent) noexcept {
453 return posix_file_impl::do_read_dma(pos, buffer, len, pc, intent);
454}
455
456future<size_t>
457posix_file_real_impl::read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent* intent) noexcept {
458 return posix_file_impl::do_read_dma(pos, std::move(iov), pc, intent);
9f95a23c
TL
459}
460
461future<temporary_buffer<uint8_t>>
20effc67
TL
462posix_file_real_impl::dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc, io_intent* intent) noexcept {
463 return posix_file_impl::do_dma_read_bulk(offset, range_size, pc, intent);
464}
465
466future<temporary_buffer<uint8_t>>
467posix_file_impl::do_dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc, io_intent* intent) noexcept {
468 using tmp_buf_type = typename internal::file_read_state<uint8_t>::tmp_buf_type;
9f95a23c 469
f67539c2 470 try {
9f95a23c
TL
471 auto front = offset & (_disk_read_dma_alignment - 1);
472 offset -= front;
473 range_size += front;
474
20effc67 475 auto rstate = make_lw_shared<internal::file_read_state<uint8_t>>(offset, front,
9f95a23c
TL
476 range_size,
477 _memory_dma_alignment,
20effc67
TL
478 _disk_read_dma_alignment,
479 intent);
9f95a23c
TL
480
481 //
482 // First, try to read directly into the buffer. Most of the reads will
483 // end here.
484 //
485 auto read = read_dma(offset, rstate->buf.get_write(),
20effc67 486 rstate->buf.size(), pc, intent);
9f95a23c
TL
487
488 return read.then([rstate, this, &pc] (size_t size) mutable {
489 rstate->pos = size;
490
491 //
492 // If we haven't read all required data at once -
493 // start read-copy sequence. We can't continue with direct reads
494 // into the previously allocated buffer here since we have to ensure
495 // the aligned read length and thus the aligned destination buffer
496 // size.
497 //
498 // The copying will actually take place only if there was a HW glitch.
499 // In EOF case or in case of a persistent I/O error the only overhead is
500 // an extra allocation.
501 //
502 return do_until(
503 [rstate] { return rstate->done(); },
504 [rstate, this, &pc] () mutable {
505 return read_maybe_eof(
20effc67 506 rstate->cur_offset(), rstate->left_to_read(), pc, rstate->get_intent()).then(
9f95a23c
TL
507 [rstate] (auto buf1) mutable {
508 if (buf1.size()) {
509 rstate->append_new_data(buf1);
510 } else {
511 rstate->eof = true;
512 }
513
514 return make_ready_future<>();
515 });
516 }).then([rstate] () mutable {
517 //
518 // If we are here we are promised to have read some bytes beyond
519 // "front" so we may trim straight away.
520 //
521 rstate->trim_buf_before_ret();
522 return make_ready_future<tmp_buf_type>(std::move(rstate->buf));
523 });
524 });
f67539c2
TL
525 } catch (...) {
526 return make_exception_future<tmp_buf_type>(std::current_exception());
527 }
9f95a23c
TL
528}
529
530future<temporary_buffer<uint8_t>>
20effc67 531posix_file_impl::read_maybe_eof(uint64_t pos, size_t len, const io_priority_class& pc, io_intent* intent) {
9f95a23c
TL
532 //
533 // We have to allocate a new aligned buffer to make sure we don't get
534 // an EINVAL error due to unaligned destination buffer.
535 //
536 temporary_buffer<uint8_t> buf = temporary_buffer<uint8_t>::aligned(
537 _memory_dma_alignment, align_up(len, size_t(_disk_read_dma_alignment)));
538
539 // try to read a single bulk from the given position
540 auto dst = buf.get_write();
541 auto buf_size = buf.size();
20effc67 542 return read_dma(pos, dst, buf_size, pc, intent).then_wrapped(
9f95a23c
TL
543 [buf = std::move(buf)](future<size_t> f) mutable {
544 try {
f67539c2 545 size_t size = f.get0();
9f95a23c
TL
546
547 buf.trim(size);
548
549 return std::move(buf);
550 } catch (std::system_error& e) {
551 //
552 // TODO: implement a non-trowing file_impl::dma_read() interface to
553 // avoid the exceptions throwing in a good flow completely.
554 // Otherwise for users that don't want to care about the
555 // underlying file size and preventing the attempts to read
556 // bytes beyond EOF there will always be at least one
557 // exception throwing at the file end for files with unaligned
558 // length.
559 //
560 if (e.code().value() == EINVAL) {
561 buf.trim(0);
562 return std::move(buf);
563 } else {
564 throw;
565 }
566 }
567 });
568}
569
20effc67
TL
570static bool blockdev_nowait_works = kernel_uname().whitelisted({"4.13"});
571
572blockdev_file_impl::blockdev_file_impl(int fd, open_flags f, file_open_options options, dev_t device_id, size_t block_size)
573 : posix_file_impl(fd, f, options, device_id, blockdev_nowait_works) {
574 // FIXME -- configure file_impl::_..._dma_alignment's from block_size
9f95a23c
TL
575}
576
577future<>
f67539c2 578blockdev_file_impl::truncate(uint64_t length) noexcept {
9f95a23c
TL
579 return make_ready_future<>();
580}
581
582future<>
f67539c2 583blockdev_file_impl::discard(uint64_t offset, uint64_t length) noexcept {
9f95a23c
TL
584 return engine()._thread_pool->submit<syscall_result<int>>([this, offset, length] () mutable {
585 uint64_t range[2] { offset, length };
586 return wrap_syscall<int>(::ioctl(_fd, BLKDISCARD, &range));
587 }).then([] (syscall_result<int> sr) {
588 sr.throw_if_error();
589 return make_ready_future<>();
590 });
591}
592
593future<>
f67539c2 594blockdev_file_impl::allocate(uint64_t position, uint64_t length) noexcept {
9f95a23c
TL
595 // nothing to do for block device
596 return make_ready_future<>();
597}
598
20effc67
TL
599future<size_t>
600blockdev_file_impl::write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc, io_intent* intent) noexcept {
601 return posix_file_impl::do_write_dma(pos, buffer, len, pc, intent);
602}
603
604future<size_t>
605blockdev_file_impl::write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent* intent) noexcept {
606 return posix_file_impl::do_write_dma(pos, std::move(iov), pc, intent);
607}
608
609future<size_t>
610blockdev_file_impl::read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc, io_intent* intent) noexcept {
611 return posix_file_impl::do_read_dma(pos, buffer, len, pc, intent);
612}
613
614future<size_t>
615blockdev_file_impl::read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent* intent) noexcept {
616 return posix_file_impl::do_read_dma(pos, std::move(iov), pc, intent);
617}
618
619future<temporary_buffer<uint8_t>>
620blockdev_file_impl::dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc, io_intent* intent) noexcept {
621 return posix_file_impl::do_dma_read_bulk(offset, range_size, pc, intent);
622}
623
624append_challenged_posix_file_impl::append_challenged_posix_file_impl(int fd, open_flags f, file_open_options options, const fs_info& fsi, dev_t device_id)
625 : posix_file_impl(fd, f, options, device_id, fsi)
626 , _max_size_changing_ops(fsi.append_concurrency)
627 , _fsync_is_exclusive(fsi.fsync_is_exclusive)
628 , _sloppy_size(options.sloppy_size)
629 , _sloppy_size_hint(align_up<uint64_t>(options.sloppy_size_hint, _disk_write_dma_alignment))
630{
9f95a23c
TL
631 auto r = ::lseek(fd, 0, SEEK_END);
632 throw_system_error_on(r == -1);
633 _committed_size = _logical_size = r;
9f95a23c
TL
634}
635
636append_challenged_posix_file_impl::~append_challenged_posix_file_impl() {
637 // If the file has not been closed we risk having running tasks
638 // that will try to access freed memory.
f67539c2
TL
639 //
640 // It is safe to destory it if nothing is queued.
641 // Note that posix_file_impl::~posix_file_impl auto-closes the file descriptor.
20effc67 642 assert(_q.empty() && (_logical_size == _committed_size || _closing_state == state::closed));
9f95a23c
TL
643}
644
645bool
646append_challenged_posix_file_impl::must_run_alone(const op& candidate) const noexcept {
647 // checks if candidate is a non-write, size-changing operation.
648 return (candidate.type == opcode::truncate)
649 || (candidate.type == opcode::flush && (_fsync_is_exclusive || _sloppy_size));
650}
651
20effc67
TL
652bool
653append_challenged_posix_file_impl::appending_write(const op& candidate) const noexcept {
654 return (candidate.type == opcode::write) &&
655 (candidate.pos + candidate.len > _committed_size);
656}
657
9f95a23c
TL
658bool
659append_challenged_posix_file_impl::size_changing(const op& candidate) const noexcept {
20effc67 660 return appending_write(candidate) || must_run_alone(candidate);
9f95a23c
TL
661}
662
663bool
664append_challenged_posix_file_impl::may_dispatch(const op& candidate) const noexcept {
665 if (size_changing(candidate)) {
666 return !_current_size_changing_ops && !_current_non_size_changing_ops;
667 } else {
668 return !_current_size_changing_ops;
669 }
670}
671
672void
673append_challenged_posix_file_impl::dispatch(op& candidate) noexcept {
674 unsigned* op_counter = size_changing(candidate)
675 ? &_current_size_changing_ops : &_current_non_size_changing_ops;
676 ++*op_counter;
677 // FIXME: future is discarded
678 (void)candidate.run().then([me = shared_from_this(), op_counter] {
679 --*op_counter;
680 me->process_queue();
681 });
682}
683
20effc67
TL
684int append_challenged_posix_file_impl::truncate_sync(uint64_t length) noexcept {
685 int r = ::ftruncate(_fd, length);
686 if (r != -1) {
687 _committed_size = length;
688 }
689 return r;
690}
691
692void append_challenged_posix_file_impl::truncate_to_logical_size() {
693 auto r = truncate_sync(_logical_size);
694 if (r == -1) {
695 throw std::system_error(errno, std::system_category(), "truncate");
696 }
697}
698
9f95a23c
TL
699// If we have a bunch of size-extending writes in the queue,
700// issue an ftruncate() extending the file size, so they can
701// be issued concurrently.
702void
703append_challenged_posix_file_impl::optimize_queue() noexcept {
704 if (_current_non_size_changing_ops || _current_size_changing_ops) {
705 // Can't issue an ftruncate() if something is going on
706 return;
707 }
708 auto speculative_size = _committed_size;
709 unsigned n_appending_writes = 0;
710 for (const auto& op : _q) {
711 // stop calculating speculative size after a non-write, size-changing
712 // operation is found to prevent an useless truncate from being issued.
713 if (must_run_alone(op)) {
714 break;
715 }
20effc67
TL
716
717 if (appending_write(op)) {
9f95a23c
TL
718 speculative_size = std::max(speculative_size, op.pos + op.len);
719 ++n_appending_writes;
720 }
721 }
722 if (n_appending_writes > _max_size_changing_ops
723 || (n_appending_writes && _sloppy_size)) {
20effc67
TL
724 if (_sloppy_size) {
725 if (!_committed_size) {
726 speculative_size = std::max(speculative_size, _sloppy_size_hint);
727 } else if (speculative_size < 2 * _committed_size) {
9f95a23c 728 speculative_size = align_up<uint64_t>(2 * _committed_size, _disk_write_dma_alignment);
20effc67 729 }
9f95a23c
TL
730 }
731 // We're all alone, so issuing the ftruncate() in the reactor
732 // thread won't block us.
733 //
734 // Issuing it in the syscall thread is too slow; this can happen
735 // every several ops, and the syscall thread latency can be very
736 // high.
20effc67
TL
737
738 truncate_sync(speculative_size);
739 // If we failed, the next write will pick it up.
9f95a23c
TL
740 }
741}
742
743void
744append_challenged_posix_file_impl::process_queue() noexcept {
745 optimize_queue();
746 while (!_q.empty() && may_dispatch(_q.front())) {
747 op candidate = std::move(_q.front());
748 _q.pop_front();
749 dispatch(candidate);
750 }
751 if (may_quit()) {
752 _completed.set_value();
753 _closing_state = state::closing; // prevents _completed to be signaled again in case of recursion
754 }
755}
756
757void
f67539c2 758append_challenged_posix_file_impl::enqueue_op(op&& op) {
9f95a23c
TL
759 _q.push_back(std::move(op));
760 process_queue();
761}
762
763bool
764append_challenged_posix_file_impl::may_quit() const noexcept {
765 return _closing_state == state::draining && _q.empty() && !_current_non_size_changing_ops &&
766 !_current_size_changing_ops;
767}
768
769void
770append_challenged_posix_file_impl::commit_size(uint64_t size) noexcept {
771 _committed_size = std::max(size, _committed_size);
772 _logical_size = std::max(size, _logical_size);
773}
774
775future<size_t>
20effc67 776append_challenged_posix_file_impl::read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc, io_intent* intent) noexcept {
9f95a23c
TL
777 if (pos >= _logical_size) {
778 // later() avoids tail recursion
779 return later().then([] {
780 return size_t(0);
781 });
782 }
783 len = std::min(pos + len, align_up<uint64_t>(_logical_size, _disk_read_dma_alignment)) - pos;
20effc67 784 internal::intent_reference iref(intent);
f67539c2 785 return enqueue<size_t>(
9f95a23c
TL
786 opcode::read,
787 pos,
788 len,
20effc67
TL
789 [this, pos, buffer, len, &pc, iref = std::move(iref)] () mutable {
790 return posix_file_impl::do_read_dma(pos, buffer, len, pc, iref.retrieve());
9f95a23c 791 }
f67539c2 792 );
9f95a23c
TL
793}
794
795future<size_t>
20effc67 796append_challenged_posix_file_impl::read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent* intent) noexcept {
9f95a23c
TL
797 if (pos >= _logical_size) {
798 // later() avoids tail recursion
799 return later().then([] {
800 return size_t(0);
801 });
802 }
803 size_t len = 0;
804 auto i = iov.begin();
f67539c2
TL
805 auto aligned_logical_size = align_up<uint64_t>(_logical_size, _disk_read_dma_alignment);
806 while (i != iov.end() && pos + len + i->iov_len <= aligned_logical_size) {
9f95a23c
TL
807 len += i++->iov_len;
808 }
9f95a23c
TL
809 if (i != iov.end()) {
810 auto last_len = pos + len + i->iov_len - aligned_logical_size;
811 if (last_len) {
812 i++->iov_len = last_len;
813 }
814 iov.erase(i, iov.end());
815 }
20effc67 816 internal::intent_reference iref(intent);
f67539c2 817 return enqueue<size_t>(
9f95a23c
TL
818 opcode::read,
819 pos,
820 len,
20effc67
TL
821 [this, pos, iov = std::move(iov), &pc, iref = std::move(iref)] () mutable {
822 return posix_file_impl::do_read_dma(pos, std::move(iov), pc, iref.retrieve());
9f95a23c 823 }
f67539c2 824 );
9f95a23c
TL
825}
826
827future<size_t>
20effc67
TL
828append_challenged_posix_file_impl::write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc, io_intent* intent) noexcept {
829 internal::intent_reference iref(intent);
f67539c2 830 return enqueue<size_t>(
9f95a23c
TL
831 opcode::write,
832 pos,
833 len,
20effc67
TL
834 [this, pos, buffer, len, &pc, iref = std::move(iref)] {
835 return posix_file_impl::do_write_dma(pos, buffer, len, pc, iref.retrieve()).then([this, pos] (size_t ret) {
f67539c2
TL
836 commit_size(pos + ret);
837 return make_ready_future<size_t>(ret);
9f95a23c
TL
838 });
839 }
f67539c2 840 );
9f95a23c
TL
841}
842
843future<size_t>
20effc67 844append_challenged_posix_file_impl::write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent* intent) noexcept {
9f95a23c 845 auto len = boost::accumulate(iov | boost::adaptors::transformed(std::mem_fn(&iovec::iov_len)), size_t(0));
20effc67 846 internal::intent_reference iref(intent);
f67539c2 847 return enqueue<size_t>(
9f95a23c
TL
848 opcode::write,
849 pos,
850 len,
20effc67
TL
851 [this, pos, iov = std::move(iov), &pc, iref = std::move(iref)] () mutable {
852 return posix_file_impl::do_write_dma(pos, std::move(iov), pc, iref.retrieve()).then([this, pos] (size_t ret) {
f67539c2
TL
853 commit_size(pos + ret);
854 return make_ready_future<size_t>(ret);
9f95a23c
TL
855 });
856 }
f67539c2 857 );
9f95a23c
TL
858}
859
20effc67
TL
860future<temporary_buffer<uint8_t>>
861append_challenged_posix_file_impl::dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc, io_intent* intent) noexcept {
862 return posix_file_impl::do_dma_read_bulk(offset, range_size, pc, intent);
863}
864
9f95a23c 865future<>
f67539c2 866append_challenged_posix_file_impl::flush() noexcept {
9f95a23c
TL
867 if ((!_sloppy_size || _logical_size == _committed_size) && !_fsync_is_exclusive) {
868 // FIXME: determine if flush can block concurrent reads or writes
869 return posix_file_impl::flush();
870 } else {
f67539c2 871 return enqueue<>(
9f95a23c
TL
872 opcode::flush,
873 0,
874 0,
f67539c2
TL
875 [this] () {
876 if (_logical_size != _committed_size) {
877 // We're all alone, so can truncate in reactor thread
20effc67 878 truncate_to_logical_size();
f67539c2
TL
879 }
880 return posix_file_impl::flush();
9f95a23c 881 }
f67539c2 882 );
9f95a23c
TL
883 }
884}
885
886future<struct stat>
f67539c2 887append_challenged_posix_file_impl::stat() noexcept {
9f95a23c
TL
888 // FIXME: can this conflict with anything?
889 return posix_file_impl::stat().then([this] (struct stat stat) {
890 stat.st_size = _logical_size;
891 return stat;
892 });
893}
894
895future<>
f67539c2
TL
896append_challenged_posix_file_impl::truncate(uint64_t length) noexcept {
897 return enqueue<>(
9f95a23c
TL
898 opcode::truncate,
899 length,
900 0,
f67539c2
TL
901 [this, length] () mutable {
902 return posix_file_impl::truncate(length).then([this, length] () mutable {
903 _committed_size = _logical_size = length;
9f95a23c
TL
904 });
905 }
f67539c2 906 );
9f95a23c
TL
907}
908
909future<uint64_t>
f67539c2 910append_challenged_posix_file_impl::size() noexcept {
9f95a23c
TL
911 return make_ready_future<size_t>(_logical_size);
912}
913
914future<>
915append_challenged_posix_file_impl::close() noexcept {
916 // Caller should have drained all pending I/O
917 _closing_state = state::draining;
918 process_queue();
919 return _completed.get_future().then([this] {
920 if (_logical_size != _committed_size) {
20effc67
TL
921 truncate_to_logical_size();
922 }
923 }).then_wrapped([this] (future<> f) {
924 if (f.failed()) {
925 report_exception("Closing append_challenged_posix_file_impl failed.", f.get_exception());
9f95a23c
TL
926 }
927 return posix_file_impl::close().finally([this] { _closing_state = state::closed; });
928 });
929}
930
931posix_file_handle_impl::~posix_file_handle_impl() {
932 if (_refcount && _refcount->fetch_add(-1, std::memory_order_relaxed) == 1) {
933 ::close(_fd);
934 delete _refcount;
935 }
936}
937
938std::unique_ptr<seastar::file_handle_impl>
939posix_file_handle_impl::clone() const {
20effc67
TL
940 auto ret = std::make_unique<posix_file_handle_impl>(_fd, _open_flags, _refcount, _device_id,
941 _memory_dma_alignment, _disk_read_dma_alignment, _disk_write_dma_alignment, _disk_overwrite_dma_alignment, _nowait_works);
9f95a23c
TL
942 if (_refcount) {
943 _refcount->fetch_add(1, std::memory_order_relaxed);
944 }
945 return ret;
946}
947
948shared_ptr<file_impl>
949posix_file_handle_impl::to_file() && {
20effc67
TL
950 auto ret = ::seastar::make_shared<posix_file_real_impl>(_fd, _open_flags, _refcount, _device_id,
951 _memory_dma_alignment, _disk_read_dma_alignment, _disk_write_dma_alignment, _disk_overwrite_dma_alignment, _nowait_works);
9f95a23c
TL
952 _fd = -1;
953 _refcount = nullptr;
954 return ret;
955}
956
957// Some kernels can append to xfs filesystems, some cannot; determine
958// from kernel version.
959static
960unsigned
961xfs_concurrency_from_kernel_version() {
962 // try to see if this is a mainline kernel with xfs append fixed (3.15+)
963 // or a RHEL kernel with the backported fix (3.10.0-325.el7+)
964 if (kernel_uname().whitelisted({"3.15", "3.10.0-325.el7"})) {
965 // Can append, but not concurrently
966 return 1;
967 }
968 // Cannot append at all; need ftrucnate().
969 return 0;
970}
971
f67539c2
TL
972future<shared_ptr<file_impl>>
973make_file_impl(int fd, file_open_options options, int flags) noexcept {
974 return engine().fstat(fd).then([fd, options = std::move(options), flags] (struct stat st) mutable {
975 auto st_dev = st.st_dev;
9f95a23c 976
f67539c2 977 if (S_ISBLK(st.st_mode)) {
20effc67
TL
978 size_t block_size;
979 auto ret = ::ioctl(fd, BLKBSZGET, &block_size);
980 if (ret == -1) {
981 return make_exception_future<shared_ptr<file_impl>>(
982 std::system_error(errno, std::system_category(), "ioctl(BLKBSZGET) failed"));
983 }
984 return make_ready_future<shared_ptr<file_impl>>(make_shared<blockdev_file_impl>(fd, open_flags(flags), options, st_dev, block_size));
f67539c2 985 } else {
20effc67
TL
986 if (S_ISDIR(st.st_mode)) {
987 // Directories don't care about block size, so we need not
988 // query it here. Just provide something reasonable.
989 internal::fs_info fsi;
990 fsi.block_size = 4096;
991 fsi.nowait_works = false;
992 return make_ready_future<shared_ptr<file_impl>>(make_shared<posix_file_real_impl>(fd, open_flags(flags), options, fsi, st_dev));
9f95a23c 993 }
20effc67
TL
994 static thread_local std::unordered_map<decltype(st_dev), fs_info> s_fstype;
995 future<> get_fs_info = s_fstype.count(st_dev) ? make_ready_future<>() :
996 engine().fstatfs(fd).then([fd, st_dev] (struct statfs sfs) {
997 internal::fs_info fsi;
998 fsi.block_size = sfs.f_bsize;
f67539c2
TL
999 switch (sfs.f_type) {
1000 case 0x58465342: /* XFS */
20effc67
TL
1001 dioattr da;
1002 if (::ioctl(fd, XFS_IOC_DIOINFO, &da) == 0) {
1003 fsi.dioinfo = std::move(da);
1004 }
1005
1006 fsi.append_challenged = true;
f67539c2 1007 static auto xc = xfs_concurrency_from_kernel_version();
20effc67
TL
1008 fsi.append_concurrency = xc;
1009 fsi.fsync_is_exclusive = true;
1010 fsi.nowait_works = kernel_uname().whitelisted({"4.13"});
f67539c2
TL
1011 break;
1012 case 0x6969: /* NFS */
20effc67
TL
1013 fsi.append_challenged = false;
1014 fsi.append_concurrency = 0;
1015 fsi.fsync_is_exclusive = false;
1016 fsi.nowait_works = kernel_uname().whitelisted({"4.13"});
f67539c2
TL
1017 break;
1018 case 0xEF53: /* EXT4 */
20effc67
TL
1019 fsi.append_challenged = true;
1020 fsi.append_concurrency = 0;
1021 fsi.fsync_is_exclusive = false;
1022 fsi.nowait_works = kernel_uname().whitelisted({"5.5"});
1023 break;
1024 case 0x9123683E: /* BTRFS */
1025 fsi.append_challenged = true;
1026 fsi.append_concurrency = 0;
1027 fsi.fsync_is_exclusive = true;
1028 fsi.nowait_works = kernel_uname().whitelisted({"5.9"});
1029 break;
1030 case 0x01021994: /* TMPFS */
1031 case 0x65735546: /* FUSE */
1032 fsi.append_challenged = false;
1033 fsi.append_concurrency = 999;
1034 fsi.fsync_is_exclusive = false;
1035 fsi.nowait_works = false;
f67539c2
TL
1036 break;
1037 default:
20effc67
TL
1038 fsi.append_challenged = true;
1039 fsi.append_concurrency = 0;
1040 fsi.fsync_is_exclusive = true;
1041 fsi.nowait_works = false;
f67539c2 1042 }
20effc67 1043 s_fstype[st_dev] = std::move(fsi);
f67539c2 1044 });
20effc67
TL
1045 return get_fs_info.then([st_dev, fd, flags, options = std::move(options)] () mutable {
1046 const fs_info& fsi = s_fstype[st_dev];
1047 if (!fsi.append_challenged || options.append_is_unlikely || ((flags & O_ACCMODE) == O_RDONLY)) {
1048 return make_ready_future<shared_ptr<file_impl>>(make_shared<posix_file_real_impl>(fd, open_flags(flags), std::move(options), fsi, st_dev));
f67539c2 1049 }
20effc67 1050 return make_ready_future<shared_ptr<file_impl>>(make_shared<append_challenged_posix_file_impl>(fd, open_flags(flags), std::move(options), fsi, st_dev));
f67539c2 1051 });
9f95a23c 1052 }
f67539c2
TL
1053 });
1054}
1055
1056file::file(seastar::file_handle&& handle) noexcept
1057 : _file_impl(std::move(std::move(handle).to_file()._file_impl)) {
1058}
1059
1060future<uint64_t> file::size() const noexcept {
1061 try {
1062 return _file_impl->size();
1063 } catch (...) {
1064 return current_exception_as_future<uint64_t>();
1065 }
1066}
1067
1068future<> file::close() noexcept {
20effc67
TL
1069 auto f = std::move(_file_impl);
1070 return f->close().handle_exception([f = std::move(f)] (std::exception_ptr ex) {
1071 report_exception("Closing the file failed unexpectedly", std::move(ex));
f67539c2
TL
1072 });
1073}
1074
1075subscription<directory_entry>
1076file::list_directory(std::function<future<>(directory_entry de)> next) {
1077 return _file_impl->list_directory(std::move(next));
1078}
1079
20effc67
TL
1080future<int> file::ioctl(uint64_t cmd, void* argp) noexcept {
1081 try {
1082 return _file_impl->ioctl(cmd, argp);
1083 } catch (...) {
1084 return current_exception_as_future<int>();
1085 }
1086}
1087
1088future<int> file::ioctl_short(uint64_t cmd, void* argp) noexcept {
1089 try {
1090 return _file_impl->ioctl_short(cmd, argp);
1091 } catch (...) {
1092 return current_exception_as_future<int>();
1093 }
1094}
1095
1096future<int> file::fcntl(int op, uintptr_t arg) noexcept {
1097 try {
1098 return _file_impl->fcntl(op, arg);
1099 } catch (...) {
1100 return current_exception_as_future<int>();
1101 }
1102}
1103
1104future<int> file::fcntl_short(int op, uintptr_t arg) noexcept {
1105 try {
1106 return _file_impl->fcntl_short(op, arg);
1107 } catch (...) {
1108 return current_exception_as_future<int>();
1109 }
1110}
1111
1112future<> file::set_lifetime_hint_impl(int op, uint64_t hint) noexcept {
1113 return do_with(hint, [op, this] (uint64_t& arg) {
1114 try {
1115 return _file_impl->fcntl(op, (uintptr_t)&arg).then_wrapped([] (future<int> f) {
1116 // Need to handle return value differently from that of fcntl
1117 if (f.failed()) {
1118 return make_exception_future<>(f.get_exception());
1119 }
1120 return make_ready_future<>();
1121 });
1122 } catch (...) {
1123 return current_exception_as_future<>();
1124 }
1125 });
1126}
1127
1128future<> file::set_file_lifetime_hint(uint64_t hint) noexcept {
1129 return set_lifetime_hint_impl(F_SET_FILE_RW_HINT, hint);
1130}
1131
1132future<> file::set_inode_lifetime_hint(uint64_t hint) noexcept {
1133 return set_lifetime_hint_impl(F_SET_RW_HINT, hint);
1134}
1135
1136future<uint64_t> file::get_lifetime_hint_impl(int op) noexcept {
1137 return do_with(uint64_t(0), [op, this] (uint64_t& arg) {
1138 try {
1139 return _file_impl->fcntl(op, (uintptr_t)&arg).then_wrapped([&arg] (future<int> f) {
1140 // Need to handle return value differently from that of fcntl
1141 if (f.failed()) {
1142 return make_exception_future<uint64_t>(f.get_exception());
1143 }
1144 return make_ready_future<uint64_t>(arg);
1145 });
1146 } catch (...) {
1147 return current_exception_as_future<uint64_t>();
1148 }
1149 });
1150}
1151
1152future<uint64_t> file::get_file_lifetime_hint() noexcept {
1153 return get_lifetime_hint_impl(F_GET_FILE_RW_HINT);
1154}
1155
1156future<uint64_t> file::get_inode_lifetime_hint() noexcept {
1157 return get_lifetime_hint_impl(F_GET_RW_HINT);
1158}
1159
f67539c2 1160future<temporary_buffer<uint8_t>>
20effc67 1161file::dma_read_bulk_impl(uint64_t offset, size_t range_size, const io_priority_class& pc, io_intent* intent) noexcept {
f67539c2 1162 try {
20effc67 1163 return _file_impl->dma_read_bulk(offset, range_size, pc, intent);
f67539c2
TL
1164 } catch (...) {
1165 return current_exception_as_future<temporary_buffer<uint8_t>>();
1166 }
1167}
1168
1169future<> file::discard(uint64_t offset, uint64_t length) noexcept {
1170 try {
1171 return _file_impl->discard(offset, length);
1172 } catch (...) {
1173 return current_exception_as_future();
1174 }
1175}
1176
1177future<> file::allocate(uint64_t position, uint64_t length) noexcept {
1178 try {
1179 return _file_impl->allocate(position, length);
1180 } catch (...) {
1181 return current_exception_as_future();
1182 }
1183}
1184
1185future<> file::truncate(uint64_t length) noexcept {
1186 try {
1187 return _file_impl->truncate(length);
1188 } catch (...) {
1189 return current_exception_as_future();
1190 }
1191}
1192
1193future<struct stat> file::stat() noexcept {
1194 try {
1195 return _file_impl->stat();
1196 } catch (...) {
1197 return current_exception_as_future<struct stat>();
1198 }
1199}
1200
1201future<> file::flush() noexcept {
1202 try {
1203 return _file_impl->flush();
1204 } catch (...) {
1205 return current_exception_as_future();
1206 }
1207}
1208
20effc67 1209future<size_t> file::dma_write(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent* intent) noexcept {
f67539c2 1210 try {
20effc67 1211 return _file_impl->write_dma(pos, std::move(iov), pc, intent);
f67539c2
TL
1212 } catch (...) {
1213 return current_exception_as_future<size_t>();
1214 }
1215}
1216
1217future<size_t>
20effc67 1218file::dma_write_impl(uint64_t pos, const uint8_t* buffer, size_t len, const io_priority_class& pc, io_intent* intent) noexcept {
f67539c2 1219 try {
20effc67 1220 return _file_impl->write_dma(pos, buffer, len, pc, intent);
f67539c2
TL
1221 } catch (...) {
1222 return current_exception_as_future<size_t>();
1223 }
1224}
1225
20effc67 1226future<size_t> file::dma_read(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent* intent) noexcept {
f67539c2 1227 try {
20effc67 1228 return _file_impl->read_dma(pos, std::move(iov), pc, intent);
f67539c2
TL
1229 } catch (...) {
1230 return current_exception_as_future<size_t>();
1231 }
1232}
1233
1234future<temporary_buffer<uint8_t>>
20effc67
TL
1235file::dma_read_exactly_impl(uint64_t pos, size_t len, const io_priority_class& pc, io_intent* intent) noexcept {
1236 return dma_read<uint8_t>(pos, len, pc, intent).then([len](auto buf) {
f67539c2
TL
1237 if (buf.size() < len) {
1238 throw eof_error();
9f95a23c 1239 }
f67539c2
TL
1240
1241 return buf;
1242 });
9f95a23c
TL
1243}
1244
f67539c2 1245future<temporary_buffer<uint8_t>>
20effc67
TL
1246file::dma_read_impl(uint64_t pos, size_t len, const io_priority_class& pc, io_intent* intent) noexcept {
1247 return dma_read_bulk<uint8_t>(pos, len, pc, intent).then([len](temporary_buffer<uint8_t> buf) {
f67539c2
TL
1248 if (len < buf.size()) {
1249 buf.trim(len);
1250 }
1251
1252 return buf;
1253 });
9f95a23c
TL
1254}
1255
f67539c2 1256future<size_t>
20effc67 1257file::dma_read_impl(uint64_t aligned_pos, uint8_t* aligned_buffer, size_t aligned_len, const io_priority_class& pc, io_intent* intent) noexcept {
f67539c2 1258 try {
20effc67 1259 return _file_impl->read_dma(aligned_pos, aligned_buffer, aligned_len, pc, intent);
f67539c2
TL
1260 } catch (...) {
1261 return current_exception_as_future<size_t>();
1262 }
9f95a23c
TL
1263}
1264
1265seastar::file_handle
1266file::dup() {
1267 return seastar::file_handle(_file_impl->dup());
1268}
1269
1270file_impl* file_impl::get_file_impl(file& f) {
1271 return f._file_impl.get();
1272}
1273
1274std::unique_ptr<seastar::file_handle_impl>
1275file_impl::dup() {
1276 throw std::runtime_error("this file type cannot be duplicated");
1277}
1278
20effc67
TL
1279future<int> file_impl::ioctl(uint64_t cmd, void* argp) noexcept {
1280 return make_exception_future<int>(std::runtime_error("this file type does not support ioctl"));
1281}
1282
1283future<int> file_impl::ioctl_short(uint64_t cmd, void* argp) noexcept {
1284 return make_exception_future<int>(std::runtime_error("this file type does not support ioctl_short"));
1285}
1286
1287future<int> file_impl::fcntl(int op, uintptr_t arg) noexcept {
1288 return make_exception_future<int>(std::runtime_error("this file type does not support fcntl"));
1289}
1290
1291future<int> file_impl::fcntl_short(int op, uintptr_t arg) noexcept {
1292 return make_exception_future<int>(std::runtime_error("this file type does not support fcntl_short"));
1293}
1294
f67539c2
TL
1295future<file> open_file_dma(std::string_view name, open_flags flags) noexcept {
1296 return engine().open_file_dma(name, flags, file_open_options());
1297}
1298
1299future<file> open_file_dma(std::string_view name, open_flags flags, file_open_options options) noexcept {
1300 return engine().open_file_dma(name, flags, std::move(options));
1301}
1302
1303future<file> open_directory(std::string_view name) noexcept {
1304 return engine().open_directory(name);
1305}
1306
9f95a23c 1307}