]> git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/include/seastar/core/file.hh
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / seastar / include / seastar / core / file.hh
1 /*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18 /*
19 * Copyright 2015 Cloudius Systems
20 */
21
22 #pragma once
23
24 #include <seastar/core/do_with.hh>
25 #include <seastar/core/stream.hh>
26 #include <seastar/core/sstring.hh>
27 #include <seastar/core/shared_ptr.hh>
28 #include <seastar/core/align.hh>
29 #include <seastar/core/io_priority_class.hh>
30 #include <seastar/core/file-types.hh>
31 #include <seastar/util/std-compat.hh>
32 #include <system_error>
33 #include <sys/statvfs.h>
34 #include <sys/ioctl.h>
35 #include <linux/fs.h>
36 #include <sys/uio.h>
37 #include <unistd.h>
38
39 namespace seastar {
40
41 /// \addtogroup fileio-module
42 /// @{
43
44 /// A directory entry being listed.
45 struct directory_entry {
46 /// Name of the file in a directory entry. Will never be "." or "..". Only the last component is included.
47 sstring name;
48 /// Type of the directory entry, if known.
49 std::optional<directory_entry_type> type;
50 };
51
52 /// Filesystem object stat information
53 struct stat_data {
54 uint64_t device_id; // ID of device containing file
55 uint64_t inode_number; // Inode number
56 uint64_t mode; // File type and mode
57 directory_entry_type type;
58 uint64_t number_of_links;// Number of hard links
59 uint64_t uid; // User ID of owner
60 uint64_t gid; // Group ID of owner
61 uint64_t rdev; // Device ID (if special file)
62 uint64_t size; // Total size, in bytes
63 uint64_t block_size; // Block size for filesystem I/O
64 uint64_t allocated_size; // Total size of allocated storage, in bytes
65
66 std::chrono::system_clock::time_point time_accessed; // Time of last content access
67 std::chrono::system_clock::time_point time_modified; // Time of last content modification
68 std::chrono::system_clock::time_point time_changed; // Time of last status change (either content or attributes)
69 };
70
71 /// File open options
72 ///
73 /// Options used to configure an open file.
74 ///
75 /// \ref file
76 struct file_open_options {
77 uint64_t extent_allocation_size_hint = 1 << 20; ///< Allocate this much disk space when extending the file
78 bool sloppy_size = false; ///< Allow the file size not to track the amount of data written until a flush
79 uint64_t sloppy_size_hint = 1 << 20; ///< Hint as to what the eventual file size will be
80 file_permissions create_permissions = file_permissions::default_file_permissions; ///< File permissions to use when creating a file
81 bool append_is_unlikely = false; ///< Hint that user promises (or at least tries hard) not to write behind file size
82
83 // The fsxattr.fsx_extsize is 32-bit
84 static constexpr uint64_t max_extent_allocation_size_hint = 1 << 31;
85 };
86
// Forward declarations of types that are referenced below before (or
// without) their full definitions being visible at that point.
class file;
class file_impl;
class io_intent;
class file_handle;
91
92 // A handle that can be transported across shards and used to
93 // create a dup(2)-like `file` object referring to the same underlying file
94 class file_handle_impl {
95 public:
96 virtual ~file_handle_impl() = default;
97 virtual std::unique_ptr<file_handle_impl> clone() const = 0;
98 virtual shared_ptr<file_impl> to_file() && = 0;
99 };
100
101 class file_impl {
102 friend class file;
103 protected:
104 static file_impl* get_file_impl(file& f);
105 unsigned _memory_dma_alignment = 4096;
106 unsigned _disk_read_dma_alignment = 4096;
107 unsigned _disk_write_dma_alignment = 4096;
108 unsigned _disk_overwrite_dma_alignment = 4096;
109 unsigned _read_max_length = 1u << 30;
110 unsigned _write_max_length = 1u << 30;
111 public:
112 virtual ~file_impl() {}
113
114 virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc) = 0;
115 virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) = 0;
116 virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc) = 0;
117 virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) = 0;
118
119 virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc, io_intent*) {
120 return write_dma(pos, buffer, len, pc);
121 }
122 virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent*) {
123 return write_dma(pos, std::move(iov), pc);
124 }
125 virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc, io_intent*) {
126 return read_dma(pos, buffer, len, pc);
127 }
128 virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent*) {
129 return read_dma(pos, std::move(iov), pc);
130 }
131
132 virtual future<> flush(void) = 0;
133 virtual future<struct stat> stat(void) = 0;
134 virtual future<> truncate(uint64_t length) = 0;
135 virtual future<> discard(uint64_t offset, uint64_t length) = 0;
136 virtual future<int> ioctl(uint64_t cmd, void* argp) noexcept;
137 virtual future<int> ioctl_short(uint64_t cmd, void* argp) noexcept;
138 virtual future<int> fcntl(int op, uintptr_t arg) noexcept;
139 virtual future<int> fcntl_short(int op, uintptr_t arg) noexcept;
140 virtual future<> allocate(uint64_t position, uint64_t length) = 0;
141 virtual future<uint64_t> size(void) = 0;
142 virtual future<> close() = 0;
143 virtual std::unique_ptr<file_handle_impl> dup();
144 virtual subscription<directory_entry> list_directory(std::function<future<> (directory_entry de)> next) = 0;
145 virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc) = 0;
146 virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc, io_intent*) {
147 return dma_read_bulk(offset, range_size, pc);
148 }
149
150 friend class reactor;
151 };
152
153 future<shared_ptr<file_impl>> make_file_impl(int fd, file_open_options options, int oflags) noexcept;
154
155 /// \endcond
156
157 /// A data file on persistent storage.
158 ///
159 /// File objects represent uncached, unbuffered files. As such great care
160 /// must be taken to cache data at the application layer; neither seastar
161 /// nor the OS will cache these file.
162 ///
163 /// Data is transferred using direct memory access (DMA). This imposes
164 /// restrictions on file offsets and data pointers. The former must be aligned
165 /// on a 4096 byte boundary, while a 512 byte boundary suffices for the latter.
166 class file {
167 shared_ptr<file_impl> _file_impl;
168 public:
169 /// Default constructor constructs an uninitialized file object.
170 ///
171 /// A default constructor is useful for the common practice of declaring
172 /// a variable, and only assigning to it later. The uninitialized file
173 /// must not be used, or undefined behavior will result (currently, a null
174 /// pointer dereference).
175 ///
176 /// One can check whether a file object is in uninitialized state with
177 /// \ref operator bool(); One can reset a file back to uninitialized state
178 /// by assigning file() to it.
179 file() noexcept : _file_impl(nullptr) {}
180
181 file(shared_ptr<file_impl> impl) noexcept
182 : _file_impl(std::move(impl)) {}
183
184 /// Constructs a file object from a \ref file_handle obtained from another shard
185 explicit file(file_handle&& handle) noexcept;
186
187 /// Checks whether the file object was initialized.
188 ///
189 /// \return false if the file object is uninitialized (default
190 /// constructed), true if the file object refers to an actual file.
191 explicit operator bool() const noexcept { return bool(_file_impl); }
192
193 /// Copies a file object. The new and old objects refer to the
194 /// same underlying file.
195 ///
196 /// \param x file object to be copied
197 file(const file& x) = default;
198 /// Moves a file object.
199 file(file&& x) noexcept : _file_impl(std::move(x._file_impl)) {}
200 /// Assigns a file object. After assignent, the destination and source refer
201 /// to the same underlying file.
202 ///
203 /// \param x file object to assign to `this`.
204 file& operator=(const file& x) noexcept = default;
205 /// Moves assigns a file object.
206 file& operator=(file&& x) noexcept = default;
207
208 // O_DIRECT reading requires that buffer, offset, and read length, are
209 // all aligned. Alignment of 4096 was necessary in the past, but no longer
210 // is - 512 is usually enough; But we'll need to use BLKSSZGET ioctl to
211 // be sure it is really enough on this filesystem. 4096 is always safe.
212 // In addition, if we start reading in things outside page boundaries,
213 // we will end up with various pages around, some of them with
214 // overlapping ranges. Those would be very challenging to cache.
215
216 /// Alignment requirement for file offsets (for reads)
217 uint64_t disk_read_dma_alignment() const noexcept {
218 return _file_impl->_disk_read_dma_alignment;
219 }
220
221 /// Alignment requirement for file offsets (for writes)
222 uint64_t disk_write_dma_alignment() const noexcept {
223 return _file_impl->_disk_write_dma_alignment;
224 }
225
226 /// Alignment requirement for file offsets (for overwrites).
227 ///
228 /// Specifies the minimum alignment for disk offsets for
229 /// overwrites (writes to a location that was previously written).
230 /// This can be smaller than \ref disk_write_dma_alignment(), allowing
231 /// a reduction in disk bandwidth used.
232 uint64_t disk_overwrite_dma_alignment() const noexcept {
233 return _file_impl->_disk_overwrite_dma_alignment;
234 }
235
236 /// Alignment requirement for data buffers
237 uint64_t memory_dma_alignment() const noexcept {
238 return _file_impl->_memory_dma_alignment;
239 }
240
241 /// Recommended limit for read request size.
242 /// Submitting a larger request will not cause any error,
243 /// but may result in poor latencies for this and any other
244 /// concurrent requests
245 size_t disk_read_max_length() const noexcept {
246 return _file_impl->_read_max_length;
247 }
248
249 /// Recommended limit for write request size.
250 /// Submitting a larger request will not cause any error,
251 /// but may result in poor latencies for this and any other
252 /// concurrent requests
253 size_t disk_write_max_length() const noexcept {
254 return _file_impl->_write_max_length;
255 }
256
257 /**
258 * Perform a single DMA read operation.
259 *
260 * @param aligned_pos offset to begin reading at (should be aligned)
261 * @param aligned_buffer output buffer (should be aligned)
262 * @param aligned_len number of bytes to read (should be aligned)
263 * @param pc the IO priority class under which to queue this operation
264 * @param intent the IO intention confirmation (\ref seastar::io_intent)
265 *
266 * Alignment is HW dependent but use 4KB alignment to be on the safe side as
267 * explained above.
268 *
269 * @return number of bytes actually read
270 * or exceptional future in case of I/O error
271 */
272 template <typename CharType>
273 future<size_t>
274 dma_read(uint64_t aligned_pos, CharType* aligned_buffer, size_t aligned_len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept {
275 return dma_read_impl(aligned_pos, reinterpret_cast<uint8_t*>(aligned_buffer), aligned_len, pc, intent);
276 }
277
278 /**
279 * Read the requested amount of bytes starting from the given offset.
280 *
281 * @param pos offset to begin reading from
282 * @param len number of bytes to read
283 * @param pc the IO priority class under which to queue this operation
284 * @param intent the IO intention confirmation (\ref seastar::io_intent)
285 *
286 * @return temporary buffer containing the requested data.
287 * or exceptional future in case of I/O error
288 *
289 * This function doesn't require any alignment for both "pos" and "len"
290 *
291 * @note size of the returned buffer may be smaller than "len" if EOF is
292 * reached or in case of I/O error.
293 */
294 template <typename CharType>
295 future<temporary_buffer<CharType>> dma_read(uint64_t pos, size_t len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept {
296 return dma_read_impl(pos, len, pc, intent).then([] (temporary_buffer<uint8_t> t) {
297 return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release());
298 });
299 }
300
301 /// Error thrown when attempting to read past end-of-file
302 /// with \ref dma_read_exactly().
303 class eof_error : public std::exception {};
304
305 /**
306 * Read the exact amount of bytes.
307 *
308 * @param pos offset in a file to begin reading from
309 * @param len number of bytes to read
310 * @param pc the IO priority class under which to queue this operation
311 * @param intent the IO intention confirmation (\ref seastar::io_intent)
312 *
313 * @return temporary buffer containing the read data
314 * or exceptional future in case an error, holding:
315 * end_of_file_error if EOF is reached, file_io_error or
316 * std::system_error in case of I/O error.
317 */
318 template <typename CharType>
319 future<temporary_buffer<CharType>>
320 dma_read_exactly(uint64_t pos, size_t len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept {
321 return dma_read_exactly_impl(pos, len, pc, intent).then([] (temporary_buffer<uint8_t> t) {
322 return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release());
323 });
324 }
325
326 /// Performs a DMA read into the specified iovec.
327 ///
328 /// \param pos offset to read from. Must be aligned to \ref disk_read_dma_alignment.
329 /// \param iov vector of address/size pairs to read into. Addresses must be
330 /// aligned.
331 /// \param pc the IO priority class under which to queue this operation
332 /// \param intent the IO intention confirmation (\ref seastar::io_intent)
333 ///
334 /// \return a future representing the number of bytes actually read. A short
335 /// read may happen due to end-of-file or an I/O error.
336 future<size_t> dma_read(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept;
337
338 /// Performs a DMA write from the specified buffer.
339 ///
340 /// \param pos offset to write into. Must be aligned to \ref disk_write_dma_alignment.
341 /// \param buffer aligned address of buffer to read from. Buffer must exists
342 /// until the future is made ready.
343 /// \param len number of bytes to write. Must be aligned.
344 /// \param pc the IO priority class under which to queue this operation
345 /// \param intent the IO intention confirmation (\ref seastar::io_intent)
346 ///
347 /// \return a future representing the number of bytes actually written. A short
348 /// write may happen due to an I/O error.
349 template <typename CharType>
350 future<size_t> dma_write(uint64_t pos, const CharType* buffer, size_t len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept {
351 return dma_write_impl(pos, reinterpret_cast<const uint8_t*>(buffer), len, pc, intent);
352 }
353
354 /// Performs a DMA write to the specified iovec.
355 ///
356 /// \param pos offset to write into. Must be aligned to \ref disk_write_dma_alignment.
357 /// \param iov vector of address/size pairs to write from. Addresses must be
358 /// aligned.
359 /// \param pc the IO priority class under which to queue this operation
360 /// \param intent the IO intention confirmation (\ref seastar::io_intent)
361 ///
362 /// \return a future representing the number of bytes actually written. A short
363 /// write may happen due to an I/O error.
364 future<size_t> dma_write(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept;
365
366 /// Causes any previously written data to be made stable on persistent storage.
367 ///
368 /// Prior to a flush, written data may or may not survive a power failure. After
369 /// a flush, data is guaranteed to be on disk.
370 future<> flush() noexcept;
371
372 /// Returns \c stat information about the file.
373 future<struct stat> stat() noexcept;
374
375 /// Truncates the file to a specified length.
376 future<> truncate(uint64_t length) noexcept;
377
378 /// Preallocate disk blocks for a specified byte range.
379 ///
380 /// Requests the file system to allocate disk blocks to
381 /// back the specified range (\c length bytes starting at
382 /// \c position). The range may be outside the current file
383 /// size; the blocks can then be used when appending to the
384 /// file.
385 ///
386 /// \param position beginning of the range at which to allocate
387 /// blocks.
388 /// \param length length of range to allocate.
389 /// \return future that becomes ready when the operation completes.
390 future<> allocate(uint64_t position, uint64_t length) noexcept;
391
392 /// Discard unneeded data from the file.
393 ///
394 /// The discard operation tells the file system that a range of offsets
395 /// (which be aligned) is no longer needed and can be reused.
396 future<> discard(uint64_t offset, uint64_t length) noexcept;
397
398 /// Generic ioctl syscall support for special file handling.
399 ///
400 /// This interface is useful for many non-standard operations on seastar::file.
401 /// The examples can be - querying device or file system capabilities,
402 /// configuring special performance or access modes on devices etc.
403 /// Refer ioctl(2) man page for more details.
404 ///
405 /// \param cmd ioctl command to be executed
406 /// \param argp pointer to the buffer which holds the argument
407 ///
408 /// \return a future containing the return value if any, or an exceptional future
409 /// if the operation has failed.
410 future<int> ioctl(uint64_t cmd, void* argp) noexcept;
411
412 /// Performs a short ioctl syscall on seastar::file
413 ///
414 /// This is similar to generic \c ioctl; the difference is, here user indicates
415 /// that this operation is a short one, and does not involve any i/o or locking.
416 /// The \c file module will process this differently from the normal \ref ioctl().
417 /// Use this method only if the user is sure that the operation does not involve any
418 /// blocking operation. If unsure, use the default \ref ioctl() method.
419 /// Refer ioctl(2) man page for more details on ioctl operation.
420 ///
421 /// \param cmd ioctl command to be executed
422 /// \param argp pointer to the buffer which holds the argument
423 ///
424 /// \return a future containing the return value if any, or an exceptional future
425 /// if the operation has failed.
426 future<int> ioctl_short(uint64_t cmd, void* argp) noexcept;
427
428 /// Generic fcntl syscall support for special file handling.
429 ///
430 /// fcntl performs the operation specified by 'op' field on the file.
431 /// Some of the use cases can be - setting file status flags, advisory record locking,
432 /// managing signals, managing file leases or write hints etc.
433 /// Refer fcntl(2) man page for more details.
434 ///
435 /// \param op the operation to be executed
436 /// \param arg the optional argument
437 /// \return a future containing the return value if any, or an exceptional future
438 /// if the operation has failed
439 future<int> fcntl(int op, uintptr_t arg = 0UL) noexcept;
440
441 /// Performs a 'short' fcntl syscall on seastar::file
442 ///
443 /// This is similar to generic \c fcntl; the difference is, here user indicates
444 /// that this operation is a short one, and does not involve any i/o or locking.
445 /// The \c file module will process this differently from normal \ref fcntl().
446 /// Use this only if the user is sure that the operation does not involve any
447 /// blocking operation. If unsure, use the default \ref fcntl() method.
448 /// Refer fcntl(2) man page for more details on fcntl operation.
449 ///
450 /// \param op the operation to be executed
451 /// \param arg the optional argument
452 /// \return a future containing the return value if any, or an exceptional future
453 /// if the operation has failed
454 future<int> fcntl_short(int op, uintptr_t arg = 0UL) noexcept;
455
456 /// Set a lifetime hint for the open file descriptor corresponding to seastar::file
457 ///
458 /// Write lifetime hints can be used to inform the kernel about the relative
459 /// expected lifetime of writes on a given inode or via open file descriptor.
460 /// An application may use the different hint values to separate writes into different
461 /// write classes, so that multiple users or applications running on a single storage back-end
462 /// can aggregate their I/O patterns in a consistent manner.
463 /// Refer fcntl(2) man page for more details on write lifetime hints.
464 ///
465 /// \param hint the hint value of the stream
466 /// \return future indicating success or failure
467 [[deprecated("This API was removed from the kernel")]]
468 future<> set_file_lifetime_hint(uint64_t hint) noexcept;
469
470 /// Set a lifetime hint for the inode corresponding to seastar::file
471 ///
472 /// Write lifetime hints can be used to inform the kernel about the relative
473 /// expected lifetime of writes on a given inode or via open file descriptor.
474 /// An application may use the different hint values to separate writes into different
475 /// write classes, so that multiple users or applications running on a single storage back-end
476 /// can aggregate their I/O patterns in a consistent manner.
477 /// Refer fcntl(2) man page for more details on write lifetime hints.
478 ///
479 /// \param hint the hint value of the stream
480 /// \return future indicating success or failure
481 future<> set_inode_lifetime_hint(uint64_t hint) noexcept;
482
483 /// Get the lifetime hint of the open file descriptor of seastar::file which was set by
484 /// \ref set_file_lifetime_hint()
485 ///
486 /// Write lifetime hints can be used to inform the kernel about the relative
487 /// expected lifetime of writes on a given inode or via open file descriptor.
488 /// An application may use the different hint values to separate writes into different
489 /// write classes, so that multiple users or applications running on a single storage back-end
490 /// can aggregate their I/O patterns in a consistent manner.
491 /// Refer fcntl(2) man page for more details on write lifetime hints.
492 ///
493 /// \return the hint value of the open file descriptor
494 [[deprecated("This API was removed from the kernel")]]
495 future<uint64_t> get_file_lifetime_hint() noexcept;
496
497 /// Get the lifetime hint of the inode of seastar::file which was set by
498 /// \ref set_inode_lifetime_hint()
499 ///
500 /// Write lifetime hints can be used to inform the kernel about the relative
501 /// expected lifetime of writes on a given inode or via open file descriptor.
502 /// An application may use the different hint values to separate writes into different
503 /// write classes, so that multiple users or applications running on a single storage back-end
504 /// can aggregate their I/O patterns in a consistent manner.
505 /// Refer fcntl(2) man page for more details on write lifetime hints.
506 ///
507 /// \return the hint value of the inode
508 future<uint64_t> get_inode_lifetime_hint() noexcept;
509
510 /// Gets the file size.
511 future<uint64_t> size() const noexcept;
512
513 /// Closes the file.
514 ///
515 /// Flushes any pending operations and release any resources associated with
516 /// the file (except for stable storage).
517 ///
518 /// \note
519 /// \c close() never fails. It just reports errors and swallows them.
520 /// To ensure file data reaches stable storage, you must call \ref flush()
521 /// before calling \c close().
522 future<> close() noexcept;
523
524 /// Returns a directory listing, given that this file object is a directory.
525 subscription<directory_entry> list_directory(std::function<future<> (directory_entry de)> next);
526
527 /**
528 * Read a data bulk containing the provided addresses range that starts at
529 * the given offset and ends at either the address aligned to
530 * dma_alignment (4KB) or at the file end.
531 *
532 * @param offset starting address of the range the read bulk should contain
533 * @param range_size size of the addresses range
534 * @param pc the IO priority class under which to queue this operation
535 * @param intent the IO intention confirmation (\ref seastar::io_intent)
536 *
537 * @return temporary buffer containing the read data bulk.
538 * or exceptional future holding:
539 * system_error exception in case of I/O error or eof_error when
540 * "offset" is beyond EOF.
541 */
542 template <typename CharType>
543 future<temporary_buffer<CharType>>
544 dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept {
545 return dma_read_bulk_impl(offset, range_size, pc, intent).then([] (temporary_buffer<uint8_t> t) {
546 return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release());
547 });
548 }
549
550 /// \brief Creates a handle that can be transported across shards.
551 ///
552 /// Creates a handle that can be transported across shards, and then
553 /// used to create a new shard-local \ref file object that refers to
554 /// the same on-disk file.
555 ///
556 /// \note Use on read-only files.
557 ///
558 file_handle dup();
559 private:
560 future<temporary_buffer<uint8_t>>
561 dma_read_bulk_impl(uint64_t offset, size_t range_size, const io_priority_class& pc, io_intent* intent) noexcept;
562
563 future<size_t>
564 dma_write_impl(uint64_t pos, const uint8_t* buffer, size_t len, const io_priority_class& pc, io_intent* intent) noexcept;
565
566 future<temporary_buffer<uint8_t>>
567 dma_read_impl(uint64_t pos, size_t len, const io_priority_class& pc, io_intent* intent) noexcept;
568
569 future<size_t>
570 dma_read_impl(uint64_t aligned_pos, uint8_t* aligned_buffer, size_t aligned_len, const io_priority_class& pc, io_intent* intent) noexcept;
571
572 future<temporary_buffer<uint8_t>>
573 dma_read_exactly_impl(uint64_t pos, size_t len, const io_priority_class& pc, io_intent* intent) noexcept;
574
575 future<uint64_t> get_lifetime_hint_impl(int op) noexcept;
576 future<> set_lifetime_hint_impl(int op, uint64_t hint) noexcept;
577
578 friend class reactor;
579 friend class file_impl;
580 };
581
582 /// \brief Helper for ensuring a file is closed after \c func is called.
583 ///
584 /// The file provided by the \c file_fut future is passed to \c func.
585 ///
586 /// \param file_fut A future that produces a file
587 /// \param func A function that uses a file
588 /// \returns the future returned by \c func, or an exceptional future if either \c file_fut or closing the file failed.
589 template <typename Func>
590 SEASTAR_CONCEPT( requires std::invocable<Func, file&> && std::is_nothrow_move_constructible_v<Func> )
591 auto with_file(future<file> file_fut, Func func) noexcept {
592 static_assert(std::is_nothrow_move_constructible_v<Func>, "Func's move constructor must not throw");
593 return file_fut.then([func = std::move(func)] (file f) mutable {
594 return do_with(std::move(f), [func = std::move(func)] (file& f) mutable {
595 return futurize_invoke(func, f).finally([&f] {
596 return f.close();
597 });
598 });
599 });
600 }
601
602 /// \brief Helper for ensuring a file is closed if \c func fails.
603 ///
604 /// The file provided by the \c file_fut future is passed to \c func.
605 /// * If func throws an exception E, the file is closed and we return
606 /// a failed future with E.
607 /// * If func returns a value V, the file is not closed and we return
608 /// a future with V.
609 /// Note that when an exception is not thrown, it is the
610 /// responsibility of func to make sure the file will be closed. It
611 /// can close the file itself, return it, or store it somewhere.
612 ///
613 /// \param file_fut A future that produces a file
614 /// \param func A function that uses a file
615 /// \returns the future returned by \c func, or an exceptional future if \c file_fut failed or a nested exception if closing the file failed.
616 template <typename Func>
617 SEASTAR_CONCEPT( requires std::invocable<Func, file&> && std::is_nothrow_move_constructible_v<Func> )
618 auto with_file_close_on_failure(future<file> file_fut, Func func) noexcept {
619 static_assert(std::is_nothrow_move_constructible_v<Func>, "Func's move constructor must not throw");
620 return file_fut.then([func = std::move(func)] (file f) mutable {
621 return do_with(std::move(f), [func = std::move(func)] (file& f) mutable {
622 return futurize_invoke(std::move(func), f).then_wrapped([&f] (auto ret) mutable {
623 if (!ret.failed()) {
624 return ret;
625 }
626 return ret.finally([&f] {
627 // If f.close() fails, return that as nested exception.
628 return f.close();
629 });
630 });
631 });
632 });
633 }
634
635 /// \example file_demo.cc
636 /// A program demonstrating the use of \ref seastar::with_file
637 /// and \ref seastar::with_file_close_on_failure
638
639 /// \brief A shard-transportable handle to a file
640 ///
641 /// If you need to access a file (for reads only) across multiple shards,
642 /// you can use the file::dup() method to create a `file_handle`, transport
643 /// this file handle to another shard, and use the handle to create \ref file
644 /// object on that shard. This is more efficient than calling open_file_dma()
645 /// again.
646 class file_handle {
647 std::unique_ptr<file_handle_impl> _impl;
648 private:
649 explicit file_handle(std::unique_ptr<file_handle_impl> impl) : _impl(std::move(impl)) {}
650 public:
651 /// Copies a file handle object
652 file_handle(const file_handle&);
653 /// Moves a file handle object
654 file_handle(file_handle&&) noexcept;
655 /// Assigns a file handle object
656 file_handle& operator=(const file_handle&);
657 /// Move-assigns a file handle object
658 file_handle& operator=(file_handle&&) noexcept;
659 /// Converts the file handle object to a \ref file.
660 file to_file() const &;
661 /// Converts the file handle object to a \ref file.
662 file to_file() &&;
663
664 friend class file;
665 };
666
667 /// @}
668
/// An exception Cancelled IOs resolve their future into (see \ref io_intent "io_intent")
class cancelled_error : public std::exception {
public:
    /// Returns the fixed, statically allocated message "cancelled".
    ///
    /// Marked \c override so the compiler verifies that this really replaces
    /// std::exception::what().
    const char* what() const noexcept override {
        return "cancelled";
    }
};
676
677 }