]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* |
2 | * This file is open source software, licensed to you under the terms | |
3 | * of the Apache License, Version 2.0 (the "License"). See the NOTICE file | |
4 | * distributed with this work for additional information regarding copyright | |
5 | * ownership. You may not use this file except in compliance with the License. | |
6 | * | |
7 | * You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, | |
12 | * software distributed under the License is distributed on an | |
13 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | * KIND, either express or implied. See the License for the | |
15 | * specific language governing permissions and limitations | |
16 | * under the License. | |
17 | */ | |
18 | /* | |
19 | * Copyright 2015 Cloudius Systems | |
20 | */ | |
21 | ||
22 | #pragma once | |
23 | ||
f67539c2 | 24 | #include <seastar/core/do_with.hh> |
11fdf7f2 TL |
25 | #include <seastar/core/stream.hh> |
26 | #include <seastar/core/sstring.hh> | |
27 | #include <seastar/core/shared_ptr.hh> | |
28 | #include <seastar/core/align.hh> | |
20effc67 | 29 | #include <seastar/core/io_priority_class.hh> |
9f95a23c | 30 | #include <seastar/core/file-types.hh> |
11fdf7f2 TL |
31 | #include <seastar/util/std-compat.hh> |
32 | #include <system_error> | |
11fdf7f2 TL |
33 | #include <sys/statvfs.h> |
34 | #include <sys/ioctl.h> | |
35 | #include <linux/fs.h> | |
36 | #include <sys/uio.h> | |
37 | #include <unistd.h> | |
38 | ||
39 | namespace seastar { | |
40 | ||
41 | /// \addtogroup fileio-module | |
42 | /// @{ | |
43 | ||
11fdf7f2 TL |
44 | /// A directory entry being listed. |
45 | struct directory_entry { | |
46 | /// Name of the file in a directory entry. Will never be "." or "..". Only the last component is included. | |
47 | sstring name; | |
48 | /// Type of the directory entry, if known. | |
f67539c2 | 49 | std::optional<directory_entry_type> type; |
11fdf7f2 TL |
50 | }; |
51 | ||
9f95a23c TL |
52 | /// Filesystem object stat information |
53 | struct stat_data { | |
54 | uint64_t device_id; // ID of device containing file | |
55 | uint64_t inode_number; // Inode number | |
56 | uint64_t mode; // File type and mode | |
57 | directory_entry_type type; | |
58 | uint64_t number_of_links;// Number of hard links | |
59 | uint64_t uid; // User ID of owner | |
60 | uint64_t gid; // Group ID of owner | |
61 | uint64_t rdev; // Device ID (if special file) | |
62 | uint64_t size; // Total size, in bytes | |
63 | uint64_t block_size; // Block size for filesystem I/O | |
64 | uint64_t allocated_size; // Total size of allocated storage, in bytes | |
65 | ||
66 | std::chrono::system_clock::time_point time_accessed; // Time of last content access | |
67 | std::chrono::system_clock::time_point time_modified; // Time of last content modification | |
68 | std::chrono::system_clock::time_point time_changed; // Time of last status change (either content or attributes) | |
69 | }; | |
70 | ||
11fdf7f2 TL |
71 | /// File open options |
72 | /// | |
73 | /// Options used to configure an open file. | |
74 | /// | |
75 | /// \ref file | |
76 | struct file_open_options { | |
77 | uint64_t extent_allocation_size_hint = 1 << 20; ///< Allocate this much disk space when extending the file | |
78 | bool sloppy_size = false; ///< Allow the file size not to track the amount of data written until a flush | |
79 | uint64_t sloppy_size_hint = 1 << 20; ///< Hint as to what the eventual file size will be | |
9f95a23c | 80 | file_permissions create_permissions = file_permissions::default_file_permissions; ///< File permissions to use when creating a file |
20effc67 | 81 | bool append_is_unlikely = false; ///< Hint that user promises (or at least tries hard) not to write behind file size |
9f95a23c | 82 | |
20effc67 TL |
83 | // The fsxattr.fsx_extsize is 32-bit |
84 | static constexpr uint64_t max_extent_allocation_size_hint = 1 << 31; | |
11fdf7f2 TL |
85 | }; |
86 | ||
11fdf7f2 TL |
87 | class file; |
88 | class file_impl; | |
20effc67 | 89 | class io_intent; |
11fdf7f2 TL |
90 | class file_handle; |
91 | ||
92 | // A handle that can be transported across shards and used to | |
93 | // create a dup(2)-like `file` object referring to the same underlying file | |
94 | class file_handle_impl { | |
95 | public: | |
96 | virtual ~file_handle_impl() = default; | |
97 | virtual std::unique_ptr<file_handle_impl> clone() const = 0; | |
98 | virtual shared_ptr<file_impl> to_file() && = 0; | |
99 | }; | |
100 | ||
101 | class file_impl { | |
20effc67 | 102 | friend class file; |
11fdf7f2 TL |
103 | protected: |
104 | static file_impl* get_file_impl(file& f); | |
11fdf7f2 TL |
105 | unsigned _memory_dma_alignment = 4096; |
106 | unsigned _disk_read_dma_alignment = 4096; | |
107 | unsigned _disk_write_dma_alignment = 4096; | |
20effc67 TL |
108 | unsigned _disk_overwrite_dma_alignment = 4096; |
109 | unsigned _read_max_length = 1u << 30; | |
110 | unsigned _write_max_length = 1u << 30; | |
11fdf7f2 TL |
111 | public: |
112 | virtual ~file_impl() {} | |
113 | ||
114 | virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc) = 0; | |
115 | virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) = 0; | |
116 | virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc) = 0; | |
117 | virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) = 0; | |
20effc67 TL |
118 | |
119 | virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc, io_intent*) { | |
120 | return write_dma(pos, buffer, len, pc); | |
121 | } | |
122 | virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent*) { | |
123 | return write_dma(pos, std::move(iov), pc); | |
124 | } | |
125 | virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc, io_intent*) { | |
126 | return read_dma(pos, buffer, len, pc); | |
127 | } | |
128 | virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent*) { | |
129 | return read_dma(pos, std::move(iov), pc); | |
130 | } | |
131 | ||
11fdf7f2 TL |
132 | virtual future<> flush(void) = 0; |
133 | virtual future<struct stat> stat(void) = 0; | |
134 | virtual future<> truncate(uint64_t length) = 0; | |
135 | virtual future<> discard(uint64_t offset, uint64_t length) = 0; | |
20effc67 TL |
136 | virtual future<int> ioctl(uint64_t cmd, void* argp) noexcept; |
137 | virtual future<int> ioctl_short(uint64_t cmd, void* argp) noexcept; | |
138 | virtual future<int> fcntl(int op, uintptr_t arg) noexcept; | |
139 | virtual future<int> fcntl_short(int op, uintptr_t arg) noexcept; | |
11fdf7f2 TL |
140 | virtual future<> allocate(uint64_t position, uint64_t length) = 0; |
141 | virtual future<uint64_t> size(void) = 0; | |
142 | virtual future<> close() = 0; | |
143 | virtual std::unique_ptr<file_handle_impl> dup(); | |
144 | virtual subscription<directory_entry> list_directory(std::function<future<> (directory_entry de)> next) = 0; | |
145 | virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc) = 0; | |
20effc67 TL |
146 | virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc, io_intent*) { |
147 | return dma_read_bulk(offset, range_size, pc); | |
148 | } | |
11fdf7f2 TL |
149 | |
150 | friend class reactor; | |
151 | }; | |
152 | ||
f67539c2 TL |
153 | future<shared_ptr<file_impl>> make_file_impl(int fd, file_open_options options, int oflags) noexcept; |
154 | ||
11fdf7f2 TL |
155 | /// \endcond |
156 | ||
157 | /// A data file on persistent storage. | |
158 | /// | |
159 | /// File objects represent uncached, unbuffered files. As such great care | |
160 | /// must be taken to cache data at the application layer; neither seastar | |
161 | /// nor the OS will cache these file. | |
162 | /// | |
163 | /// Data is transferred using direct memory access (DMA). This imposes | |
164 | /// restrictions on file offsets and data pointers. The former must be aligned | |
165 | /// on a 4096 byte boundary, while a 512 byte boundary suffices for the latter. | |
166 | class file { | |
167 | shared_ptr<file_impl> _file_impl; | |
11fdf7f2 TL |
168 | public: |
169 | /// Default constructor constructs an uninitialized file object. | |
170 | /// | |
171 | /// A default constructor is useful for the common practice of declaring | |
172 | /// a variable, and only assigning to it later. The uninitialized file | |
173 | /// must not be used, or undefined behavior will result (currently, a null | |
174 | /// pointer dereference). | |
175 | /// | |
176 | /// One can check whether a file object is in uninitialized state with | |
177 | /// \ref operator bool(); One can reset a file back to uninitialized state | |
178 | /// by assigning file() to it. | |
f67539c2 | 179 | file() noexcept : _file_impl(nullptr) {} |
11fdf7f2 | 180 | |
f67539c2 | 181 | file(shared_ptr<file_impl> impl) noexcept |
11fdf7f2 TL |
182 | : _file_impl(std::move(impl)) {} |
183 | ||
184 | /// Constructs a file object from a \ref file_handle obtained from another shard | |
f67539c2 | 185 | explicit file(file_handle&& handle) noexcept; |
11fdf7f2 TL |
186 | |
187 | /// Checks whether the file object was initialized. | |
188 | /// | |
189 | /// \return false if the file object is uninitialized (default | |
190 | /// constructed), true if the file object refers to an actual file. | |
191 | explicit operator bool() const noexcept { return bool(_file_impl); } | |
192 | ||
193 | /// Copies a file object. The new and old objects refer to the | |
194 | /// same underlying file. | |
195 | /// | |
196 | /// \param x file object to be copied | |
197 | file(const file& x) = default; | |
198 | /// Moves a file object. | |
199 | file(file&& x) noexcept : _file_impl(std::move(x._file_impl)) {} | |
200 | /// Assigns a file object. After assignent, the destination and source refer | |
201 | /// to the same underlying file. | |
202 | /// | |
203 | /// \param x file object to assign to `this`. | |
204 | file& operator=(const file& x) noexcept = default; | |
205 | /// Moves assigns a file object. | |
206 | file& operator=(file&& x) noexcept = default; | |
207 | ||
208 | // O_DIRECT reading requires that buffer, offset, and read length, are | |
209 | // all aligned. Alignment of 4096 was necessary in the past, but no longer | |
210 | // is - 512 is usually enough; But we'll need to use BLKSSZGET ioctl to | |
211 | // be sure it is really enough on this filesystem. 4096 is always safe. | |
212 | // In addition, if we start reading in things outside page boundaries, | |
213 | // we will end up with various pages around, some of them with | |
214 | // overlapping ranges. Those would be very challenging to cache. | |
215 | ||
216 | /// Alignment requirement for file offsets (for reads) | |
f67539c2 | 217 | uint64_t disk_read_dma_alignment() const noexcept { |
11fdf7f2 TL |
218 | return _file_impl->_disk_read_dma_alignment; |
219 | } | |
220 | ||
221 | /// Alignment requirement for file offsets (for writes) | |
f67539c2 | 222 | uint64_t disk_write_dma_alignment() const noexcept { |
11fdf7f2 TL |
223 | return _file_impl->_disk_write_dma_alignment; |
224 | } | |
225 | ||
20effc67 TL |
226 | /// Alignment requirement for file offsets (for overwrites). |
227 | /// | |
228 | /// Specifies the minimum alignment for disk offsets for | |
229 | /// overwrites (writes to a location that was previously written). | |
230 | /// This can be smaller than \ref disk_write_dma_alignment(), allowing | |
231 | /// a reduction in disk bandwidth used. | |
232 | uint64_t disk_overwrite_dma_alignment() const noexcept { | |
233 | return _file_impl->_disk_overwrite_dma_alignment; | |
234 | } | |
235 | ||
11fdf7f2 | 236 | /// Alignment requirement for data buffers |
f67539c2 | 237 | uint64_t memory_dma_alignment() const noexcept { |
11fdf7f2 TL |
238 | return _file_impl->_memory_dma_alignment; |
239 | } | |
240 | ||
20effc67 TL |
241 | /// Recommended limit for read request size. |
242 | /// Submitting a larger request will not cause any error, | |
243 | /// but may result in poor latencies for this and any other | |
244 | /// concurrent requests | |
245 | size_t disk_read_max_length() const noexcept { | |
246 | return _file_impl->_read_max_length; | |
247 | } | |
248 | ||
249 | /// Recommended limit for write request size. | |
250 | /// Submitting a larger request will not cause any error, | |
251 | /// but may result in poor latencies for this and any other | |
252 | /// concurrent requests | |
253 | size_t disk_write_max_length() const noexcept { | |
254 | return _file_impl->_write_max_length; | |
255 | } | |
11fdf7f2 TL |
256 | |
257 | /** | |
258 | * Perform a single DMA read operation. | |
259 | * | |
260 | * @param aligned_pos offset to begin reading at (should be aligned) | |
261 | * @param aligned_buffer output buffer (should be aligned) | |
262 | * @param aligned_len number of bytes to read (should be aligned) | |
263 | * @param pc the IO priority class under which to queue this operation | |
20effc67 | 264 | * @param intent the IO intention confirmation (\ref seastar::io_intent) |
11fdf7f2 TL |
265 | * |
266 | * Alignment is HW dependent but use 4KB alignment to be on the safe side as | |
267 | * explained above. | |
268 | * | |
269 | * @return number of bytes actually read | |
f67539c2 | 270 | * or exceptional future in case of I/O error |
11fdf7f2 TL |
271 | */ |
272 | template <typename CharType> | |
273 | future<size_t> | |
20effc67 TL |
274 | dma_read(uint64_t aligned_pos, CharType* aligned_buffer, size_t aligned_len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept { |
275 | return dma_read_impl(aligned_pos, reinterpret_cast<uint8_t*>(aligned_buffer), aligned_len, pc, intent); | |
11fdf7f2 TL |
276 | } |
277 | ||
278 | /** | |
279 | * Read the requested amount of bytes starting from the given offset. | |
280 | * | |
281 | * @param pos offset to begin reading from | |
282 | * @param len number of bytes to read | |
283 | * @param pc the IO priority class under which to queue this operation | |
20effc67 | 284 | * @param intent the IO intention confirmation (\ref seastar::io_intent) |
11fdf7f2 TL |
285 | * |
286 | * @return temporary buffer containing the requested data. | |
f67539c2 | 287 | * or exceptional future in case of I/O error |
11fdf7f2 TL |
288 | * |
289 | * This function doesn't require any alignment for both "pos" and "len" | |
290 | * | |
291 | * @note size of the returned buffer may be smaller than "len" if EOF is | |
f67539c2 | 292 | * reached or in case of I/O error. |
11fdf7f2 TL |
293 | */ |
294 | template <typename CharType> | |
20effc67 TL |
295 | future<temporary_buffer<CharType>> dma_read(uint64_t pos, size_t len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept { |
296 | return dma_read_impl(pos, len, pc, intent).then([] (temporary_buffer<uint8_t> t) { | |
f67539c2 | 297 | return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release()); |
11fdf7f2 TL |
298 | }); |
299 | } | |
300 | ||
301 | /// Error thrown when attempting to read past end-of-file | |
302 | /// with \ref dma_read_exactly(). | |
303 | class eof_error : public std::exception {}; | |
304 | ||
305 | /** | |
306 | * Read the exact amount of bytes. | |
307 | * | |
308 | * @param pos offset in a file to begin reading from | |
309 | * @param len number of bytes to read | |
310 | * @param pc the IO priority class under which to queue this operation | |
20effc67 | 311 | * @param intent the IO intention confirmation (\ref seastar::io_intent) |
11fdf7f2 TL |
312 | * |
313 | * @return temporary buffer containing the read data | |
f67539c2 TL |
314 | * or exceptional future in case an error, holding: |
315 | * end_of_file_error if EOF is reached, file_io_error or | |
11fdf7f2 TL |
316 | * std::system_error in case of I/O error. |
317 | */ | |
318 | template <typename CharType> | |
319 | future<temporary_buffer<CharType>> | |
20effc67 TL |
320 | dma_read_exactly(uint64_t pos, size_t len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept { |
321 | return dma_read_exactly_impl(pos, len, pc, intent).then([] (temporary_buffer<uint8_t> t) { | |
f67539c2 | 322 | return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release()); |
11fdf7f2 TL |
323 | }); |
324 | } | |
325 | ||
326 | /// Performs a DMA read into the specified iovec. | |
327 | /// | |
f67539c2 | 328 | /// \param pos offset to read from. Must be aligned to \ref disk_read_dma_alignment. |
11fdf7f2 TL |
329 | /// \param iov vector of address/size pairs to read into. Addresses must be |
330 | /// aligned. | |
331 | /// \param pc the IO priority class under which to queue this operation | |
20effc67 | 332 | /// \param intent the IO intention confirmation (\ref seastar::io_intent) |
11fdf7f2 TL |
333 | /// |
334 | /// \return a future representing the number of bytes actually read. A short | |
335 | /// read may happen due to end-of-file or an I/O error. | |
20effc67 | 336 | future<size_t> dma_read(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept; |
11fdf7f2 TL |
337 | |
338 | /// Performs a DMA write from the specified buffer. | |
339 | /// | |
f67539c2 | 340 | /// \param pos offset to write into. Must be aligned to \ref disk_write_dma_alignment. |
11fdf7f2 TL |
341 | /// \param buffer aligned address of buffer to read from. Buffer must exists |
342 | /// until the future is made ready. | |
343 | /// \param len number of bytes to write. Must be aligned. | |
344 | /// \param pc the IO priority class under which to queue this operation | |
20effc67 | 345 | /// \param intent the IO intention confirmation (\ref seastar::io_intent) |
11fdf7f2 TL |
346 | /// |
347 | /// \return a future representing the number of bytes actually written. A short | |
348 | /// write may happen due to an I/O error. | |
349 | template <typename CharType> | |
20effc67 TL |
350 | future<size_t> dma_write(uint64_t pos, const CharType* buffer, size_t len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept { |
351 | return dma_write_impl(pos, reinterpret_cast<const uint8_t*>(buffer), len, pc, intent); | |
11fdf7f2 TL |
352 | } |
353 | ||
354 | /// Performs a DMA write to the specified iovec. | |
355 | /// | |
f67539c2 | 356 | /// \param pos offset to write into. Must be aligned to \ref disk_write_dma_alignment. |
11fdf7f2 TL |
357 | /// \param iov vector of address/size pairs to write from. Addresses must be |
358 | /// aligned. | |
359 | /// \param pc the IO priority class under which to queue this operation | |
20effc67 | 360 | /// \param intent the IO intention confirmation (\ref seastar::io_intent) |
11fdf7f2 TL |
361 | /// |
362 | /// \return a future representing the number of bytes actually written. A short | |
363 | /// write may happen due to an I/O error. | |
20effc67 | 364 | future<size_t> dma_write(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept; |
11fdf7f2 TL |
365 | |
366 | /// Causes any previously written data to be made stable on persistent storage. | |
367 | /// | |
368 | /// Prior to a flush, written data may or may not survive a power failure. After | |
369 | /// a flush, data is guaranteed to be on disk. | |
f67539c2 | 370 | future<> flush() noexcept; |
11fdf7f2 TL |
371 | |
372 | /// Returns \c stat information about the file. | |
f67539c2 | 373 | future<struct stat> stat() noexcept; |
11fdf7f2 TL |
374 | |
375 | /// Truncates the file to a specified length. | |
f67539c2 | 376 | future<> truncate(uint64_t length) noexcept; |
11fdf7f2 TL |
377 | |
378 | /// Preallocate disk blocks for a specified byte range. | |
379 | /// | |
380 | /// Requests the file system to allocate disk blocks to | |
381 | /// back the specified range (\c length bytes starting at | |
382 | /// \c position). The range may be outside the current file | |
383 | /// size; the blocks can then be used when appending to the | |
384 | /// file. | |
385 | /// | |
386 | /// \param position beginning of the range at which to allocate | |
387 | /// blocks. | |
f67539c2 | 388 | /// \param length length of range to allocate. |
11fdf7f2 | 389 | /// \return future that becomes ready when the operation completes. |
f67539c2 | 390 | future<> allocate(uint64_t position, uint64_t length) noexcept; |
11fdf7f2 TL |
391 | |
392 | /// Discard unneeded data from the file. | |
393 | /// | |
394 | /// The discard operation tells the file system that a range of offsets | |
395 | /// (which be aligned) is no longer needed and can be reused. | |
f67539c2 | 396 | future<> discard(uint64_t offset, uint64_t length) noexcept; |
11fdf7f2 | 397 | |
20effc67 TL |
398 | /// Generic ioctl syscall support for special file handling. |
399 | /// | |
400 | /// This interface is useful for many non-standard operations on seastar::file. | |
401 | /// The examples can be - querying device or file system capabilities, | |
402 | /// configuring special performance or access modes on devices etc. | |
403 | /// Refer ioctl(2) man page for more details. | |
404 | /// | |
405 | /// \param cmd ioctl command to be executed | |
406 | /// \param argp pointer to the buffer which holds the argument | |
407 | /// | |
408 | /// \return a future containing the return value if any, or an exceptional future | |
409 | /// if the operation has failed. | |
410 | future<int> ioctl(uint64_t cmd, void* argp) noexcept; | |
411 | ||
412 | /// Performs a short ioctl syscall on seastar::file | |
413 | /// | |
414 | /// This is similar to generic \c ioctl; the difference is, here user indicates | |
415 | /// that this operation is a short one, and does not involve any i/o or locking. | |
416 | /// The \c file module will process this differently from the normal \ref ioctl(). | |
417 | /// Use this method only if the user is sure that the operation does not involve any | |
418 | /// blocking operation. If unsure, use the default \ref ioctl() method. | |
419 | /// Refer ioctl(2) man page for more details on ioctl operation. | |
420 | /// | |
421 | /// \param cmd ioctl command to be executed | |
422 | /// \param argp pointer to the buffer which holds the argument | |
423 | /// | |
424 | /// \return a future containing the return value if any, or an exceptional future | |
425 | /// if the operation has failed. | |
426 | future<int> ioctl_short(uint64_t cmd, void* argp) noexcept; | |
427 | ||
428 | /// Generic fcntl syscall support for special file handling. | |
429 | /// | |
430 | /// fcntl performs the operation specified by 'op' field on the file. | |
431 | /// Some of the use cases can be - setting file status flags, advisory record locking, | |
432 | /// managing signals, managing file leases or write hints etc. | |
433 | /// Refer fcntl(2) man page for more details. | |
434 | /// | |
435 | /// \param op the operation to be executed | |
436 | /// \param arg the optional argument | |
437 | /// \return a future containing the return value if any, or an exceptional future | |
438 | /// if the operation has failed | |
439 | future<int> fcntl(int op, uintptr_t arg = 0UL) noexcept; | |
440 | ||
441 | /// Performs a 'short' fcntl syscall on seastar::file | |
442 | /// | |
443 | /// This is similar to generic \c fcntl; the difference is, here user indicates | |
444 | /// that this operation is a short one, and does not involve any i/o or locking. | |
445 | /// The \c file module will process this differently from normal \ref fcntl(). | |
446 | /// Use this only if the user is sure that the operation does not involve any | |
447 | /// blocking operation. If unsure, use the default \ref fcntl() method. | |
448 | /// Refer fcntl(2) man page for more details on fcntl operation. | |
449 | /// | |
450 | /// \param op the operation to be executed | |
451 | /// \param arg the optional argument | |
452 | /// \return a future containing the return value if any, or an exceptional future | |
453 | /// if the operation has failed | |
454 | future<int> fcntl_short(int op, uintptr_t arg = 0UL) noexcept; | |
455 | ||
456 | /// Set a lifetime hint for the open file descriptor corresponding to seastar::file | |
457 | /// | |
458 | /// Write lifetime hints can be used to inform the kernel about the relative | |
459 | /// expected lifetime of writes on a given inode or via open file descriptor. | |
460 | /// An application may use the different hint values to separate writes into different | |
461 | /// write classes, so that multiple users or applications running on a single storage back-end | |
462 | /// can aggregate their I/O patterns in a consistent manner. | |
463 | /// Refer fcntl(2) man page for more details on write lifetime hints. | |
464 | /// | |
465 | /// \param hint the hint value of the stream | |
466 | /// \return future indicating success or failure | |
1e59de90 | 467 | [[deprecated("This API was removed from the kernel")]] |
20effc67 TL |
468 | future<> set_file_lifetime_hint(uint64_t hint) noexcept; |
469 | ||
470 | /// Set a lifetime hint for the inode corresponding to seastar::file | |
471 | /// | |
472 | /// Write lifetime hints can be used to inform the kernel about the relative | |
473 | /// expected lifetime of writes on a given inode or via open file descriptor. | |
474 | /// An application may use the different hint values to separate writes into different | |
475 | /// write classes, so that multiple users or applications running on a single storage back-end | |
476 | /// can aggregate their I/O patterns in a consistent manner. | |
477 | /// Refer fcntl(2) man page for more details on write lifetime hints. | |
478 | /// | |
479 | /// \param hint the hint value of the stream | |
480 | /// \return future indicating success or failure | |
481 | future<> set_inode_lifetime_hint(uint64_t hint) noexcept; | |
482 | ||
483 | /// Get the lifetime hint of the open file descriptor of seastar::file which was set by | |
484 | /// \ref set_file_lifetime_hint() | |
485 | /// | |
486 | /// Write lifetime hints can be used to inform the kernel about the relative | |
487 | /// expected lifetime of writes on a given inode or via open file descriptor. | |
488 | /// An application may use the different hint values to separate writes into different | |
489 | /// write classes, so that multiple users or applications running on a single storage back-end | |
490 | /// can aggregate their I/O patterns in a consistent manner. | |
491 | /// Refer fcntl(2) man page for more details on write lifetime hints. | |
492 | /// | |
493 | /// \return the hint value of the open file descriptor | |
1e59de90 | 494 | [[deprecated("This API was removed from the kernel")]] |
20effc67 TL |
495 | future<uint64_t> get_file_lifetime_hint() noexcept; |
496 | ||
497 | /// Get the lifetime hint of the inode of seastar::file which was set by | |
498 | /// \ref set_inode_lifetime_hint() | |
499 | /// | |
500 | /// Write lifetime hints can be used to inform the kernel about the relative | |
501 | /// expected lifetime of writes on a given inode or via open file descriptor. | |
502 | /// An application may use the different hint values to separate writes into different | |
503 | /// write classes, so that multiple users or applications running on a single storage back-end | |
504 | /// can aggregate their I/O patterns in a consistent manner. | |
505 | /// Refer fcntl(2) man page for more details on write lifetime hints. | |
506 | /// | |
507 | /// \return the hint value of the inode | |
508 | future<uint64_t> get_inode_lifetime_hint() noexcept; | |
509 | ||
11fdf7f2 | 510 | /// Gets the file size. |
f67539c2 | 511 | future<uint64_t> size() const noexcept; |
11fdf7f2 TL |
512 | |
513 | /// Closes the file. | |
514 | /// | |
515 | /// Flushes any pending operations and release any resources associated with | |
516 | /// the file (except for stable storage). | |
517 | /// | |
518 | /// \note | |
20effc67 TL |
519 | /// \c close() never fails. It just reports errors and swallows them. |
520 | /// To ensure file data reaches stable storage, you must call \ref flush() | |
11fdf7f2 | 521 | /// before calling \c close(). |
f67539c2 | 522 | future<> close() noexcept; |
11fdf7f2 TL |
523 | |
524 | /// Returns a directory listing, given that this file object is a directory. | |
f67539c2 | 525 | subscription<directory_entry> list_directory(std::function<future<> (directory_entry de)> next); |
11fdf7f2 TL |
526 | |
527 | /** | |
528 | * Read a data bulk containing the provided addresses range that starts at | |
529 | * the given offset and ends at either the address aligned to | |
530 | * dma_alignment (4KB) or at the file end. | |
531 | * | |
532 | * @param offset starting address of the range the read bulk should contain | |
533 | * @param range_size size of the addresses range | |
534 | * @param pc the IO priority class under which to queue this operation | |
20effc67 | 535 | * @param intent the IO intention confirmation (\ref seastar::io_intent) |
11fdf7f2 TL |
536 | * |
537 | * @return temporary buffer containing the read data bulk. | |
f67539c2 TL |
538 | * or exceptional future holding: |
539 | * system_error exception in case of I/O error or eof_error when | |
11fdf7f2 TL |
540 | * "offset" is beyond EOF. |
541 | */ | |
542 | template <typename CharType> | |
543 | future<temporary_buffer<CharType>> | |
20effc67 TL |
544 | dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept { |
545 | return dma_read_bulk_impl(offset, range_size, pc, intent).then([] (temporary_buffer<uint8_t> t) { | |
11fdf7f2 TL |
546 | return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release()); |
547 | }); | |
548 | } | |
549 | ||
550 | /// \brief Creates a handle that can be transported across shards. | |
551 | /// | |
552 | /// Creates a handle that can be transported across shards, and then | |
553 | /// used to create a new shard-local \ref file object that refers to | |
554 | /// the same on-disk file. | |
555 | /// | |
556 | /// \note Use on read-only files. | |
557 | /// | |
558 | file_handle dup(); | |
11fdf7f2 | 559 | private: |
// Private helpers expressed in terms of uint8_t. All of them are declared
// noexcept and return futures; their definitions are not part of this header
// section.
//
// dma_read_bulk_impl: called by the public dma_read_bulk() template above,
// which passes (offset, range_size, pc, intent) through unchanged and
// re-wraps the resulting buffer as temporary_buffer<CharType>.
f67539c2 | 560 | future<temporary_buffer<uint8_t>> |
20effc67 | 561 | dma_read_bulk_impl(uint64_t offset, size_t range_size, const io_priority_class& pc, io_intent* intent) noexcept; |
f67539c2 TL |
562 | |
// dma_write_impl: takes a read-only byte pointer plus a length and yields a size_t.
// NOTE(review): presumably the byte-typed backend of a templated public dma_write() - confirm.
563 | future<size_t> | |
20effc67 | 564 | dma_write_impl(uint64_t pos, const uint8_t* buffer, size_t len, const io_priority_class& pc, io_intent* intent) noexcept; |
f67539c2 TL |
565 | |
// dma_read_impl (buffer-returning overload): yields a temporary_buffer<uint8_t>
// for the requested (pos, len).
566 | future<temporary_buffer<uint8_t>> | |
20effc67 | 567 | dma_read_impl(uint64_t pos, size_t len, const io_priority_class& pc, io_intent* intent) noexcept; |
f67539c2 TL |
568 | |
// dma_read_impl (caller-buffer overload): fills a caller-provided byte buffer
// and yields a size_t.
// NOTE(review): the parameter names suggest position, buffer and length must
// already be aligned by the caller - confirm against the definition.
569 | future<size_t> | |
20effc67 | 570 | dma_read_impl(uint64_t aligned_pos, uint8_t* aligned_buffer, size_t aligned_len, const io_priority_class& pc, io_intent* intent) noexcept; |
f67539c2 TL |
571 | |
// dma_read_exactly_impl: same parameter list and return type as the
// buffer-returning dma_read_impl overload.
// NOTE(review): presumably fails rather than returning a short buffer - confirm.
572 | future<temporary_buffer<uint8_t>> | |
20effc67 TL |
573 | dma_read_exactly_impl(uint64_t pos, size_t len, const io_priority_class& pc, io_intent* intent) noexcept; |
574 | ||
// Lifetime-hint helpers: the getter yields a uint64_t, the setter takes one.
// NOTE(review): `op` presumably selects which hint is queried/updated - confirm.
575 | future<uint64_t> get_lifetime_hint_impl(int op) noexcept; | |
576 | future<> set_lifetime_hint_impl(int op, uint64_t hint) noexcept; | |
f67539c2 | 577 |
11fdf7f2 TL |
// reactor and file_impl are granted access to the private members of this class.
578 | friend class reactor; | |
579 | friend class file_impl; | |
580 | }; | |
581 | ||
f67539c2 TL |
582 | /// \brief Helper for ensuring a file is closed after \c func is called. |
583 | /// | |
584 | /// The file provided by the \c file_fut future is passed to \c func. | |
585 | /// | |
586 | /// \param file_fut A future that produces a file | |
587 | /// \param func A function that uses a file | |
588 | /// \returns the future returned by \c func, or an exceptional future if either \c file_fut or closing the file failed. | |
589 | template <typename Func> | |
590 | SEASTAR_CONCEPT( requires std::invocable<Func, file&> && std::is_nothrow_move_constructible_v<Func> ) | |
591 | auto with_file(future<file> file_fut, Func func) noexcept { | |
592 | static_assert(std::is_nothrow_move_constructible_v<Func>, "Func's move constructor must not throw"); | |
593 | return file_fut.then([func = std::move(func)] (file f) mutable { | |
594 | return do_with(std::move(f), [func = std::move(func)] (file& f) mutable { | |
595 | return futurize_invoke(func, f).finally([&f] { | |
596 | return f.close(); | |
597 | }); | |
598 | }); | |
599 | }); | |
600 | } | |
601 | ||
602 | /// \brief Helper for ensuring a file is closed if \c func fails. | |
603 | /// | |
604 | /// The file provided by the \c file_fut future is passed to \c func. | |
605 | /// * If func throws an exception E, the file is closed and we return | |
606 | /// a failed future with E. | |
607 | /// * If func returns a value V, the file is not closed and we return | |
608 | /// a future with V. | |
609 | /// Note that when an exception is not thrown, it is the | |
610 | /// responsibility of func to make sure the file will be closed. It | |
611 | /// can close the file itself, return it, or store it somewhere. | |
612 | /// | |
613 | /// \param file_fut A future that produces a file | |
614 | /// \param func A function that uses a file | |
615 | /// \returns the future returned by \c func, or an exceptional future if \c file_fut failed or a nested exception if closing the file failed. | |
616 | template <typename Func> | |
617 | SEASTAR_CONCEPT( requires std::invocable<Func, file&> && std::is_nothrow_move_constructible_v<Func> ) | |
618 | auto with_file_close_on_failure(future<file> file_fut, Func func) noexcept { | |
619 | static_assert(std::is_nothrow_move_constructible_v<Func>, "Func's move constructor must not throw"); | |
620 | return file_fut.then([func = std::move(func)] (file f) mutable { | |
621 | return do_with(std::move(f), [func = std::move(func)] (file& f) mutable { | |
622 | return futurize_invoke(std::move(func), f).then_wrapped([&f] (auto ret) mutable { | |
623 | if (!ret.failed()) { | |
624 | return ret; | |
625 | } | |
626 | return ret.finally([&f] { | |
627 | // If f.close() fails, return that as nested exception. | |
628 | return f.close(); | |
629 | }); | |
630 | }); | |
631 | }); | |
632 | }); | |
633 | } | |
634 | ||
635 | /// \example file_demo.cc | |
636 | /// A program demonstrating the use of \ref seastar::with_file | |
637 | /// and \ref seastar::with_file_close_on_failure | |
638 | ||
11fdf7f2 TL |
639 | /// \brief A shard-transportable handle to a file |
640 | /// | |
641 | /// If you need to access a file (for reads only) across multiple shards, | |
642 | /// you can use the file::dup() method to create a `file_handle`, transport | |
643 | /// this file handle to another shard, and use the handle to create \ref file | |
644 | /// object on that shard. This is more efficient than calling open_file_dma() | |
645 | /// again. | |
646 | class file_handle { | |
647 | std::unique_ptr<file_handle_impl> _impl; | |
648 | private: | |
649 | explicit file_handle(std::unique_ptr<file_handle_impl> impl) : _impl(std::move(impl)) {} | |
650 | public: | |
651 | /// Copies a file handle object | |
652 | file_handle(const file_handle&); | |
653 | /// Moves a file handle object | |
654 | file_handle(file_handle&&) noexcept; | |
655 | /// Assigns a file handle object | |
656 | file_handle& operator=(const file_handle&); | |
657 | /// Move-assigns a file handle object | |
658 | file_handle& operator=(file_handle&&) noexcept; | |
659 | /// Converts the file handle object to a \ref file. | |
660 | file to_file() const &; | |
661 | /// Converts the file handle object to a \ref file. | |
662 | file to_file() &&; | |
663 | ||
664 | friend class file; | |
665 | }; | |
666 | ||
20effc67 | 667 | /// @} |
11fdf7f2 | 668 | |
20effc67 TL |
/// Exception that the future of a cancelled IO resolves to (see \ref io_intent "io_intent")
class cancelled_error : public std::exception {
public:
    /// \returns the fixed, statically allocated message "cancelled".
    // `override` makes the compiler verify that this really replaces
    // std::exception::what(), so the message is also returned when the
    // exception is caught through a std::exception reference.
    const char* what() const noexcept override {
        return "cancelled";
    }
};
676 | ||
11fdf7f2 | 677 | } |