]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* |
2 | * This file is open source software, licensed to you under the terms | |
3 | * of the Apache License, Version 2.0 (the "License"). See the NOTICE file | |
4 | * distributed with this work for additional information regarding copyright | |
5 | * ownership. You may not use this file except in compliance with the License. | |
6 | * | |
7 | * You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, | |
12 | * software distributed under the License is distributed on an | |
13 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | * KIND, either express or implied. See the License for the | |
15 | * specific language governing permissions and limitations | |
16 | * under the License. | |
17 | */ | |
18 | /* | |
19 | * Copyright 2015 Cloudius Systems | |
20 | */ | |
21 | ||
22 | #pragma once | |
23 | ||
f67539c2 | 24 | #include <seastar/core/do_with.hh> |
11fdf7f2 TL |
25 | #include <seastar/core/stream.hh> |
26 | #include <seastar/core/sstring.hh> | |
27 | #include <seastar/core/shared_ptr.hh> | |
28 | #include <seastar/core/align.hh> | |
11fdf7f2 | 29 | #include <seastar/core/fair_queue.hh> |
9f95a23c | 30 | #include <seastar/core/file-types.hh> |
11fdf7f2 TL |
31 | #include <seastar/util/std-compat.hh> |
32 | #include <system_error> | |
11fdf7f2 TL |
33 | #include <sys/statvfs.h> |
34 | #include <sys/ioctl.h> | |
35 | #include <linux/fs.h> | |
36 | #include <sys/uio.h> | |
37 | #include <unistd.h> | |
38 | ||
39 | namespace seastar { | |
40 | ||
41 | /// \addtogroup fileio-module | |
42 | /// @{ | |
43 | ||
11fdf7f2 TL |
44 | /// A directory entry being listed. |
45 | struct directory_entry { | |
46 | /// Name of the file in a directory entry. Will never be "." or "..". Only the last component is included. | |
47 | sstring name; | |
48 | /// Type of the directory entry, if known. | |
f67539c2 | 49 | std::optional<directory_entry_type> type; |
11fdf7f2 TL |
50 | }; |
51 | ||
9f95a23c TL |
52 | /// Filesystem object stat information |
53 | struct stat_data { | |
54 | uint64_t device_id; // ID of device containing file | |
55 | uint64_t inode_number; // Inode number | |
56 | uint64_t mode; // File type and mode | |
57 | directory_entry_type type; | |
58 | uint64_t number_of_links;// Number of hard links | |
59 | uint64_t uid; // User ID of owner | |
60 | uint64_t gid; // Group ID of owner | |
61 | uint64_t rdev; // Device ID (if special file) | |
62 | uint64_t size; // Total size, in bytes | |
63 | uint64_t block_size; // Block size for filesystem I/O | |
64 | uint64_t allocated_size; // Total size of allocated storage, in bytes | |
65 | ||
66 | std::chrono::system_clock::time_point time_accessed; // Time of last content access | |
67 | std::chrono::system_clock::time_point time_modified; // Time of last content modification | |
68 | std::chrono::system_clock::time_point time_changed; // Time of last status change (either content or attributes) | |
69 | }; | |
70 | ||
11fdf7f2 TL |
71 | /// File open options |
72 | /// | |
73 | /// Options used to configure an open file. | |
74 | /// | |
75 | /// \ref file | |
76 | struct file_open_options { | |
77 | uint64_t extent_allocation_size_hint = 1 << 20; ///< Allocate this much disk space when extending the file | |
78 | bool sloppy_size = false; ///< Allow the file size not to track the amount of data written until a flush | |
79 | uint64_t sloppy_size_hint = 1 << 20; ///< Hint as to what the eventual file size will be | |
9f95a23c | 80 | file_permissions create_permissions = file_permissions::default_file_permissions; ///< File permissions to use when creating a file |
11fdf7f2 TL |
81 | }; |
82 | ||
83 | /// \cond internal | |
84 | class io_queue; | |
9f95a23c | 85 | using io_priority_class_id = unsigned; |
11fdf7f2 | 86 | class io_priority_class { |
9f95a23c | 87 | io_priority_class_id _id; |
11fdf7f2 | 88 | friend io_queue; |
9f95a23c | 89 | |
f67539c2 TL |
90 | io_priority_class() = delete; |
91 | explicit io_priority_class(io_priority_class_id id) noexcept | |
9f95a23c TL |
92 | : _id(id) |
93 | { } | |
94 | ||
11fdf7f2 | 95 | public: |
9f95a23c TL |
96 | io_priority_class_id id() const { |
97 | return _id; | |
11fdf7f2 TL |
98 | } |
99 | }; | |
100 | ||
101 | const io_priority_class& default_priority_class(); | |
102 | ||
103 | class file; | |
104 | class file_impl; | |
105 | ||
106 | class file_handle; | |
107 | ||
108 | // A handle that can be transported across shards and used to | |
109 | // create a dup(2)-like `file` object referring to the same underlying file | |
110 | class file_handle_impl { | |
111 | public: | |
112 | virtual ~file_handle_impl() = default; | |
113 | virtual std::unique_ptr<file_handle_impl> clone() const = 0; | |
114 | virtual shared_ptr<file_impl> to_file() && = 0; | |
115 | }; | |
116 | ||
117 | class file_impl { | |
118 | protected: | |
119 | static file_impl* get_file_impl(file& f); | |
120 | public: | |
121 | unsigned _memory_dma_alignment = 4096; | |
122 | unsigned _disk_read_dma_alignment = 4096; | |
123 | unsigned _disk_write_dma_alignment = 4096; | |
124 | public: | |
125 | virtual ~file_impl() {} | |
126 | ||
127 | virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc) = 0; | |
128 | virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) = 0; | |
129 | virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc) = 0; | |
130 | virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) = 0; | |
131 | virtual future<> flush(void) = 0; | |
132 | virtual future<struct stat> stat(void) = 0; | |
133 | virtual future<> truncate(uint64_t length) = 0; | |
134 | virtual future<> discard(uint64_t offset, uint64_t length) = 0; | |
135 | virtual future<> allocate(uint64_t position, uint64_t length) = 0; | |
136 | virtual future<uint64_t> size(void) = 0; | |
137 | virtual future<> close() = 0; | |
138 | virtual std::unique_ptr<file_handle_impl> dup(); | |
139 | virtual subscription<directory_entry> list_directory(std::function<future<> (directory_entry de)> next) = 0; | |
140 | virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc) = 0; | |
141 | ||
142 | friend class reactor; | |
143 | }; | |
144 | ||
f67539c2 TL |
145 | future<shared_ptr<file_impl>> make_file_impl(int fd, file_open_options options, int oflags) noexcept; |
146 | ||
11fdf7f2 TL |
147 | /// \endcond |
148 | ||
149 | /// A data file on persistent storage. | |
150 | /// | |
151 | /// File objects represent uncached, unbuffered files. As such great care | |
152 | /// must be taken to cache data at the application layer; neither seastar | |
153 | /// nor the OS will cache these files. | |
154 | /// | |
155 | /// Data is transferred using direct memory access (DMA). This imposes | |
156 | /// restrictions on file offsets and data pointers. The former must be aligned | |
157 | /// on a 4096 byte boundary, while a 512 byte boundary suffices for the latter. | |
158 | class file { | |
159 | shared_ptr<file_impl> _file_impl; | |
11fdf7f2 TL |
160 | public: |
161 | /// Default constructor constructs an uninitialized file object. | |
162 | /// | |
163 | /// A default constructor is useful for the common practice of declaring | |
164 | /// a variable, and only assigning to it later. The uninitialized file | |
165 | /// must not be used, or undefined behavior will result (currently, a null | |
166 | /// pointer dereference). | |
167 | /// | |
168 | /// One can check whether a file object is in uninitialized state with | |
169 | /// \ref operator bool(); One can reset a file back to uninitialized state | |
170 | /// by assigning file() to it. | |
f67539c2 | 171 | file() noexcept : _file_impl(nullptr) {} |
11fdf7f2 | 172 | |
f67539c2 | 173 | file(shared_ptr<file_impl> impl) noexcept |
11fdf7f2 TL |
174 | : _file_impl(std::move(impl)) {} |
175 | ||
176 | /// Constructs a file object from a \ref file_handle obtained from another shard | |
f67539c2 | 177 | explicit file(file_handle&& handle) noexcept; |
11fdf7f2 TL |
178 | |
179 | /// Checks whether the file object was initialized. | |
180 | /// | |
181 | /// \return false if the file object is uninitialized (default | |
182 | /// constructed), true if the file object refers to an actual file. | |
183 | explicit operator bool() const noexcept { return bool(_file_impl); } | |
184 | ||
185 | /// Copies a file object. The new and old objects refer to the | |
186 | /// same underlying file. | |
187 | /// | |
188 | /// \param x file object to be copied | |
189 | file(const file& x) = default; | |
190 | /// Moves a file object. | |
191 | file(file&& x) noexcept : _file_impl(std::move(x._file_impl)) {} | |
192 | /// Assigns a file object. After assignment, the destination and source refer | |
193 | /// to the same underlying file. | |
194 | /// | |
195 | /// \param x file object to assign to `this`. | |
196 | file& operator=(const file& x) noexcept = default; | |
197 | /// Move-assigns a file object. | |
198 | file& operator=(file&& x) noexcept = default; | |
199 | ||
200 | // O_DIRECT reading requires that buffer, offset, and read length, are | |
201 | // all aligned. Alignment of 4096 was necessary in the past, but no longer | |
202 | // is - 512 is usually enough; But we'll need to use BLKSSZGET ioctl to | |
203 | // be sure it is really enough on this filesystem. 4096 is always safe. | |
204 | // In addition, if we start reading in things outside page boundaries, | |
205 | // we will end up with various pages around, some of them with | |
206 | // overlapping ranges. Those would be very challenging to cache. | |
207 | ||
208 | /// Alignment requirement for file offsets (for reads) | |
f67539c2 | 209 | uint64_t disk_read_dma_alignment() const noexcept { |
11fdf7f2 TL |
210 | return _file_impl->_disk_read_dma_alignment; |
211 | } | |
212 | ||
213 | /// Alignment requirement for file offsets (for writes) | |
f67539c2 | 214 | uint64_t disk_write_dma_alignment() const noexcept { |
11fdf7f2 TL |
215 | return _file_impl->_disk_write_dma_alignment; |
216 | } | |
217 | ||
218 | /// Alignment requirement for data buffers | |
f67539c2 | 219 | uint64_t memory_dma_alignment() const noexcept { |
11fdf7f2 TL |
220 | return _file_impl->_memory_dma_alignment; |
221 | } | |
222 | ||
223 | ||
224 | /** | |
225 | * Perform a single DMA read operation. | |
226 | * | |
227 | * @param aligned_pos offset to begin reading at (should be aligned) | |
228 | * @param aligned_buffer output buffer (should be aligned) | |
229 | * @param aligned_len number of bytes to read (should be aligned) | |
230 | * @param pc the IO priority class under which to queue this operation | |
231 | * | |
232 | * Alignment is HW dependent but use 4KB alignment to be on the safe side as | |
233 | * explained above. | |
234 | * | |
235 | * @return number of bytes actually read | |
f67539c2 | 236 | * or exceptional future in case of I/O error |
11fdf7f2 TL |
237 | */ |
238 | template <typename CharType> | |
239 | future<size_t> | |
f67539c2 TL |
240 | dma_read(uint64_t aligned_pos, CharType* aligned_buffer, size_t aligned_len, const io_priority_class& pc = default_priority_class()) noexcept { |
241 | return dma_read_impl(aligned_pos, reinterpret_cast<uint8_t*>(aligned_buffer), aligned_len, pc); | |
11fdf7f2 TL |
242 | } |
243 | ||
244 | /** | |
245 | * Read the requested amount of bytes starting from the given offset. | |
246 | * | |
247 | * @param pos offset to begin reading from | |
248 | * @param len number of bytes to read | |
249 | * @param pc the IO priority class under which to queue this operation | |
250 | * | |
251 | * @return temporary buffer containing the requested data. | |
f67539c2 | 252 | * or exceptional future in case of I/O error |
11fdf7f2 TL |
253 | * |
254 | * This function doesn't require any alignment for either "pos" or "len" | |
255 | * | |
256 | * @note size of the returned buffer may be smaller than "len" if EOF is | |
f67539c2 | 257 | * reached or in case of I/O error. |
11fdf7f2 TL |
258 | */ |
259 | template <typename CharType> | |
f67539c2 TL |
260 | future<temporary_buffer<CharType>> dma_read(uint64_t pos, size_t len, const io_priority_class& pc = default_priority_class()) noexcept { |
261 | return dma_read_impl(pos, len, pc).then([] (temporary_buffer<uint8_t> t) { | |
262 | return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release()); | |
11fdf7f2 TL |
263 | }); |
264 | } | |
265 | ||
266 | /// Error thrown when attempting to read past end-of-file | |
267 | /// with \ref dma_read_exactly(). | |
268 | class eof_error : public std::exception {}; | |
269 | ||
270 | /** | |
271 | * Read the exact amount of bytes. | |
272 | * | |
273 | * @param pos offset in a file to begin reading from | |
274 | * @param len number of bytes to read | |
275 | * @param pc the IO priority class under which to queue this operation | |
276 | * | |
277 | * @return temporary buffer containing the read data | |
f67539c2 TL |
278 | * or exceptional future in case of an error, holding: |
279 | * eof_error if EOF is reached, file_io_error or | |
11fdf7f2 TL |
280 | * std::system_error in case of I/O error. |
281 | */ | |
282 | template <typename CharType> | |
283 | future<temporary_buffer<CharType>> | |
f67539c2 TL |
284 | dma_read_exactly(uint64_t pos, size_t len, const io_priority_class& pc = default_priority_class()) noexcept { |
285 | return dma_read_exactly_impl(pos, len, pc).then([] (temporary_buffer<uint8_t> t) { | |
286 | return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release()); | |
11fdf7f2 TL |
287 | }); |
288 | } | |
289 | ||
290 | /// Performs a DMA read into the specified iovec. | |
291 | /// | |
f67539c2 | 292 | /// \param pos offset to read from. Must be aligned to \ref disk_read_dma_alignment. |
11fdf7f2 TL |
293 | /// \param iov vector of address/size pairs to read into. Addresses must be |
294 | /// aligned. | |
295 | /// \param pc the IO priority class under which to queue this operation | |
296 | /// | |
297 | /// \return a future representing the number of bytes actually read. A short | |
298 | /// read may happen due to end-of-file or an I/O error. | |
f67539c2 | 299 | future<size_t> dma_read(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc = default_priority_class()) noexcept; |
11fdf7f2 TL |
300 | |
301 | /// Performs a DMA write from the specified buffer. | |
302 | /// | |
f67539c2 | 303 | /// \param pos offset to write into. Must be aligned to \ref disk_write_dma_alignment. |
11fdf7f2 TL |
304 | /// \param buffer aligned address of buffer to read from. Buffer must exist | |
305 | /// until the future is made ready. | |
306 | /// \param len number of bytes to write. Must be aligned. | |
307 | /// \param pc the IO priority class under which to queue this operation | |
308 | /// | |
309 | /// \return a future representing the number of bytes actually written. A short | |
310 | /// write may happen due to an I/O error. | |
311 | template <typename CharType> | |
f67539c2 TL |
312 | future<size_t> dma_write(uint64_t pos, const CharType* buffer, size_t len, const io_priority_class& pc = default_priority_class()) noexcept { |
313 | return dma_write_impl(pos, reinterpret_cast<const uint8_t*>(buffer), len, pc); | |
11fdf7f2 TL |
314 | } |
315 | ||
316 | /// Performs a DMA write from the specified iovec. | |
317 | /// | |
f67539c2 | 318 | /// \param pos offset to write into. Must be aligned to \ref disk_write_dma_alignment. |
11fdf7f2 TL |
319 | /// \param iov vector of address/size pairs to write from. Addresses must be |
320 | /// aligned. | |
321 | /// \param pc the IO priority class under which to queue this operation | |
322 | /// | |
323 | /// \return a future representing the number of bytes actually written. A short | |
324 | /// write may happen due to an I/O error. | |
f67539c2 | 325 | future<size_t> dma_write(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc = default_priority_class()) noexcept; |
11fdf7f2 TL |
326 | |
327 | /// Causes any previously written data to be made stable on persistent storage. | |
328 | /// | |
329 | /// Prior to a flush, written data may or may not survive a power failure. After | |
330 | /// a flush, data is guaranteed to be on disk. | |
f67539c2 | 331 | future<> flush() noexcept; |
11fdf7f2 TL |
332 | |
333 | /// Returns \c stat information about the file. | |
f67539c2 | 334 | future<struct stat> stat() noexcept; |
11fdf7f2 TL |
335 | |
336 | /// Truncates the file to a specified length. | |
f67539c2 | 337 | future<> truncate(uint64_t length) noexcept; |
11fdf7f2 TL |
338 | |
339 | /// Preallocate disk blocks for a specified byte range. | |
340 | /// | |
341 | /// Requests the file system to allocate disk blocks to | |
342 | /// back the specified range (\c length bytes starting at | |
343 | /// \c position). The range may be outside the current file | |
344 | /// size; the blocks can then be used when appending to the | |
345 | /// file. | |
346 | /// | |
347 | /// \param position beginning of the range at which to allocate | |
348 | /// blocks. | |
f67539c2 | 349 | /// \param length length of range to allocate. |
11fdf7f2 | 350 | /// \return future that becomes ready when the operation completes. |
f67539c2 | 351 | future<> allocate(uint64_t position, uint64_t length) noexcept; |
11fdf7f2 TL |
352 | |
353 | /// Discard unneeded data from the file. | |
354 | /// | |
355 | /// The discard operation tells the file system that a range of offsets | |
356 | /// (which should be aligned) is no longer needed and can be reused. | |
f67539c2 | 357 | future<> discard(uint64_t offset, uint64_t length) noexcept; |
11fdf7f2 TL |
358 | |
359 | /// Gets the file size. | |
f67539c2 | 360 | future<uint64_t> size() const noexcept; |
11fdf7f2 TL |
361 | |
362 | /// Closes the file. | |
363 | /// | |
364 | /// Flushes any pending operations and releases any resources associated with | |
365 | /// the file (except for stable storage). | |
366 | /// | |
367 | /// \note | |
368 | /// to ensure file data reaches stable storage, you must call \ref flush() | |
369 | /// before calling \c close(). | |
f67539c2 | 370 | future<> close() noexcept; |
11fdf7f2 TL |
371 | |
372 | /// Returns a directory listing, given that this file object is a directory. | |
f67539c2 | 373 | subscription<directory_entry> list_directory(std::function<future<> (directory_entry de)> next); |
11fdf7f2 TL |
374 | |
375 | /** | |
376 | * Read a data bulk containing the provided addresses range that starts at | |
377 | * the given offset and ends at either the address aligned to | |
378 | * dma_alignment (4KB) or at the file end. | |
379 | * | |
380 | * @param offset starting address of the range the read bulk should contain | |
381 | * @param range_size size of the addresses range | |
382 | * @param pc the IO priority class under which to queue this operation | |
383 | * | |
384 | * @return temporary buffer containing the read data bulk. | |
f67539c2 TL |
385 | * or exceptional future holding: |
386 | * system_error exception in case of I/O error or eof_error when | |
11fdf7f2 TL |
387 | * "offset" is beyond EOF. |
388 | */ | |
389 | template <typename CharType> | |
390 | future<temporary_buffer<CharType>> | |
f67539c2 TL |
391 | dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc = default_priority_class()) noexcept { |
392 | return dma_read_bulk_impl(offset, range_size, pc).then([] (temporary_buffer<uint8_t> t) { | |
11fdf7f2 TL |
393 | return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release()); |
394 | }); | |
395 | } | |
396 | ||
397 | /// \brief Creates a handle that can be transported across shards. | |
398 | /// | |
399 | /// Creates a handle that can be transported across shards, and then | |
400 | /// used to create a new shard-local \ref file object that refers to | |
401 | /// the same on-disk file. | |
402 | /// | |
403 | /// \note Use on read-only files. | |
404 | /// | |
405 | file_handle dup(); | |
406 | ||
407 | template <typename CharType> | |
408 | struct read_state; | |
409 | private: | |
f67539c2 TL |
410 | future<temporary_buffer<uint8_t>> |
411 | dma_read_bulk_impl(uint64_t offset, size_t range_size, const io_priority_class& pc) noexcept; | |
412 | ||
413 | future<size_t> | |
414 | dma_write_impl(uint64_t pos, const uint8_t* buffer, size_t len, const io_priority_class& pc) noexcept; | |
415 | ||
416 | future<temporary_buffer<uint8_t>> | |
417 | dma_read_impl(uint64_t pos, size_t len, const io_priority_class& pc) noexcept; | |
418 | ||
419 | future<size_t> | |
420 | dma_read_impl(uint64_t aligned_pos, uint8_t* aligned_buffer, size_t aligned_len, const io_priority_class& pc) noexcept; | |
421 | ||
422 | future<temporary_buffer<uint8_t>> | |
423 | dma_read_exactly_impl(uint64_t pos, size_t len, const io_priority_class& pc) noexcept; | |
424 | ||
11fdf7f2 TL |
425 | friend class reactor; |
426 | friend class file_impl; | |
427 | }; | |
428 | ||
f67539c2 TL |
429 | /// \brief Helper for ensuring a file is closed after \c func is called. |
430 | /// | |
431 | /// The file provided by the \c file_fut future is passed to \c func. | |
432 | /// | |
433 | /// \param file_fut A future that produces a file | |
434 | /// \param func A function that uses a file | |
435 | /// \returns the future returned by \c func, or an exceptional future if either \c file_fut or closing the file failed. | |
436 | template <typename Func> | |
437 | SEASTAR_CONCEPT( requires std::invocable<Func, file&> && std::is_nothrow_move_constructible_v<Func> ) | |
438 | auto with_file(future<file> file_fut, Func func) noexcept { | |
439 | static_assert(std::is_nothrow_move_constructible_v<Func>, "Func's move constructor must not throw"); | |
440 | return file_fut.then([func = std::move(func)] (file f) mutable { | |
441 | return do_with(std::move(f), [func = std::move(func)] (file& f) mutable { | |
442 | return futurize_invoke(func, f).finally([&f] { | |
443 | return f.close(); | |
444 | }); | |
445 | }); | |
446 | }); | |
447 | } | |
448 | ||
449 | /// \brief Helper for ensuring a file is closed if \c func fails. | |
450 | /// | |
451 | /// The file provided by the \c file_fut future is passed to \c func. | |
452 | /// * If func throws an exception E, the file is closed and we return | |
453 | /// a failed future with E. | |
454 | /// * If func returns a value V, the file is not closed and we return | |
455 | /// a future with V. | |
456 | /// Note that when an exception is not thrown, it is the | |
457 | /// responsibility of func to make sure the file will be closed. It | |
458 | /// can close the file itself, return it, or store it somewhere. | |
459 | /// | |
460 | /// \param file_fut A future that produces a file | |
461 | /// \param func A function that uses a file | |
462 | /// \returns the future returned by \c func, or an exceptional future if \c file_fut failed or a nested exception if closing the file failed. | |
463 | template <typename Func> | |
464 | SEASTAR_CONCEPT( requires std::invocable<Func, file&> && std::is_nothrow_move_constructible_v<Func> ) | |
465 | auto with_file_close_on_failure(future<file> file_fut, Func func) noexcept { | |
466 | static_assert(std::is_nothrow_move_constructible_v<Func>, "Func's move constructor must not throw"); | |
467 | return file_fut.then([func = std::move(func)] (file f) mutable { | |
468 | return do_with(std::move(f), [func = std::move(func)] (file& f) mutable { | |
469 | return futurize_invoke(std::move(func), f).then_wrapped([&f] (auto ret) mutable { | |
470 | if (!ret.failed()) { | |
471 | return ret; | |
472 | } | |
473 | return ret.finally([&f] { | |
474 | // If f.close() fails, return that as nested exception. | |
475 | return f.close(); | |
476 | }); | |
477 | }); | |
478 | }); | |
479 | }); | |
480 | } | |
481 | ||
482 | /// \example file_demo.cc | |
483 | /// A program demonstrating the use of \ref seastar::with_file | |
484 | /// and \ref seastar::with_file_close_on_failure | |
485 | ||
11fdf7f2 TL |
486 | /// \brief A shard-transportable handle to a file |
487 | /// | |
488 | /// If you need to access a file (for reads only) across multiple shards, | |
489 | /// you can use the file::dup() method to create a `file_handle`, transport | |
490 | /// this file handle to another shard, and use the handle to create \ref file | |
491 | /// object on that shard. This is more efficient than calling open_file_dma() | |
492 | /// again. | |
493 | class file_handle { | |
494 | std::unique_ptr<file_handle_impl> _impl; | |
495 | private: | |
496 | explicit file_handle(std::unique_ptr<file_handle_impl> impl) : _impl(std::move(impl)) {} | |
497 | public: | |
498 | /// Copies a file handle object | |
499 | file_handle(const file_handle&); | |
500 | /// Moves a file handle object | |
501 | file_handle(file_handle&&) noexcept; | |
502 | /// Assigns a file handle object | |
503 | file_handle& operator=(const file_handle&); | |
504 | /// Move-assigns a file handle object | |
505 | file_handle& operator=(file_handle&&) noexcept; | |
506 | /// Converts the file handle object to a \ref file. | |
507 | file to_file() const &; | |
508 | /// Converts the file handle object to a \ref file. | |
509 | file to_file() &&; | |
510 | ||
511 | friend class file; | |
512 | }; | |
513 | ||
514 | /// \cond internal | |
515 | ||
516 | template <typename CharType> | |
517 | struct file::read_state { | |
518 | typedef temporary_buffer<CharType> tmp_buf_type; | |
519 | ||
520 | read_state(uint64_t offset, uint64_t front, size_t to_read, | |
521 | size_t memory_alignment, size_t disk_alignment) | |
522 | : buf(tmp_buf_type::aligned(memory_alignment, | |
523 | align_up(to_read, disk_alignment))) | |
524 | , _offset(offset) | |
525 | , _to_read(to_read) | |
526 | , _front(front) {} | |
527 | ||
528 | bool done() const { | |
529 | return eof || pos >= _to_read; | |
530 | } | |
531 | ||
532 | /** | |
533 | * Trim the buffer to the actual number of read bytes and cut the | |
534 | * bytes from offset 0 till "_front". | |
535 | * | |
536 | * @note this function has to be called only if we read bytes beyond | |
537 | * "_front". | |
538 | */ | |
539 | void trim_buf_before_ret() { | |
540 | if (have_good_bytes()) { | |
541 | buf.trim(pos); | |
542 | buf.trim_front(_front); | |
543 | } else { | |
544 | buf.trim(0); | |
545 | } | |
546 | } | |
547 | ||
548 | uint64_t cur_offset() const { | |
549 | return _offset + pos; | |
550 | } | |
551 | ||
552 | size_t left_space() const { | |
553 | return buf.size() - pos; | |
554 | } | |
555 | ||
556 | size_t left_to_read() const { | |
557 | // positive as long as (done() == false) | |
558 | return _to_read - pos; | |
559 | } | |
560 | ||
561 | void append_new_data(tmp_buf_type& new_data) { | |
562 | auto to_copy = std::min(left_space(), new_data.size()); | |
563 | ||
564 | std::memcpy(buf.get_write() + pos, new_data.get(), to_copy); | |
565 | pos += to_copy; | |
566 | } | |
567 | ||
568 | bool have_good_bytes() const { | |
569 | return pos > _front; | |
570 | } | |
571 | ||
572 | public: | |
573 | bool eof = false; | |
574 | tmp_buf_type buf; | |
575 | size_t pos = 0; | |
576 | private: | |
577 | uint64_t _offset; | |
578 | size_t _to_read; | |
579 | uint64_t _front; | |
580 | }; | |
581 | ||
582 | /// \endcond | |
583 | ||
584 | /// @} | |
585 | ||
586 | } |