]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/io/interfaces.h
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / io / interfaces.h
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #pragma once
19
20 #include <cstdint>
21 #include <memory>
22 #include <string>
23 #include <vector>
24
25 #include "arrow/io/type_fwd.h"
26 #include "arrow/type_fwd.h"
27 #include "arrow/util/cancel.h"
28 #include "arrow/util/macros.h"
29 #include "arrow/util/string_view.h"
30 #include "arrow/util/type_fwd.h"
31 #include "arrow/util/visibility.h"
32
33 namespace arrow {
34 namespace io {
35
/// \brief A contiguous byte range, described by a start offset and a length.
///
/// This is a plain aggregate: it can be brace-initialized as
/// `ReadRange{offset, length}`.
struct ReadRange {
  /// Position of the first byte of the range
  int64_t offset;
  /// Number of bytes in the range
  int64_t length;

  /// Two ranges are equal when both their offsets and their lengths match.
  friend bool operator==(const ReadRange& left, const ReadRange& right) {
    return (left.offset == right.offset && left.length == right.length);
  }
  friend bool operator!=(const ReadRange& left, const ReadRange& right) {
    return !(left == right);
  }

  /// \brief Return whether `other` lies entirely within this range
  ///
  /// Equivalent to
  /// `offset <= other.offset && offset + length >= other.offset + other.length`,
  /// but expressed with subtractions: the sums `offset + length` can exceed
  /// the int64_t range (signed overflow is undefined behavior), e.g. for an
  /// open-ended range such as `{100, INT64_MAX}`.  The subtractions cannot
  /// overflow as long as offsets and lengths are non-negative.
  ///
  /// \param[in] other the range to test
  /// \return true if every byte of `other` is also covered by this range
  bool Contains(const ReadRange& other) const {
    if (other.offset < offset) {
      // `other` starts before this range does.
      return false;
    }
    // Here other.offset - offset >= 0.  Compare the distance between the
    // two starts with the slack left once `other.length` is accounted for.
    return (other.offset - offset) <= (length - other.length);
  }
};
51
52 /// EXPERIMENTAL: options provider for IO tasks
53 ///
54 /// Includes an Executor (which will be used to execute asynchronous reads),
55 /// a MemoryPool (which will be used to allocate buffers when zero copy reads
56 /// are not possible), and an external id (in case the executor receives tasks from
57 /// multiple sources and must distinguish tasks associated with this IOContext).
58 struct ARROW_EXPORT IOContext {
59 // No specified executor: will use a global IO thread pool
60 IOContext() : IOContext(default_memory_pool(), StopToken::Unstoppable()) {}
61
62 explicit IOContext(StopToken stop_token)
63 : IOContext(default_memory_pool(), std::move(stop_token)) {}
64
65 explicit IOContext(MemoryPool* pool, StopToken stop_token = StopToken::Unstoppable());
66
67 explicit IOContext(MemoryPool* pool, ::arrow::internal::Executor* executor,
68 StopToken stop_token = StopToken::Unstoppable(),
69 int64_t external_id = -1)
70 : pool_(pool),
71 executor_(executor),
72 external_id_(external_id),
73 stop_token_(std::move(stop_token)) {}
74
75 explicit IOContext(::arrow::internal::Executor* executor,
76 StopToken stop_token = StopToken::Unstoppable(),
77 int64_t external_id = -1)
78 : pool_(default_memory_pool()),
79 executor_(executor),
80 external_id_(external_id),
81 stop_token_(std::move(stop_token)) {}
82
83 MemoryPool* pool() const { return pool_; }
84
85 ::arrow::internal::Executor* executor() const { return executor_; }
86
87 // An application-specific ID, forwarded to executor task submissions
88 int64_t external_id() const { return external_id_; }
89
90 StopToken stop_token() const { return stop_token_; }
91
92 private:
93 MemoryPool* pool_;
94 ::arrow::internal::Executor* executor_;
95 int64_t external_id_;
96 StopToken stop_token_;
97 };
98
99 struct ARROW_DEPRECATED("renamed to IOContext in 4.0.0") AsyncContext : public IOContext {
100 using IOContext::IOContext;
101 };
102
103 class ARROW_EXPORT FileInterface {
104 public:
105 virtual ~FileInterface() = 0;
106
107 /// \brief Close the stream cleanly
108 ///
109 /// For writable streams, this will attempt to flush any pending data
110 /// before releasing the underlying resource.
111 ///
112 /// After Close() is called, closed() returns true and the stream is not
113 /// available for further operations.
114 virtual Status Close() = 0;
115
116 /// \brief Close the stream abruptly
117 ///
118 /// This method does not guarantee that any pending data is flushed.
119 /// It merely releases any underlying resource used by the stream for
120 /// its operation.
121 ///
122 /// After Abort() is called, closed() returns true and the stream is not
123 /// available for further operations.
124 virtual Status Abort();
125
126 /// \brief Return the position in this stream
127 virtual Result<int64_t> Tell() const = 0;
128
129 /// \brief Return whether the stream is closed
130 virtual bool closed() const = 0;
131
132 FileMode::type mode() const { return mode_; }
133
134 protected:
135 FileInterface() : mode_(FileMode::READ) {}
136 FileMode::type mode_;
137 void set_mode(FileMode::type mode) { mode_ = mode; }
138
139 private:
140 ARROW_DISALLOW_COPY_AND_ASSIGN(FileInterface);
141 };
142
143 class ARROW_EXPORT Seekable {
144 public:
145 virtual ~Seekable() = default;
146 virtual Status Seek(int64_t position) = 0;
147 };
148
149 class ARROW_EXPORT Writable {
150 public:
151 virtual ~Writable() = default;
152
153 /// \brief Write the given data to the stream
154 ///
155 /// This method always processes the bytes in full. Depending on the
156 /// semantics of the stream, the data may be written out immediately,
157 /// held in a buffer, or written asynchronously. In the case where
158 /// the stream buffers the data, it will be copied. To avoid potentially
159 /// large copies, use the Write variant that takes an owned Buffer.
160 virtual Status Write(const void* data, int64_t nbytes) = 0;
161
162 /// \brief Write the given data to the stream
163 ///
164 /// Since the Buffer owns its memory, this method can avoid a copy if
165 /// buffering is required. See Write(const void*, int64_t) for details.
166 virtual Status Write(const std::shared_ptr<Buffer>& data);
167
168 /// \brief Flush buffered bytes, if any
169 virtual Status Flush();
170
171 Status Write(util::string_view data);
172 };
173
174 class ARROW_EXPORT Readable {
175 public:
176 virtual ~Readable() = default;
177
178 /// \brief Read data from current file position.
179 ///
180 /// Read at most `nbytes` from the current file position into `out`.
181 /// The number of bytes read is returned.
182 virtual Result<int64_t> Read(int64_t nbytes, void* out) = 0;
183
184 /// \brief Read data from current file position.
185 ///
186 /// Read at most `nbytes` from the current file position. Less bytes may
187 /// be read if EOF is reached. This method updates the current file position.
188 ///
189 /// In some cases (e.g. a memory-mapped file), this method may avoid a
190 /// memory copy.
191 virtual Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) = 0;
192
193 /// EXPERIMENTAL: The IOContext associated with this file.
194 ///
195 /// By default, this is the same as default_io_context(), but it may be
196 /// overriden by subclasses.
197 virtual const IOContext& io_context() const;
198 };
199
200 class ARROW_EXPORT OutputStream : virtual public FileInterface, public Writable {
201 protected:
202 OutputStream() = default;
203 };
204
205 class ARROW_EXPORT InputStream : virtual public FileInterface,
206 virtual public Readable,
207 public std::enable_shared_from_this<InputStream> {
208 public:
209 /// \brief Advance or skip stream indicated number of bytes
210 /// \param[in] nbytes the number to move forward
211 /// \return Status
212 Status Advance(int64_t nbytes);
213
214 /// \brief Return zero-copy string_view to upcoming bytes.
215 ///
216 /// Do not modify the stream position. The view becomes invalid after
217 /// any operation on the stream. May trigger buffering if the requested
218 /// size is larger than the number of buffered bytes.
219 ///
220 /// May return NotImplemented on streams that don't support it.
221 ///
222 /// \param[in] nbytes the maximum number of bytes to see
223 virtual Result<util::string_view> Peek(int64_t nbytes);
224
225 /// \brief Return true if InputStream is capable of zero copy Buffer reads
226 ///
227 /// Zero copy reads imply the use of Buffer-returning Read() overloads.
228 virtual bool supports_zero_copy() const;
229
230 /// \brief Read and return stream metadata
231 ///
232 /// If the stream implementation doesn't support metadata, empty metadata
233 /// is returned. Note that it is allowed to return a null pointer rather
234 /// than an allocated empty metadata.
235 virtual Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata();
236
237 /// \brief Read stream metadata asynchronously
238 virtual Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
239 const IOContext& io_context);
240 Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync();
241
242 protected:
243 InputStream() = default;
244 };
245
246 class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable {
247 public:
248 /// Necessary because we hold a std::unique_ptr
249 ~RandomAccessFile() override;
250
251 /// \brief Create an isolated InputStream that reads a segment of a
252 /// RandomAccessFile. Multiple such stream can be created and used
253 /// independently without interference
254 /// \param[in] file a file instance
255 /// \param[in] file_offset the starting position in the file
256 /// \param[in] nbytes the extent of bytes to read. The file should have
257 /// sufficient bytes available
258 static std::shared_ptr<InputStream> GetStream(std::shared_ptr<RandomAccessFile> file,
259 int64_t file_offset, int64_t nbytes);
260
261 /// \brief Return the total file size in bytes.
262 ///
263 /// This method does not read or move the current file position, so is safe
264 /// to call concurrently with e.g. ReadAt().
265 virtual Result<int64_t> GetSize() = 0;
266
267 /// \brief Read data from given file position.
268 ///
269 /// At most `nbytes` bytes are read. The number of bytes read is returned
270 /// (it can be less than `nbytes` if EOF is reached).
271 ///
272 /// This method can be safely called from multiple threads concurrently.
273 /// It is unspecified whether this method updates the file position or not.
274 ///
275 /// The default RandomAccessFile-provided implementation uses Seek() and Read(),
276 /// but subclasses may override it with a more efficient implementation
277 /// that doesn't depend on implicit file positioning.
278 ///
279 /// \param[in] position Where to read bytes from
280 /// \param[in] nbytes The number of bytes to read
281 /// \param[out] out The buffer to read bytes into
282 /// \return The number of bytes read, or an error
283 virtual Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out);
284
285 /// \brief Read data from given file position.
286 ///
287 /// At most `nbytes` bytes are read, but it can be less if EOF is reached.
288 ///
289 /// \param[in] position Where to read bytes from
290 /// \param[in] nbytes The number of bytes to read
291 /// \return A buffer containing the bytes read, or an error
292 virtual Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes);
293
294 /// EXPERIMENTAL: Read data asynchronously.
295 virtual Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
296 int64_t nbytes);
297
298 /// EXPERIMENTAL: Read data asynchronously, using the file's IOContext.
299 Future<std::shared_ptr<Buffer>> ReadAsync(int64_t position, int64_t nbytes);
300
301 /// EXPERIMENTAL: Inform that the given ranges may be read soon.
302 ///
303 /// Some implementations might arrange to prefetch some of the data.
304 /// However, no guarantee is made and the default implementation does nothing.
305 /// For robust prefetching, use ReadAt() or ReadAsync().
306 virtual Status WillNeed(const std::vector<ReadRange>& ranges);
307
308 protected:
309 RandomAccessFile();
310
311 private:
312 struct ARROW_NO_EXPORT Impl;
313 std::unique_ptr<Impl> interface_impl_;
314 };
315
316 class ARROW_EXPORT WritableFile : public OutputStream, public Seekable {
317 public:
318 virtual Status WriteAt(int64_t position, const void* data, int64_t nbytes) = 0;
319
320 protected:
321 WritableFile() = default;
322 };
323
324 class ARROW_EXPORT ReadWriteFileInterface : public RandomAccessFile, public WritableFile {
325 protected:
326 ReadWriteFileInterface() { RandomAccessFile::set_mode(FileMode::READWRITE); }
327 };
328
329 /// \brief Return an iterator on an input stream
330 ///
331 /// The iterator yields a fixed-size block on each Next() call, except the
332 /// last block in the stream which may be smaller.
333 /// Once the end of stream is reached, Next() returns nullptr
334 /// (unlike InputStream::Read() which returns an empty buffer).
335 ARROW_EXPORT
336 Result<Iterator<std::shared_ptr<Buffer>>> MakeInputStreamIterator(
337 std::shared_ptr<InputStream> stream, int64_t block_size);
338
339 } // namespace io
340 } // namespace arrow