]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/port/win/io_win.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / port / win / io_win.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 #pragma once
10
11 #include <stdint.h>
12 #include <windows.h>
13
14 #include <mutex>
15 #include <string>
16
17 #include "rocksdb/file_system.h"
18 #include "rocksdb/status.h"
19 #include "util/aligned_buffer.h"
20 #include "util/string_util.h"
21
22 namespace ROCKSDB_NAMESPACE {
23 namespace port {
24
25 std::string GetWindowsErrSz(DWORD err);
26
27 inline IOStatus IOErrorFromWindowsError(const std::string& context, DWORD err) {
28 return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL))
29 ? IOStatus::NoSpace(context, GetWindowsErrSz(err))
30 : ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND))
31 ? IOStatus::PathNotFound(context, GetWindowsErrSz(err))
32 : IOStatus::IOError(context, GetWindowsErrSz(err));
33 }
34
35 inline IOStatus IOErrorFromLastWindowsError(const std::string& context) {
36 return IOErrorFromWindowsError(context, GetLastError());
37 }
38
39 inline IOStatus IOError(const std::string& context, int err_number) {
40 return (err_number == ENOSPC)
41 ? IOStatus::NoSpace(context, errnoStr(err_number).c_str())
42 : (err_number == ENOENT)
43 ? IOStatus::PathNotFound(context, errnoStr(err_number).c_str())
44 : IOStatus::IOError(context, errnoStr(err_number).c_str());
45 }
46
47 class WinFileData;
48
49 IOStatus pwrite(const WinFileData* file_data, const Slice& data,
50 uint64_t offset, size_t& bytes_written);
51
52 IOStatus pread(const WinFileData* file_data, char* src, size_t num_bytes,
53 uint64_t offset, size_t& bytes_read);
54
55 IOStatus fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);
56
57 IOStatus ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);
58
59 size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
60
61 class WinFileData {
62 protected:
63 const std::string filename_;
64 HANDLE hFile_;
65 // If true, the I/O issued would be direct I/O which the buffer
66 // will need to be aligned (not sure there is a guarantee that the buffer
67 // passed in is aligned).
68 const bool use_direct_io_;
69 const size_t sector_size_;
70
71 public:
72 // We want this class be usable both for inheritance (prive
73 // or protected) and for containment so __ctor and __dtor public
74 WinFileData(const std::string& filename, HANDLE hFile, bool direct_io);
75
76 virtual ~WinFileData() { this->CloseFile(); }
77
78 bool CloseFile() {
79 bool result = true;
80
81 if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
82 result = ::CloseHandle(hFile_);
83 assert(result);
84 hFile_ = NULL;
85 }
86 return result;
87 }
88
89 const std::string& GetName() const { return filename_; }
90
91 HANDLE GetFileHandle() const { return hFile_; }
92
93 bool use_direct_io() const { return use_direct_io_; }
94
95 size_t GetSectorSize() const { return sector_size_; }
96
97 bool IsSectorAligned(const size_t off) const;
98
99 WinFileData(const WinFileData&) = delete;
100 WinFileData& operator=(const WinFileData&) = delete;
101 };
102
103 class WinSequentialFile : protected WinFileData, public FSSequentialFile {
104 // Override for behavior change when creating a custom env
105 virtual IOStatus PositionedReadInternal(char* src, size_t numBytes,
106 uint64_t offset,
107 size_t& bytes_read) const;
108
109 public:
110 WinSequentialFile(const std::string& fname, HANDLE f,
111 const FileOptions& options);
112
113 ~WinSequentialFile();
114
115 WinSequentialFile(const WinSequentialFile&) = delete;
116 WinSequentialFile& operator=(const WinSequentialFile&) = delete;
117
118 IOStatus Read(size_t n, const IOOptions& options, Slice* result,
119 char* scratch, IODebugContext* dbg) override;
120 IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options,
121 Slice* result, char* scratch,
122 IODebugContext* dbg) override;
123
124 IOStatus Skip(uint64_t n) override;
125
126 IOStatus InvalidateCache(size_t offset, size_t length) override;
127
128 virtual bool use_direct_io() const override {
129 return WinFileData::use_direct_io();
130 }
131 };
132
133 // mmap() based random-access
134 class WinMmapReadableFile : private WinFileData, public FSRandomAccessFile {
135 HANDLE hMap_;
136
137 const void* mapped_region_;
138 const size_t length_;
139
140 public:
141 // mapped_region_[0,length-1] contains the mmapped contents of the file.
142 WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
143 const void* mapped_region, size_t length);
144
145 ~WinMmapReadableFile();
146
147 WinMmapReadableFile(const WinMmapReadableFile&) = delete;
148 WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete;
149
150 IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
151 Slice* result, char* scratch,
152 IODebugContext* dbg) const override;
153
154 virtual IOStatus InvalidateCache(size_t offset, size_t length) override;
155
156 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
157 };
158
159 // We preallocate and use memcpy to append new
160 // data to the file. This is safe since we either properly close the
161 // file before reading from it, or for log files, the reading code
162 // knows enough to skip zero suffixes.
163 class WinMmapFile : private WinFileData, public FSWritableFile {
164 private:
165 HANDLE hMap_;
166
167 const size_t page_size_; // We flush the mapping view in page_size
168 // increments. We may decide if this is a memory
169 // page size or SSD page size
170 const size_t
171 allocation_granularity_; // View must start at such a granularity
172
173 size_t reserved_size_; // Preallocated size
174
175 size_t mapping_size_; // The max size of the mapping object
176 // we want to guess the final file size to minimize the remapping
177 size_t view_size_; // How much memory to map into a view at a time
178
179 char* mapped_begin_; // Must begin at the file offset that is aligned with
180 // allocation_granularity_
181 char* mapped_end_;
182 char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_])
183 char* last_sync_; // Where have we synced up to
184
185 uint64_t file_offset_; // Offset of mapped_begin_ in file
186
187 // Do we have unsynced writes?
188 bool pending_sync_;
189
190 // Can only truncate or reserve to a sector size aligned if
191 // used on files that are opened with Unbuffered I/O
192 IOStatus TruncateFile(uint64_t toSize);
193
194 IOStatus UnmapCurrentRegion();
195
196 IOStatus MapNewRegion(const IOOptions& options, IODebugContext* dbg);
197
198 virtual IOStatus PreallocateInternal(uint64_t spaceToReserve);
199
200 public:
201 WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
202 size_t allocation_granularity, const FileOptions& options);
203
204 ~WinMmapFile();
205
206 WinMmapFile(const WinMmapFile&) = delete;
207 WinMmapFile& operator=(const WinMmapFile&) = delete;
208
209 IOStatus Append(const Slice& data, const IOOptions& options,
210 IODebugContext* dbg) override;
211 IOStatus Append(const Slice& data, const IOOptions& opts,
212 const DataVerificationInfo& /* verification_info */,
213 IODebugContext* dbg) override {
214 return Append(data, opts, dbg);
215 }
216
217 // Means Close() will properly take care of truncate
218 // and it does not need any additional information
219 IOStatus Truncate(uint64_t size, const IOOptions& options,
220 IODebugContext* dbg) override;
221
222 IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
223
224 IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
225
226 // Flush only data
227 IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
228
229 /**
230 * Flush data as well as metadata to stable storage.
231 */
232 IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
233
234 /**
235 * Get the size of valid data in the file. This will not match the
236 * size that is returned from the filesystem because we use mmap
237 * to extend file by map_size every time.
238 */
239 uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override;
240
241 IOStatus InvalidateCache(size_t offset, size_t length) override;
242
243 IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options,
244 IODebugContext* dbg) override;
245
246 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
247 };
248
249 class WinRandomAccessImpl {
250 protected:
251 WinFileData* file_base_;
252 size_t alignment_;
253
254 // Override for behavior change when creating a custom env
255 virtual IOStatus PositionedReadInternal(char* src, size_t numBytes,
256 uint64_t offset,
257 size_t& bytes_read) const;
258
259 WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
260 const FileOptions& options);
261
262 virtual ~WinRandomAccessImpl() {}
263
264 IOStatus ReadImpl(uint64_t offset, size_t n, Slice* result,
265 char* scratch) const;
266
267 size_t GetAlignment() const { return alignment_; }
268
269 public:
270 WinRandomAccessImpl(const WinRandomAccessImpl&) = delete;
271 WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete;
272 };
273
274 // pread() based random-access
275 class WinRandomAccessFile
276 : private WinFileData,
277 protected WinRandomAccessImpl, // Want to be able to override
278 // PositionedReadInternal
279 public FSRandomAccessFile {
280 public:
281 WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
282 const FileOptions& options);
283
284 ~WinRandomAccessFile();
285
286 IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
287 Slice* result, char* scratch,
288 IODebugContext* dbg) const override;
289
290 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
291
292 virtual bool use_direct_io() const override {
293 return WinFileData::use_direct_io();
294 }
295
296 IOStatus InvalidateCache(size_t offset, size_t length) override;
297
298 virtual size_t GetRequiredBufferAlignment() const override;
299 };
300
301 // This is a sequential write class. It has been mimicked (as others) after
302 // the original Posix class. We add support for unbuffered I/O on windows as
303 // well
304 // we utilize the original buffer as an alignment buffer to write directly to
305 // file with no buffering.
306 // No buffering requires that the provided buffer is aligned to the physical
307 // sector size (SSD page size) and
308 // that all SetFilePointer() operations to occur with such an alignment.
309 // We thus always write in sector/page size increments to the drive and leave
310 // the tail for the next write OR for Close() at which point we pad with zeros.
311 // No padding is required for
312 // buffered access.
313 class WinWritableImpl {
314 protected:
315 WinFileData* file_data_;
316 const uint64_t alignment_;
317 uint64_t
318 next_write_offset_; // Needed because Windows does not support O_APPEND
319 uint64_t reservedsize_; // how far we have reserved space
320
321 virtual IOStatus PreallocateInternal(uint64_t spaceToReserve);
322
323 WinWritableImpl(WinFileData* file_data, size_t alignment);
324
325 ~WinWritableImpl() {}
326
327 uint64_t GetAlignment() const { return alignment_; }
328
329 IOStatus AppendImpl(const Slice& data);
330
331 // Requires that the data is aligned as specified by
332 // GetRequiredBufferAlignment()
333 IOStatus PositionedAppendImpl(const Slice& data, uint64_t offset);
334
335 IOStatus TruncateImpl(uint64_t size);
336
337 IOStatus CloseImpl();
338
339 IOStatus SyncImpl(const IOOptions& options, IODebugContext* dbg);
340
341 uint64_t GetFileNextWriteOffset() {
342 // Double accounting now here with WritableFileWriter
343 // and this size will be wrong when unbuffered access is used
344 // but tests implement their own writable files and do not use
345 // WritableFileWrapper
346 // so we need to squeeze a square peg through
347 // a round hole here.
348 return next_write_offset_;
349 }
350
351 IOStatus AllocateImpl(uint64_t offset, uint64_t len);
352
353 public:
354 WinWritableImpl(const WinWritableImpl&) = delete;
355 WinWritableImpl& operator=(const WinWritableImpl&) = delete;
356 };
357
358 class WinWritableFile : private WinFileData,
359 protected WinWritableImpl,
360 public FSWritableFile {
361 public:
362 WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
363 size_t capacity, const FileOptions& options);
364
365 ~WinWritableFile();
366
367 IOStatus Append(const Slice& data, const IOOptions& options,
368 IODebugContext* dbg) override;
369 IOStatus Append(const Slice& data, const IOOptions& opts,
370 const DataVerificationInfo& /* verification_info */,
371 IODebugContext* dbg) override {
372 return Append(data, opts, dbg);
373 }
374
375 // Requires that the data is aligned as specified by
376 // GetRequiredBufferAlignment()
377 IOStatus PositionedAppend(const Slice& data, uint64_t offset,
378 const IOOptions& options,
379 IODebugContext* dbg) override;
380 IOStatus PositionedAppend(const Slice& data, uint64_t offset,
381 const IOOptions& opts,
382 const DataVerificationInfo& /* verification_info */,
383 IODebugContext* dbg) override {
384 return PositionedAppend(data, offset, opts, dbg);
385 }
386
387 // Need to implement this so the file is truncated correctly
388 // when buffered and unbuffered mode
389 IOStatus Truncate(uint64_t size, const IOOptions& options,
390 IODebugContext* dbg) override;
391
392 IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
393
394 // write out the cached data to the OS cache
395 // This is now taken care of the WritableFileWriter
396 IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
397
398 IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
399
400 IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
401
402 virtual bool IsSyncThreadSafe() const override;
403
404 // Indicates if the class makes use of direct I/O
405 // Use PositionedAppend
406 virtual bool use_direct_io() const override;
407
408 virtual size_t GetRequiredBufferAlignment() const override;
409
410 uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override;
411
412 IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options,
413 IODebugContext* dbg) override;
414
415 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
416 };
417
418 class WinRandomRWFile : private WinFileData,
419 protected WinRandomAccessImpl,
420 protected WinWritableImpl,
421 public FSRandomRWFile {
422 public:
423 WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
424 const FileOptions& options);
425
426 ~WinRandomRWFile() {}
427
428 // Indicates if the class makes use of direct I/O
429 // If false you must pass aligned buffer to Write()
430 virtual bool use_direct_io() const override;
431
432 // Use the returned alignment value to allocate aligned
433 // buffer for Write() when use_direct_io() returns true
434 virtual size_t GetRequiredBufferAlignment() const override;
435
436 // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
437 // Pass aligned buffer when use_direct_io() returns true.
438 IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options,
439 IODebugContext* dbg) override;
440
441 // Read up to `n` bytes starting from offset `offset` and store them in
442 // result, provided `scratch` size should be at least `n`.
443 // Returns Status::OK() on success.
444 IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
445 Slice* result, char* scratch,
446 IODebugContext* dbg) const override;
447
448 IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
449
450 IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
451
452 IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override {
453 return Sync(options, dbg);
454 }
455
456 IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
457 };
458
459 class WinMemoryMappedBuffer : public MemoryMappedFileBuffer {
460 private:
461 HANDLE file_handle_;
462 HANDLE map_handle_;
463
464 public:
465 WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base,
466 size_t size)
467 : MemoryMappedFileBuffer(base, size),
468 file_handle_(file_handle),
469 map_handle_(map_handle) {}
470 ~WinMemoryMappedBuffer() override;
471 };
472
473 class WinDirectory : public FSDirectory {
474 const std::string filename_;
475 HANDLE handle_;
476
477 public:
478 explicit WinDirectory(const std::string& filename, HANDLE h) noexcept
479 : filename_(filename), handle_(h) {
480 assert(handle_ != INVALID_HANDLE_VALUE);
481 }
482 ~WinDirectory() {
483 if (handle_ != NULL) {
484 IOStatus s = WinDirectory::Close(IOOptions(), nullptr);
485 s.PermitUncheckedError();
486 }
487 }
488 const std::string& GetName() const { return filename_; }
489 IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
490 IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
491
492 size_t GetUniqueId(char* id, size_t max_size) const override;
493 };
494
495 class WinFileLock : public FileLock {
496 public:
497 explicit WinFileLock(HANDLE hFile) : hFile_(hFile) {
498 assert(hFile != NULL);
499 assert(hFile != INVALID_HANDLE_VALUE);
500 }
501
502 ~WinFileLock();
503
504 private:
505 HANDLE hFile_;
506 };
507 } // namespace port
508 } // namespace ROCKSDB_NAMESPACE