]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/port/win/io_win.h
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / port / win / io_win.h
CommitLineData
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
9#pragma once
10
11#include <stdint.h>
12#include <mutex>
13#include <string>
14
11fdf7f2 15#include "rocksdb/status.h"
7c673cae
FG
16#include "rocksdb/env.h"
17#include "util/aligned_buffer.h"
18
11fdf7f2 19#include <windows.h>
7c673cae
FG
20
21
22namespace rocksdb {
23namespace port {
24
25std::string GetWindowsErrSz(DWORD err);
26
27inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) {
28 return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL))
29 ? Status::NoSpace(context, GetWindowsErrSz(err))
494da23a
TL
30 : ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND))
31 ? Status::PathNotFound(context, GetWindowsErrSz(err))
32 : Status::IOError(context, GetWindowsErrSz(err));
7c673cae
FG
33}
34
35inline Status IOErrorFromLastWindowsError(const std::string& context) {
36 return IOErrorFromWindowsError(context, GetLastError());
37}
38
39inline Status IOError(const std::string& context, int err_number) {
40 return (err_number == ENOSPC)
41 ? Status::NoSpace(context, strerror(err_number))
494da23a
TL
42 : (err_number == ENOENT)
43 ? Status::PathNotFound(context, strerror(err_number))
44 : Status::IOError(context, strerror(err_number));
7c673cae
FG
45}
46
11fdf7f2 47class WinFileData;
7c673cae 48
11fdf7f2
TL
49Status pwrite(const WinFileData* file_data, const Slice& data,
50 uint64_t offset, size_t& bytes_written);
7c673cae 51
11fdf7f2
TL
52Status pread(const WinFileData* file_data, char* src, size_t num_bytes,
53 uint64_t offset, size_t& bytes_read);
7c673cae
FG
54
55Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);
56
57Status ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);
58
59size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
60
61class WinFileData {
62 protected:
63 const std::string filename_;
64 HANDLE hFile_;
494da23a 65 // If true, the I/O issued would be direct I/O which the buffer
7c673cae
FG
66 // will need to be aligned (not sure there is a guarantee that the buffer
67 // passed in is aligned).
68 const bool use_direct_io_;
69
70 public:
71 // We want this class be usable both for inheritance (prive
72 // or protected) and for containment so __ctor and __dtor public
11fdf7f2
TL
73 WinFileData(const std::string& filename, HANDLE hFile, bool direct_io)
74 : filename_(filename), hFile_(hFile), use_direct_io_(direct_io) {}
7c673cae
FG
75
76 virtual ~WinFileData() { this->CloseFile(); }
77
78 bool CloseFile() {
79 bool result = true;
80
81 if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
82 result = ::CloseHandle(hFile_);
83 assert(result);
84 hFile_ = NULL;
85 }
86 return result;
87 }
88
89 const std::string& GetName() const { return filename_; }
90
91 HANDLE GetFileHandle() const { return hFile_; }
92
93 bool use_direct_io() const { return use_direct_io_; }
94
95 WinFileData(const WinFileData&) = delete;
96 WinFileData& operator=(const WinFileData&) = delete;
97};
98
99class WinSequentialFile : protected WinFileData, public SequentialFile {
100
101 // Override for behavior change when creating a custom env
11fdf7f2
TL
102 virtual Status PositionedReadInternal(char* src, size_t numBytes,
103 uint64_t offset, size_t& bytes_read) const;
7c673cae
FG
104
105public:
106 WinSequentialFile(const std::string& fname, HANDLE f,
107 const EnvOptions& options);
108
109 ~WinSequentialFile();
110
111 WinSequentialFile(const WinSequentialFile&) = delete;
112 WinSequentialFile& operator=(const WinSequentialFile&) = delete;
113
114 virtual Status Read(size_t n, Slice* result, char* scratch) override;
115 virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result,
116 char* scratch) override;
117
118 virtual Status Skip(uint64_t n) override;
119
120 virtual Status InvalidateCache(size_t offset, size_t length) override;
121
122 virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
123};
124
125// mmap() based random-access
126class WinMmapReadableFile : private WinFileData, public RandomAccessFile {
127 HANDLE hMap_;
128
129 const void* mapped_region_;
130 const size_t length_;
131
132 public:
133 // mapped_region_[0,length-1] contains the mmapped contents of the file.
134 WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
135 const void* mapped_region, size_t length);
136
137 ~WinMmapReadableFile();
138
139 WinMmapReadableFile(const WinMmapReadableFile&) = delete;
140 WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete;
141
142 virtual Status Read(uint64_t offset, size_t n, Slice* result,
143 char* scratch) const override;
144
145 virtual Status InvalidateCache(size_t offset, size_t length) override;
146
147 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
148};
149
150// We preallocate and use memcpy to append new
151// data to the file. This is safe since we either properly close the
152// file before reading from it, or for log files, the reading code
153// knows enough to skip zero suffixes.
154class WinMmapFile : private WinFileData, public WritableFile {
155 private:
156 HANDLE hMap_;
157
158 const size_t page_size_; // We flush the mapping view in page_size
159 // increments. We may decide if this is a memory
160 // page size or SSD page size
161 const size_t
162 allocation_granularity_; // View must start at such a granularity
163
164 size_t reserved_size_; // Preallocated size
165
166 size_t mapping_size_; // The max size of the mapping object
167 // we want to guess the final file size to minimize the remapping
168 size_t view_size_; // How much memory to map into a view at a time
169
170 char* mapped_begin_; // Must begin at the file offset that is aligned with
171 // allocation_granularity_
172 char* mapped_end_;
173 char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_])
174 char* last_sync_; // Where have we synced up to
175
176 uint64_t file_offset_; // Offset of mapped_begin_ in file
177
178 // Do we have unsynced writes?
179 bool pending_sync_;
180
181 // Can only truncate or reserve to a sector size aligned if
182 // used on files that are opened with Unbuffered I/O
183 Status TruncateFile(uint64_t toSize);
184
185 Status UnmapCurrentRegion();
186
187 Status MapNewRegion();
188
189 virtual Status PreallocateInternal(uint64_t spaceToReserve);
190
191 public:
192 WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
193 size_t allocation_granularity, const EnvOptions& options);
194
195 ~WinMmapFile();
196
197 WinMmapFile(const WinMmapFile&) = delete;
198 WinMmapFile& operator=(const WinMmapFile&) = delete;
199
200 virtual Status Append(const Slice& data) override;
201
202 // Means Close() will properly take care of truncate
203 // and it does not need any additional information
204 virtual Status Truncate(uint64_t size) override;
205
206 virtual Status Close() override;
207
208 virtual Status Flush() override;
209
210 // Flush only data
211 virtual Status Sync() override;
212
213 /**
214 * Flush data as well as metadata to stable storage.
215 */
216 virtual Status Fsync() override;
217
218 /**
219 * Get the size of valid data in the file. This will not match the
220 * size that is returned from the filesystem because we use mmap
221 * to extend file by map_size every time.
222 */
223 virtual uint64_t GetFileSize() override;
224
225 virtual Status InvalidateCache(size_t offset, size_t length) override;
226
227 virtual Status Allocate(uint64_t offset, uint64_t len) override;
228
229 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
230};
231
232class WinRandomAccessImpl {
233 protected:
234 WinFileData* file_base_;
11fdf7f2 235 size_t alignment_;
7c673cae
FG
236
237 // Override for behavior change when creating a custom env
11fdf7f2
TL
238 virtual Status PositionedReadInternal(char* src, size_t numBytes,
239 uint64_t offset, size_t& bytes_read) const;
7c673cae
FG
240
241 WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
242 const EnvOptions& options);
243
244 virtual ~WinRandomAccessImpl() {}
245
246 Status ReadImpl(uint64_t offset, size_t n, Slice* result,
247 char* scratch) const;
248
11fdf7f2 249 size_t GetAlignment() const { return alignment_; }
7c673cae
FG
250
251 public:
252
253 WinRandomAccessImpl(const WinRandomAccessImpl&) = delete;
254 WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete;
255};
256
257// pread() based random-access
258class WinRandomAccessFile
259 : private WinFileData,
260 protected WinRandomAccessImpl, // Want to be able to override
261 // PositionedReadInternal
262 public RandomAccessFile {
263 public:
264 WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
265 const EnvOptions& options);
266
267 ~WinRandomAccessFile();
268
269 virtual Status Read(uint64_t offset, size_t n, Slice* result,
270 char* scratch) const override;
271
7c673cae
FG
272 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
273
7c673cae
FG
274 virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
275
276 virtual Status InvalidateCache(size_t offset, size_t length) override;
277
278 virtual size_t GetRequiredBufferAlignment() const override;
279};
280
281// This is a sequential write class. It has been mimicked (as others) after
282// the original Posix class. We add support for unbuffered I/O on windows as
283// well
284// we utilize the original buffer as an alignment buffer to write directly to
285// file with no buffering.
286// No buffering requires that the provided buffer is aligned to the physical
287// sector size (SSD page size) and
288// that all SetFilePointer() operations to occur with such an alignment.
289// We thus always write in sector/page size increments to the drive and leave
290// the tail for the next write OR for Close() at which point we pad with zeros.
291// No padding is required for
292// buffered access.
293class WinWritableImpl {
294 protected:
295 WinFileData* file_data_;
296 const uint64_t alignment_;
11fdf7f2 297 uint64_t next_write_offset_; // Needed because Windows does not support O_APPEND
7c673cae
FG
298 uint64_t reservedsize_; // how far we have reserved space
299
300 virtual Status PreallocateInternal(uint64_t spaceToReserve);
301
302 WinWritableImpl(WinFileData* file_data, size_t alignment);
303
304 ~WinWritableImpl() {}
305
306 uint64_t GetAlignement() const { return alignment_; }
307
308 Status AppendImpl(const Slice& data);
309
310 // Requires that the data is aligned as specified by
311 // GetRequiredBufferAlignment()
312 Status PositionedAppendImpl(const Slice& data, uint64_t offset);
313
314 Status TruncateImpl(uint64_t size);
315
316 Status CloseImpl();
317
318 Status SyncImpl();
319
11fdf7f2 320 uint64_t GetFileNextWriteOffset() {
7c673cae
FG
321 // Double accounting now here with WritableFileWriter
322 // and this size will be wrong when unbuffered access is used
323 // but tests implement their own writable files and do not use
324 // WritableFileWrapper
325 // so we need to squeeze a square peg through
326 // a round hole here.
11fdf7f2 327 return next_write_offset_;
7c673cae
FG
328 }
329
330 Status AllocateImpl(uint64_t offset, uint64_t len);
331
332 public:
333 WinWritableImpl(const WinWritableImpl&) = delete;
334 WinWritableImpl& operator=(const WinWritableImpl&) = delete;
335};
336
337class WinWritableFile : private WinFileData,
338 protected WinWritableImpl,
339 public WritableFile {
340 public:
341 WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
342 size_t capacity, const EnvOptions& options);
343
344 ~WinWritableFile();
345
346 virtual Status Append(const Slice& data) override;
347
348 // Requires that the data is aligned as specified by
349 // GetRequiredBufferAlignment()
350 virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
351
352 // Need to implement this so the file is truncated correctly
353 // when buffered and unbuffered mode
354 virtual Status Truncate(uint64_t size) override;
355
356 virtual Status Close() override;
357
358 // write out the cached data to the OS cache
359 // This is now taken care of the WritableFileWriter
360 virtual Status Flush() override;
361
362 virtual Status Sync() override;
363
364 virtual Status Fsync() override;
365
11fdf7f2
TL
366 virtual bool IsSyncThreadSafe() const override;
367
7c673cae
FG
368 // Indicates if the class makes use of direct I/O
369 // Use PositionedAppend
370 virtual bool use_direct_io() const override;
371
372 virtual size_t GetRequiredBufferAlignment() const override;
373
374 virtual uint64_t GetFileSize() override;
375
376 virtual Status Allocate(uint64_t offset, uint64_t len) override;
377
378 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
379};
380
381class WinRandomRWFile : private WinFileData,
382 protected WinRandomAccessImpl,
383 protected WinWritableImpl,
384 public RandomRWFile {
385 public:
386 WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
387 const EnvOptions& options);
388
389 ~WinRandomRWFile() {}
390
391 // Indicates if the class makes use of direct I/O
392 // If false you must pass aligned buffer to Write()
393 virtual bool use_direct_io() const override;
394
395 // Use the returned alignment value to allocate aligned
396 // buffer for Write() when use_direct_io() returns true
397 virtual size_t GetRequiredBufferAlignment() const override;
398
7c673cae
FG
399 // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
400 // Pass aligned buffer when use_direct_io() returns true.
401 virtual Status Write(uint64_t offset, const Slice& data) override;
402
403 // Read up to `n` bytes starting from offset `offset` and store them in
404 // result, provided `scratch` size should be at least `n`.
405 // Returns Status::OK() on success.
406 virtual Status Read(uint64_t offset, size_t n, Slice* result,
407 char* scratch) const override;
408
409 virtual Status Flush() override;
410
411 virtual Status Sync() override;
412
413 virtual Status Fsync() { return Sync(); }
414
415 virtual Status Close() override;
416};
417
11fdf7f2
TL
418class WinMemoryMappedBuffer : public MemoryMappedFileBuffer {
419private:
420 HANDLE file_handle_;
421 HANDLE map_handle_;
422public:
423 WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base, size_t size) :
424 MemoryMappedFileBuffer(base, size),
425 file_handle_(file_handle),
426 map_handle_(map_handle) {}
427 ~WinMemoryMappedBuffer() override;
428};
429
7c673cae 430class WinDirectory : public Directory {
11fdf7f2 431 HANDLE handle_;
7c673cae 432 public:
494da23a 433 explicit WinDirectory(HANDLE h) noexcept : handle_(h) {
11fdf7f2
TL
434 assert(handle_ != INVALID_HANDLE_VALUE);
435 }
436 ~WinDirectory() {
437 ::CloseHandle(handle_);
438 }
7c673cae 439 virtual Status Fsync() override;
11fdf7f2
TL
440
441 size_t GetUniqueId(char* id, size_t max_size) const override;
7c673cae
FG
442};
443
444class WinFileLock : public FileLock {
445 public:
446 explicit WinFileLock(HANDLE hFile) : hFile_(hFile) {
447 assert(hFile != NULL);
448 assert(hFile != INVALID_HANDLE_VALUE);
449 }
450
451 ~WinFileLock();
452
453 private:
454 HANDLE hFile_;
455};
456}
457}