]>
git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/port/win/io_win.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
17 #include "rocksdb/file_system.h"
18 #include "rocksdb/status.h"
19 #include "util/aligned_buffer.h"
20 #include "util/string_util.h"
22 namespace ROCKSDB_NAMESPACE
{
25 std::string
GetWindowsErrSz(DWORD err
);
27 inline IOStatus
IOErrorFromWindowsError(const std::string
& context
, DWORD err
) {
28 return ((err
== ERROR_HANDLE_DISK_FULL
) || (err
== ERROR_DISK_FULL
))
29 ? IOStatus::NoSpace(context
, GetWindowsErrSz(err
))
30 : ((err
== ERROR_FILE_NOT_FOUND
) || (err
== ERROR_PATH_NOT_FOUND
))
31 ? IOStatus::PathNotFound(context
, GetWindowsErrSz(err
))
32 : IOStatus::IOError(context
, GetWindowsErrSz(err
));
35 inline IOStatus
IOErrorFromLastWindowsError(const std::string
& context
) {
36 return IOErrorFromWindowsError(context
, GetLastError());
39 inline IOStatus
IOError(const std::string
& context
, int err_number
) {
40 return (err_number
== ENOSPC
)
41 ? IOStatus::NoSpace(context
, errnoStr(err_number
).c_str())
42 : (err_number
== ENOENT
)
43 ? IOStatus::PathNotFound(context
, errnoStr(err_number
).c_str())
44 : IOStatus::IOError(context
, errnoStr(err_number
).c_str());
49 IOStatus
pwrite(const WinFileData
* file_data
, const Slice
& data
,
50 uint64_t offset
, size_t& bytes_written
);
52 IOStatus
pread(const WinFileData
* file_data
, char* src
, size_t num_bytes
,
53 uint64_t offset
, size_t& bytes_read
);
55 IOStatus
fallocate(const std::string
& filename
, HANDLE hFile
, uint64_t to_size
);
57 IOStatus
ftruncate(const std::string
& filename
, HANDLE hFile
, uint64_t toSize
);
59 size_t GetUniqueIdFromFile(HANDLE hFile
, char* id
, size_t max_size
);
63 const std::string filename_
;
// If true, the I/O issued is direct I/O, for which the buffer needs to be
// aligned (there is no guarantee that the buffer passed in is aligned).
68 const bool use_direct_io_
;
69 const size_t sector_size_
;
// We want this class to be usable both for inheritance (private
// or protected) and for containment, so the constructor and destructor
// are public.
74 WinFileData(const std::string
& filename
, HANDLE hFile
, bool direct_io
);
76 virtual ~WinFileData() { this->CloseFile(); }
81 if (hFile_
!= NULL
&& hFile_
!= INVALID_HANDLE_VALUE
) {
82 result
= ::CloseHandle(hFile_
);
89 const std::string
& GetName() const { return filename_
; }
91 HANDLE
GetFileHandle() const { return hFile_
; }
93 bool use_direct_io() const { return use_direct_io_
; }
95 size_t GetSectorSize() const { return sector_size_
; }
97 bool IsSectorAligned(const size_t off
) const;
99 WinFileData(const WinFileData
&) = delete;
100 WinFileData
& operator=(const WinFileData
&) = delete;
103 class WinSequentialFile
: protected WinFileData
, public FSSequentialFile
{
104 // Override for behavior change when creating a custom env
105 virtual IOStatus
PositionedReadInternal(char* src
, size_t numBytes
,
107 size_t& bytes_read
) const;
110 WinSequentialFile(const std::string
& fname
, HANDLE f
,
111 const FileOptions
& options
);
113 ~WinSequentialFile();
115 WinSequentialFile(const WinSequentialFile
&) = delete;
116 WinSequentialFile
& operator=(const WinSequentialFile
&) = delete;
118 IOStatus
Read(size_t n
, const IOOptions
& options
, Slice
* result
,
119 char* scratch
, IODebugContext
* dbg
) override
;
120 IOStatus
PositionedRead(uint64_t offset
, size_t n
, const IOOptions
& options
,
121 Slice
* result
, char* scratch
,
122 IODebugContext
* dbg
) override
;
124 IOStatus
Skip(uint64_t n
) override
;
126 IOStatus
InvalidateCache(size_t offset
, size_t length
) override
;
128 virtual bool use_direct_io() const override
{
129 return WinFileData::use_direct_io();
133 // mmap() based random-access
134 class WinMmapReadableFile
: private WinFileData
, public FSRandomAccessFile
{
137 const void* mapped_region_
;
138 const size_t length_
;
141 // mapped_region_[0,length-1] contains the mmapped contents of the file.
142 WinMmapReadableFile(const std::string
& fileName
, HANDLE hFile
, HANDLE hMap
,
143 const void* mapped_region
, size_t length
);
145 ~WinMmapReadableFile();
147 WinMmapReadableFile(const WinMmapReadableFile
&) = delete;
148 WinMmapReadableFile
& operator=(const WinMmapReadableFile
&) = delete;
150 IOStatus
Read(uint64_t offset
, size_t n
, const IOOptions
& options
,
151 Slice
* result
, char* scratch
,
152 IODebugContext
* dbg
) const override
;
154 virtual IOStatus
InvalidateCache(size_t offset
, size_t length
) override
;
156 virtual size_t GetUniqueId(char* id
, size_t max_size
) const override
;
159 // We preallocate and use memcpy to append new
160 // data to the file. This is safe since we either properly close the
161 // file before reading from it, or for log files, the reading code
162 // knows enough to skip zero suffixes.
163 class WinMmapFile
: private WinFileData
, public FSWritableFile
{
167 const size_t page_size_
; // We flush the mapping view in page_size
168 // increments. We may decide if this is a memory
169 // page size or SSD page size
171 allocation_granularity_
; // View must start at such a granularity
173 size_t reserved_size_
; // Preallocated size
175 size_t mapping_size_
; // The max size of the mapping object
176 // we want to guess the final file size to minimize the remapping
177 size_t view_size_
; // How much memory to map into a view at a time
179 char* mapped_begin_
; // Must begin at the file offset that is aligned with
180 // allocation_granularity_
182 char* dst_
; // Where to write next (in range [mapped_begin_,mapped_end_])
183 char* last_sync_
; // Where have we synced up to
185 uint64_t file_offset_
; // Offset of mapped_begin_ in file
187 // Do we have unsynced writes?
190 // Can only truncate or reserve to a sector size aligned if
191 // used on files that are opened with Unbuffered I/O
192 IOStatus
TruncateFile(uint64_t toSize
);
194 IOStatus
UnmapCurrentRegion();
196 IOStatus
MapNewRegion(const IOOptions
& options
, IODebugContext
* dbg
);
198 virtual IOStatus
PreallocateInternal(uint64_t spaceToReserve
);
201 WinMmapFile(const std::string
& fname
, HANDLE hFile
, size_t page_size
,
202 size_t allocation_granularity
, const FileOptions
& options
);
206 WinMmapFile(const WinMmapFile
&) = delete;
207 WinMmapFile
& operator=(const WinMmapFile
&) = delete;
209 IOStatus
Append(const Slice
& data
, const IOOptions
& options
,
210 IODebugContext
* dbg
) override
;
211 IOStatus
Append(const Slice
& data
, const IOOptions
& opts
,
212 const DataVerificationInfo
& /* verification_info */,
213 IODebugContext
* dbg
) override
{
214 return Append(data
, opts
, dbg
);
217 // Means Close() will properly take care of truncate
218 // and it does not need any additional information
219 IOStatus
Truncate(uint64_t size
, const IOOptions
& options
,
220 IODebugContext
* dbg
) override
;
222 IOStatus
Close(const IOOptions
& options
, IODebugContext
* dbg
) override
;
224 IOStatus
Flush(const IOOptions
& options
, IODebugContext
* dbg
) override
;
227 IOStatus
Sync(const IOOptions
& options
, IODebugContext
* dbg
) override
;
// Flush data as well as metadata to stable storage.
232 IOStatus
Fsync(const IOOptions
& options
, IODebugContext
* dbg
) override
;
// Get the size of valid data in the file. This will not match the
// size that is returned from the filesystem because we use mmap
// to extend the file by map_size every time.
239 uint64_t GetFileSize(const IOOptions
& options
, IODebugContext
* dbg
) override
;
241 IOStatus
InvalidateCache(size_t offset
, size_t length
) override
;
243 IOStatus
Allocate(uint64_t offset
, uint64_t len
, const IOOptions
& options
,
244 IODebugContext
* dbg
) override
;
246 virtual size_t GetUniqueId(char* id
, size_t max_size
) const override
;
249 class WinRandomAccessImpl
{
251 WinFileData
* file_base_
;
254 // Override for behavior change when creating a custom env
255 virtual IOStatus
PositionedReadInternal(char* src
, size_t numBytes
,
257 size_t& bytes_read
) const;
259 WinRandomAccessImpl(WinFileData
* file_base
, size_t alignment
,
260 const FileOptions
& options
);
262 virtual ~WinRandomAccessImpl() {}
264 IOStatus
ReadImpl(uint64_t offset
, size_t n
, Slice
* result
,
265 char* scratch
) const;
267 size_t GetAlignment() const { return alignment_
; }
270 WinRandomAccessImpl(const WinRandomAccessImpl
&) = delete;
271 WinRandomAccessImpl
& operator=(const WinRandomAccessImpl
&) = delete;
274 // pread() based random-access
275 class WinRandomAccessFile
276 : private WinFileData
,
277 protected WinRandomAccessImpl
, // Want to be able to override
278 // PositionedReadInternal
279 public FSRandomAccessFile
{
281 WinRandomAccessFile(const std::string
& fname
, HANDLE hFile
, size_t alignment
,
282 const FileOptions
& options
);
284 ~WinRandomAccessFile();
286 IOStatus
Read(uint64_t offset
, size_t n
, const IOOptions
& options
,
287 Slice
* result
, char* scratch
,
288 IODebugContext
* dbg
) const override
;
290 virtual size_t GetUniqueId(char* id
, size_t max_size
) const override
;
292 virtual bool use_direct_io() const override
{
293 return WinFileData::use_direct_io();
296 IOStatus
InvalidateCache(size_t offset
, size_t length
) override
;
298 virtual size_t GetRequiredBufferAlignment() const override
;
// This is a sequential write class. It has been modeled (as others) after
// the original Posix class. We add support for unbuffered I/O on Windows as
// well: we utilize the original buffer as an alignment buffer to write
// directly to the file with no buffering.
// No buffering requires that the provided buffer is aligned to the physical
// sector size (SSD page size) and that all SetFilePointer() operations occur
// with such an alignment.
// We thus always write in sector/page size increments to the drive and leave
// the tail for the next write OR for Close(), at which point we pad with
// zeros. No padding is required for buffered access.
313 class WinWritableImpl
{
315 WinFileData
* file_data_
;
316 const uint64_t alignment_
;
318 next_write_offset_
; // Needed because Windows does not support O_APPEND
319 uint64_t reservedsize_
; // how far we have reserved space
321 virtual IOStatus
PreallocateInternal(uint64_t spaceToReserve
);
323 WinWritableImpl(WinFileData
* file_data
, size_t alignment
);
325 ~WinWritableImpl() {}
327 uint64_t GetAlignment() const { return alignment_
; }
329 IOStatus
AppendImpl(const Slice
& data
);
331 // Requires that the data is aligned as specified by
332 // GetRequiredBufferAlignment()
333 IOStatus
PositionedAppendImpl(const Slice
& data
, uint64_t offset
);
335 IOStatus
TruncateImpl(uint64_t size
);
337 IOStatus
CloseImpl();
339 IOStatus
SyncImpl(const IOOptions
& options
, IODebugContext
* dbg
);
341 uint64_t GetFileNextWriteOffset() {
342 // Double accounting now here with WritableFileWriter
343 // and this size will be wrong when unbuffered access is used
344 // but tests implement their own writable files and do not use
345 // WritableFileWrapper
346 // so we need to squeeze a square peg through
347 // a round hole here.
348 return next_write_offset_
;
351 IOStatus
AllocateImpl(uint64_t offset
, uint64_t len
);
354 WinWritableImpl(const WinWritableImpl
&) = delete;
355 WinWritableImpl
& operator=(const WinWritableImpl
&) = delete;
358 class WinWritableFile
: private WinFileData
,
359 protected WinWritableImpl
,
360 public FSWritableFile
{
362 WinWritableFile(const std::string
& fname
, HANDLE hFile
, size_t alignment
,
363 size_t capacity
, const FileOptions
& options
);
367 IOStatus
Append(const Slice
& data
, const IOOptions
& options
,
368 IODebugContext
* dbg
) override
;
369 IOStatus
Append(const Slice
& data
, const IOOptions
& opts
,
370 const DataVerificationInfo
& /* verification_info */,
371 IODebugContext
* dbg
) override
{
372 return Append(data
, opts
, dbg
);
375 // Requires that the data is aligned as specified by
376 // GetRequiredBufferAlignment()
377 IOStatus
PositionedAppend(const Slice
& data
, uint64_t offset
,
378 const IOOptions
& options
,
379 IODebugContext
* dbg
) override
;
380 IOStatus
PositionedAppend(const Slice
& data
, uint64_t offset
,
381 const IOOptions
& opts
,
382 const DataVerificationInfo
& /* verification_info */,
383 IODebugContext
* dbg
) override
{
384 return PositionedAppend(data
, offset
, opts
, dbg
);
// Need to implement this so the file is truncated correctly
// in both buffered and unbuffered modes
389 IOStatus
Truncate(uint64_t size
, const IOOptions
& options
,
390 IODebugContext
* dbg
) override
;
392 IOStatus
Close(const IOOptions
& options
, IODebugContext
* dbg
) override
;
// Write out the cached data to the OS cache.
// This is now taken care of by the WritableFileWriter.
396 IOStatus
Flush(const IOOptions
& options
, IODebugContext
* dbg
) override
;
398 IOStatus
Sync(const IOOptions
& options
, IODebugContext
* dbg
) override
;
400 IOStatus
Fsync(const IOOptions
& options
, IODebugContext
* dbg
) override
;
402 virtual bool IsSyncThreadSafe() const override
;
404 // Indicates if the class makes use of direct I/O
405 // Use PositionedAppend
406 virtual bool use_direct_io() const override
;
408 virtual size_t GetRequiredBufferAlignment() const override
;
410 uint64_t GetFileSize(const IOOptions
& options
, IODebugContext
* dbg
) override
;
412 IOStatus
Allocate(uint64_t offset
, uint64_t len
, const IOOptions
& options
,
413 IODebugContext
* dbg
) override
;
415 virtual size_t GetUniqueId(char* id
, size_t max_size
) const override
;
418 class WinRandomRWFile
: private WinFileData
,
419 protected WinRandomAccessImpl
,
420 protected WinWritableImpl
,
421 public FSRandomRWFile
{
423 WinRandomRWFile(const std::string
& fname
, HANDLE hFile
, size_t alignment
,
424 const FileOptions
& options
);
426 ~WinRandomRWFile() {}
// Indicates if the class makes use of direct I/O
// If true you must pass aligned buffer to Write()
430 virtual bool use_direct_io() const override
;
432 // Use the returned alignment value to allocate aligned
433 // buffer for Write() when use_direct_io() returns true
434 virtual size_t GetRequiredBufferAlignment() const override
;
436 // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
437 // Pass aligned buffer when use_direct_io() returns true.
438 IOStatus
Write(uint64_t offset
, const Slice
& data
, const IOOptions
& options
,
439 IODebugContext
* dbg
) override
;
441 // Read up to `n` bytes starting from offset `offset` and store them in
442 // result, provided `scratch` size should be at least `n`.
443 // Returns Status::OK() on success.
444 IOStatus
Read(uint64_t offset
, size_t n
, const IOOptions
& options
,
445 Slice
* result
, char* scratch
,
446 IODebugContext
* dbg
) const override
;
448 IOStatus
Flush(const IOOptions
& options
, IODebugContext
* dbg
) override
;
450 IOStatus
Sync(const IOOptions
& options
, IODebugContext
* dbg
) override
;
452 IOStatus
Fsync(const IOOptions
& options
, IODebugContext
* dbg
) override
{
453 return Sync(options
, dbg
);
456 IOStatus
Close(const IOOptions
& options
, IODebugContext
* dbg
) override
;
459 class WinMemoryMappedBuffer
: public MemoryMappedFileBuffer
{
465 WinMemoryMappedBuffer(HANDLE file_handle
, HANDLE map_handle
, void* base
,
467 : MemoryMappedFileBuffer(base
, size
),
468 file_handle_(file_handle
),
469 map_handle_(map_handle
) {}
470 ~WinMemoryMappedBuffer() override
;
473 class WinDirectory
: public FSDirectory
{
474 const std::string filename_
;
478 explicit WinDirectory(const std::string
& filename
, HANDLE h
) noexcept
479 : filename_(filename
), handle_(h
) {
480 assert(handle_
!= INVALID_HANDLE_VALUE
);
483 if (handle_
!= NULL
) {
484 IOStatus s
= WinDirectory::Close(IOOptions(), nullptr);
485 s
.PermitUncheckedError();
488 const std::string
& GetName() const { return filename_
; }
489 IOStatus
Fsync(const IOOptions
& options
, IODebugContext
* dbg
) override
;
490 IOStatus
Close(const IOOptions
& options
, IODebugContext
* dbg
) override
;
492 size_t GetUniqueId(char* id
, size_t max_size
) const override
;
495 class WinFileLock
: public FileLock
{
497 explicit WinFileLock(HANDLE hFile
) : hFile_(hFile
) {
498 assert(hFile
!= NULL
);
499 assert(hFile
!= INVALID_HANDLE_VALUE
);
508 } // namespace ROCKSDB_NAMESPACE