1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
12 #include "port/win/io_win.h"
15 #include "monitoring/iostats_context_imp.h"
16 #include "test_util/sync_point.h"
17 #include "util/aligned_buffer.h"
18 #include "util/coding.h"
20 namespace ROCKSDB_NAMESPACE
{
// Sector size constant for this translation unit.
// NOTE(review): presumably expressed in bytes -- confirm at the use sites.
const size_t kSectorSize{512};
// Returns true iff `alignment` is a non-zero power of two.
//
// The classic bit trick `(x & (x - 1)) == 0` also holds for x == 0, so zero
// is rejected explicitly: zero is not a power of two and is never a valid
// alignment.
inline bool IsPowerOfTwo(const size_t alignment) {
  return alignment != 0 && (alignment & (alignment - 1)) == 0;
}
// Returns true when the address held in `ptr` has none of the bits of
// `alignment - 1` set, i.e. the address is a multiple of `alignment` for a
// power-of-two `alignment`.
inline bool IsAligned(size_t alignment, const void* ptr) {
  const auto low_bits = uintptr_t(ptr) & (alignment - 1);
  return low_bits == 0;
}
39 std::string
GetWindowsErrSz(DWORD err
) {
41 LPSTR lpMsgBuf
= nullptr;
42 FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER
| FORMAT_MESSAGE_FROM_SYSTEM
|
43 FORMAT_MESSAGE_IGNORE_INSERTS
,
45 0, // Default language
46 reinterpret_cast<LPSTR
>(&lpMsgBuf
), 0, NULL
);
55 // We preserve the original name of this interface to denote the original idea
57 // All reads happen by a specified offset and pwrite interface does not change
58 // the position of the file pointer. Judging from the man page and errno it does
60 // lseek atomically to return the position of the file back where it was.
61 // WriteFile() does not
62 // have this capability. Therefore, for both pread and pwrite the pointer is
63 // advanced to the next position
64 // which is fine for writes because they are (should be) sequential.
65 // Because all the reads/writes happen by the specified offset, the caller in
67 // rely on the current file offset.
68 IOStatus
pwrite(const WinFileData
* file_data
, const Slice
& data
,
69 uint64_t offset
, size_t& bytes_written
) {
73 size_t num_bytes
= data
.size();
74 if (num_bytes
> std::numeric_limits
<DWORD
>::max()) {
75 // May happen in 64-bit builds where size_t is 64-bits but
76 // long is still 32-bit, but that's the API here at the moment
77 return IOStatus::InvalidArgument(
78 "num_bytes is too large for a single write: " + file_data
->GetName());
81 OVERLAPPED overlapped
= {0};
82 ULARGE_INTEGER offsetUnion
;
83 offsetUnion
.QuadPart
= offset
;
85 overlapped
.Offset
= offsetUnion
.LowPart
;
86 overlapped
.OffsetHigh
= offsetUnion
.HighPart
;
88 DWORD bytesWritten
= 0;
90 if (FALSE
== WriteFile(file_data
->GetFileHandle(), data
.data(),
91 static_cast<DWORD
>(num_bytes
), &bytesWritten
,
93 auto lastError
= GetLastError();
94 s
= IOErrorFromWindowsError("WriteFile failed: " + file_data
->GetName(),
97 bytes_written
= bytesWritten
;
103 // See comments for pwrite above
104 IOStatus
pread(const WinFileData
* file_data
, char* src
, size_t num_bytes
,
105 uint64_t offset
, size_t& bytes_read
) {
109 if (num_bytes
> std::numeric_limits
<DWORD
>::max()) {
110 return IOStatus::InvalidArgument(
111 "num_bytes is too large for a single read: " + file_data
->GetName());
114 OVERLAPPED overlapped
= {0};
115 ULARGE_INTEGER offsetUnion
;
116 offsetUnion
.QuadPart
= offset
;
118 overlapped
.Offset
= offsetUnion
.LowPart
;
119 overlapped
.OffsetHigh
= offsetUnion
.HighPart
;
123 if (FALSE
== ReadFile(file_data
->GetFileHandle(), src
,
124 static_cast<DWORD
>(num_bytes
), &bytesRead
,
126 auto lastError
= GetLastError();
127 // EOF is OK with zero bytes read
128 if (lastError
!= ERROR_HANDLE_EOF
) {
129 s
= IOErrorFromWindowsError("ReadFile failed: " + file_data
->GetName(),
133 bytes_read
= bytesRead
;
139 // SetFileInformationByHandle() is capable of fast pre-allocates.
140 // However, this does not change the file end position unless the file is
141 // truncated and the pre-allocated space is not considered filled with zeros.
142 IOStatus
fallocate(const std::string
& filename
, HANDLE hFile
,
146 FILE_ALLOCATION_INFO alloc_info
;
147 alloc_info
.AllocationSize
.QuadPart
= to_size
;
149 if (!SetFileInformationByHandle(hFile
, FileAllocationInfo
, &alloc_info
,
150 sizeof(FILE_ALLOCATION_INFO
))) {
151 auto lastError
= GetLastError();
152 status
= IOErrorFromWindowsError(
153 "Failed to pre-allocate space: " + filename
, lastError
);
159 IOStatus
ftruncate(const std::string
& filename
, HANDLE hFile
, uint64_t toSize
) {
162 FILE_END_OF_FILE_INFO end_of_file
;
163 end_of_file
.EndOfFile
.QuadPart
= toSize
;
165 if (!SetFileInformationByHandle(hFile
, FileEndOfFileInfo
, &end_of_file
,
166 sizeof(FILE_END_OF_FILE_INFO
))) {
167 auto lastError
= GetLastError();
168 status
= IOErrorFromWindowsError("Failed to Set end of file: " + filename
,
175 size_t GetUniqueIdFromFile(HANDLE
/*hFile*/, char* /*id*/,
176 size_t /*max_size*/) {
177 // Returning 0 is safe as it causes the table reader to generate a unique ID.
178 // This is suboptimal for performance as it prevents multiple table readers
179 // for the same file from sharing cached blocks. For example, if users have
180 // a low value for `max_open_files`, there can be many table readers opened
181 // for the same file.
183 // TODO: this is a temporarily solution as it is safe but not optimal for
184 // performance. For more details see discussion in
185 // https://github.com/facebook/rocksdb/pull/5844.
189 WinFileData::WinFileData(const std::string
& filename
, HANDLE hFile
,
191 : filename_(filename
),
193 use_direct_io_(direct_io
),
194 sector_size_(WinFileSystem::GetSectorSize(filename
)) {}
196 bool WinFileData::IsSectorAligned(const size_t off
) const {
197 return (off
& (sector_size_
- 1)) == 0;
200 ////////////////////////////////////////////////////////////////////////////////////////////////////
201 // WinMmapReadableFile
203 WinMmapReadableFile::WinMmapReadableFile(const std::string
& fileName
,
204 HANDLE hFile
, HANDLE hMap
,
205 const void* mapped_region
,
207 : WinFileData(fileName
, hFile
, false /* use_direct_io */),
209 mapped_region_(mapped_region
),
212 WinMmapReadableFile::~WinMmapReadableFile() {
213 BOOL ret
__attribute__((__unused__
));
214 ret
= ::UnmapViewOfFile(mapped_region_
);
217 ret
= ::CloseHandle(hMap_
);
221 IOStatus
WinMmapReadableFile::Read(uint64_t offset
, size_t n
,
222 const IOOptions
& /*options*/, Slice
* result
,
224 IODebugContext
* /*dbg*/) const {
227 if (offset
> length_
) {
229 return IOError(filename_
, EINVAL
);
230 } else if (offset
+ n
> length_
) {
231 n
= length_
- static_cast<size_t>(offset
);
233 *result
= Slice(reinterpret_cast<const char*>(mapped_region_
) + offset
, n
);
237 IOStatus
WinMmapReadableFile::InvalidateCache(size_t offset
, size_t length
) {
238 return IOStatus::OK();
241 size_t WinMmapReadableFile::GetUniqueId(char* id
, size_t max_size
) const {
242 return GetUniqueIdFromFile(hFile_
, id
, max_size
);
245 ///////////////////////////////////////////////////////////////////////////////
248 // Can only truncate or reserve to a sector size aligned if
249 // used on files that are opened with Unbuffered I/O
250 IOStatus
WinMmapFile::TruncateFile(uint64_t toSize
) {
251 return ftruncate(filename_
, hFile_
, toSize
);
254 IOStatus
WinMmapFile::UnmapCurrentRegion() {
257 if (mapped_begin_
!= nullptr) {
258 if (!::UnmapViewOfFile(mapped_begin_
)) {
259 status
= IOErrorFromWindowsError(
260 "Failed to unmap file view: " + filename_
, GetLastError());
263 // Move on to the next portion of the file
264 file_offset_
+= view_size_
;
266 // UnmapView automatically sends data to disk but not the metadata
267 // which is good and provides some equivalent of fdatasync() on Linux
268 // therefore, we donot need separate flag for metadata
269 mapped_begin_
= nullptr;
270 mapped_end_
= nullptr;
273 last_sync_
= nullptr;
274 pending_sync_
= false;
280 IOStatus
WinMmapFile::MapNewRegion(const IOOptions
& options
,
281 IODebugContext
* dbg
) {
284 assert(mapped_begin_
== nullptr);
286 size_t minDiskSize
= static_cast<size_t>(file_offset_
) + view_size_
;
288 if (minDiskSize
> reserved_size_
) {
289 status
= Allocate(file_offset_
, view_size_
, options
, dbg
);
296 if (hMap_
== NULL
|| reserved_size_
> mapping_size_
) {
298 // Unmap the previous one
299 BOOL ret
__attribute__((__unused__
));
300 ret
= ::CloseHandle(hMap_
);
305 ULARGE_INTEGER mappingSize
;
306 mappingSize
.QuadPart
= reserved_size_
;
308 hMap_
= CreateFileMappingA(
310 NULL
, // Security attributes
311 PAGE_READWRITE
, // There is not a write only mode for mapping
312 mappingSize
.HighPart
, // Enable mapping the whole file but the actual
313 // amount mapped is determined by MapViewOfFile
315 NULL
); // Mapping name
318 return IOErrorFromWindowsError(
319 "WindowsMmapFile failed to create file mapping for: " + filename_
,
323 mapping_size_
= reserved_size_
;
326 ULARGE_INTEGER offset
;
327 offset
.QuadPart
= file_offset_
;
329 // View must begin at the granularity aligned offset
330 mapped_begin_
= reinterpret_cast<char*>(
331 MapViewOfFileEx(hMap_
, FILE_MAP_WRITE
, offset
.HighPart
, offset
.LowPart
,
334 if (!mapped_begin_
) {
335 status
= IOErrorFromWindowsError(
336 "WindowsMmapFile failed to map file view: " + filename_
,
339 mapped_end_
= mapped_begin_
+ view_size_
;
340 dst_
= mapped_begin_
;
341 last_sync_
= mapped_begin_
;
342 pending_sync_
= false;
347 IOStatus
WinMmapFile::PreallocateInternal(uint64_t spaceToReserve
) {
348 return fallocate(filename_
, hFile_
, spaceToReserve
);
351 WinMmapFile::WinMmapFile(const std::string
& fname
, HANDLE hFile
,
352 size_t page_size
, size_t allocation_granularity
,
353 const FileOptions
& options
)
354 : WinFileData(fname
, hFile
, false),
355 FSWritableFile(options
),
357 page_size_(page_size
),
358 allocation_granularity_(allocation_granularity
),
362 mapped_begin_(nullptr),
363 mapped_end_(nullptr),
367 pending_sync_(false) {
368 // Allocation granularity must be obtained from GetSystemInfo() and must be
370 assert(allocation_granularity
> 0);
371 assert((allocation_granularity
& (allocation_granularity
- 1)) == 0);
373 assert(page_size
> 0);
374 assert((page_size
& (page_size
- 1)) == 0);
376 // Only for memory mapped writes
377 assert(options
.use_mmap_writes
);
379 // View size must be both the multiple of allocation_granularity AND the
380 // page size and the granularity is usually a multiple of a page size.
381 const size_t viewSize
=
382 32 * 1024; // 32Kb similar to the Windows File Cache in buffered mode
383 view_size_
= Roundup(viewSize
, allocation_granularity_
);
386 WinMmapFile::~WinMmapFile() {
388 this->Close(IOOptions(), nullptr);
392 IOStatus
WinMmapFile::Append(const Slice
& data
, const IOOptions
& options
,
393 IODebugContext
* dbg
) {
394 const char* src
= data
.data();
395 size_t left
= data
.size();
398 assert(mapped_begin_
<= dst_
);
399 size_t avail
= mapped_end_
- dst_
;
402 IOStatus s
= UnmapCurrentRegion();
404 s
= MapNewRegion(options
, dbg
);
411 size_t n
= std::min(left
, avail
);
412 memcpy(dst_
, src
, n
);
416 pending_sync_
= true;
420 // Now make sure that the last partial page is padded with zeros if needed
421 size_t bytesToPad
= Roundup(size_t(dst_
), page_size_
) - size_t(dst_
);
422 if (bytesToPad
> 0) {
423 memset(dst_
, 0, bytesToPad
);
426 return IOStatus::OK();
429 // Means Close() will properly take care of truncate
430 // and it does not need any additional information
431 IOStatus
WinMmapFile::Truncate(uint64_t size
, const IOOptions
& /*options*/,
432 IODebugContext
* /*dbg*/) {
433 return IOStatus::OK();
436 IOStatus
WinMmapFile::Close(const IOOptions
& options
, IODebugContext
* dbg
) {
439 assert(NULL
!= hFile_
);
441 // We truncate to the precise size so no
442 // uninitialized data at the end. SetEndOfFile
443 // which we use does not write zeros and it is good.
444 uint64_t targetSize
= GetFileSize(options
, dbg
);
446 if (mapped_begin_
!= nullptr) {
447 // Sync before unmapping to make sure everything
448 // is on disk and there is not a lazy writing
449 // so we are deterministic with the tests
451 s
= UnmapCurrentRegion();
455 BOOL ret
= ::CloseHandle(hMap_
);
456 if (!ret
&& s
.ok()) {
457 auto lastError
= GetLastError();
458 s
= IOErrorFromWindowsError(
459 "Failed to Close mapping for file: " + filename_
, lastError
);
465 if (hFile_
!= NULL
) {
466 TruncateFile(targetSize
);
468 BOOL ret
= ::CloseHandle(hFile_
);
471 if (!ret
&& s
.ok()) {
472 auto lastError
= GetLastError();
473 s
= IOErrorFromWindowsError(
474 "Failed to close file map handle: " + filename_
, lastError
);
481 IOStatus
WinMmapFile::Flush(const IOOptions
& /*options*/,
482 IODebugContext
* /*dbg*/) {
483 return IOStatus::OK();
487 IOStatus
WinMmapFile::Sync(const IOOptions
& /*options*/,
488 IODebugContext
* /*dbg*/) {
491 // Some writes occurred since last sync
492 if (dst_
> last_sync_
) {
493 assert(mapped_begin_
);
495 assert(dst_
> mapped_begin_
);
496 assert(dst_
< mapped_end_
);
499 TruncateToPageBoundary(page_size_
, last_sync_
- mapped_begin_
);
501 TruncateToPageBoundary(page_size_
, dst_
- mapped_begin_
- 1);
503 // Flush only the amount of that is a multiple of pages
504 if (!::FlushViewOfFile(mapped_begin_
+ page_begin
,
505 (page_end
- page_begin
) + page_size_
)) {
506 s
= IOErrorFromWindowsError("Failed to FlushViewOfFile: " + filename_
,
517 * Flush data as well as metadata to stable storage.
519 IOStatus
WinMmapFile::Fsync(const IOOptions
& options
, IODebugContext
* dbg
) {
520 IOStatus s
= Sync(options
, dbg
);
523 if (s
.ok() && pending_sync_
) {
524 if (!::FlushFileBuffers(hFile_
)) {
525 s
= IOErrorFromWindowsError("Failed to FlushFileBuffers: " + filename_
,
528 pending_sync_
= false;
535 * Get the size of valid data in the file. This will not match the
536 * size that is returned from the filesystem because we use mmap
537 * to extend file by map_size every time.
539 uint64_t WinMmapFile::GetFileSize(const IOOptions
& /*options*/,
540 IODebugContext
* /*dbg*/) {
541 size_t used
= dst_
- mapped_begin_
;
542 return file_offset_
+ used
;
545 IOStatus
WinMmapFile::InvalidateCache(size_t offset
, size_t length
) {
546 return IOStatus::OK();
549 IOStatus
WinMmapFile::Allocate(uint64_t offset
, uint64_t len
,
550 const IOOptions
& /*options*/,
551 IODebugContext
* /*dbg*/) {
553 TEST_KILL_RANDOM("WinMmapFile::Allocate");
555 // Make sure that we reserve an aligned amount of space
556 // since the reservation block size is driven outside so we want
557 // to check if we are ok with reservation here
558 size_t spaceToReserve
=
559 Roundup(static_cast<size_t>(offset
+ len
), view_size_
);
561 if (spaceToReserve
<= reserved_size_
) {
565 IOSTATS_TIMER_GUARD(allocate_nanos
);
566 status
= PreallocateInternal(spaceToReserve
);
568 reserved_size_
= spaceToReserve
;
573 size_t WinMmapFile::GetUniqueId(char* id
, size_t max_size
) const {
574 return GetUniqueIdFromFile(hFile_
, id
, max_size
);
577 //////////////////////////////////////////////////////////////////////////////////
580 WinSequentialFile::WinSequentialFile(const std::string
& fname
, HANDLE f
,
581 const FileOptions
& options
)
582 : WinFileData(fname
, f
, options
.use_direct_reads
) {}
584 WinSequentialFile::~WinSequentialFile() {
585 assert(hFile_
!= INVALID_HANDLE_VALUE
);
588 IOStatus
WinSequentialFile::Read(size_t n
, const IOOptions
& /*opts*/,
589 Slice
* result
, char* scratch
,
590 IODebugContext
* /*dbg*/) {
594 assert(result
!= nullptr);
595 if (WinFileData::use_direct_io()) {
596 return IOStatus::NotSupported("Read() does not support direct_io");
599 // Windows ReadFile API accepts a DWORD.
600 // While it is possible to read in a loop if n is too big
601 // it is an unlikely case.
602 if (n
> std::numeric_limits
<DWORD
>::max()) {
603 return IOStatus::InvalidArgument("n is too big for a single ReadFile: " +
608 static_cast<DWORD
>(n
); // cast is safe due to the check above
610 BOOL ret
= ReadFile(hFile_
, scratch
, bytesToRead
, &bytesRead
, NULL
);
614 auto lastError
= GetLastError();
615 if (lastError
!= ERROR_HANDLE_EOF
) {
616 s
= IOErrorFromWindowsError("ReadFile failed: " + filename_
, lastError
);
620 *result
= Slice(scratch
, r
);
624 IOStatus
WinSequentialFile::PositionedReadInternal(char* src
, size_t numBytes
,
626 size_t& bytes_read
) const {
627 return pread(this, src
, numBytes
, offset
, bytes_read
);
630 IOStatus
WinSequentialFile::PositionedRead(uint64_t offset
, size_t n
,
631 const IOOptions
& /*opts*/,
632 Slice
* result
, char* scratch
,
633 IODebugContext
* /*dbg*/) {
634 if (!WinFileData::use_direct_io()) {
635 return IOStatus::NotSupported("This function is only used for direct_io");
638 assert(IsSectorAligned(static_cast<size_t>(offset
)));
639 assert(IsSectorAligned(static_cast<size_t>(n
)));
641 size_t bytes_read
= 0; // out param
642 IOStatus s
= PositionedReadInternal(scratch
, static_cast<size_t>(n
), offset
,
644 *result
= Slice(scratch
, bytes_read
);
648 IOStatus
WinSequentialFile::Skip(uint64_t n
) {
649 // Can't handle more than signed max as SetFilePointerEx accepts a signed
650 // 64-bit integer. As such it is a highly unlikley case to have n so large.
651 if (n
> static_cast<uint64_t>(std::numeric_limits
<LONGLONG
>::max())) {
652 return IOStatus::InvalidArgument(
653 "n is too large for a single SetFilePointerEx() call" + filename_
);
657 li
.QuadPart
= static_cast<LONGLONG
>(n
); // cast is safe due to the check
659 BOOL ret
= SetFilePointerEx(hFile_
, li
, NULL
, FILE_CURRENT
);
661 auto lastError
= GetLastError();
662 return IOErrorFromWindowsError("Skip SetFilePointerEx():" + filename_
,
665 return IOStatus::OK();
668 IOStatus
WinSequentialFile::InvalidateCache(size_t offset
, size_t length
) {
669 return IOStatus::OK();
672 //////////////////////////////////////////////////////////////////////////////////////////////////
673 /// WinRandomAccessBase
675 inline IOStatus
WinRandomAccessImpl::PositionedReadInternal(
676 char* src
, size_t numBytes
, uint64_t offset
, size_t& bytes_read
) const {
677 return pread(file_base_
, src
, numBytes
, offset
, bytes_read
);
680 inline WinRandomAccessImpl::WinRandomAccessImpl(WinFileData
* file_base
,
682 const FileOptions
& options
)
683 : file_base_(file_base
),
684 alignment_(std::max(alignment
, file_base
->GetSectorSize())) {
685 assert(!options
.use_mmap_reads
);
688 inline IOStatus
WinRandomAccessImpl::ReadImpl(uint64_t offset
, size_t n
,
690 char* scratch
) const {
691 // Check buffer alignment
692 if (file_base_
->use_direct_io()) {
693 assert(file_base_
->IsSectorAligned(static_cast<size_t>(offset
)));
694 assert(IsAligned(alignment_
, scratch
));
698 *result
= Slice(scratch
, 0);
699 return IOStatus::OK();
702 size_t bytes_read
= 0;
703 IOStatus s
= PositionedReadInternal(scratch
, n
, offset
, bytes_read
);
704 *result
= Slice(scratch
, bytes_read
);
708 ///////////////////////////////////////////////////////////////////////////////////////////////////
709 /// WinRandomAccessFile
711 WinRandomAccessFile::WinRandomAccessFile(const std::string
& fname
, HANDLE hFile
,
713 const FileOptions
& options
)
714 : WinFileData(fname
, hFile
, options
.use_direct_reads
),
715 WinRandomAccessImpl(this, alignment
, options
) {}
717 WinRandomAccessFile::~WinRandomAccessFile() {}
719 IOStatus
WinRandomAccessFile::Read(uint64_t offset
, size_t n
,
720 const IOOptions
& /*options*/, Slice
* result
,
722 IODebugContext
* /*dbg*/) const {
723 return ReadImpl(offset
, n
, result
, scratch
);
726 IOStatus
WinRandomAccessFile::InvalidateCache(size_t offset
, size_t length
) {
727 return IOStatus::OK();
730 size_t WinRandomAccessFile::GetUniqueId(char* id
, size_t max_size
) const {
731 return GetUniqueIdFromFile(GetFileHandle(), id
, max_size
);
734 size_t WinRandomAccessFile::GetRequiredBufferAlignment() const {
735 return GetAlignment();
738 /////////////////////////////////////////////////////////////////////////////
742 inline IOStatus
WinWritableImpl::PreallocateInternal(uint64_t spaceToReserve
) {
743 return fallocate(file_data_
->GetName(), file_data_
->GetFileHandle(),
747 inline WinWritableImpl::WinWritableImpl(WinFileData
* file_data
,
749 : file_data_(file_data
),
750 alignment_(std::max(alignment
, file_data
->GetSectorSize())),
751 next_write_offset_(0),
753 // Query current position in case ReopenWritableFile is called
754 // This position is only important for buffered writes
755 // for unbuffered writes we explicitely specify the position.
756 LARGE_INTEGER zero_move
;
757 zero_move
.QuadPart
= 0; // Do not move
760 BOOL ret
= SetFilePointerEx(file_data_
->GetFileHandle(), zero_move
, &pos
,
762 // Querying no supped to fail
764 next_write_offset_
= pos
.QuadPart
;
770 inline IOStatus
WinWritableImpl::AppendImpl(const Slice
& data
) {
773 if (data
.size() > std::numeric_limits
<DWORD
>::max()) {
774 return IOStatus::InvalidArgument("data is too long for a single write" +
775 file_data_
->GetName());
778 size_t bytes_written
= 0; // out param
780 if (file_data_
->use_direct_io()) {
781 // With no offset specified we are appending
782 // to the end of the file
783 assert(file_data_
->IsSectorAligned(next_write_offset_
));
784 assert(file_data_
->IsSectorAligned(data
.size()));
785 assert(IsAligned(static_cast<size_t>(GetAlignment()), data
.data()));
786 s
= pwrite(file_data_
, data
, next_write_offset_
, bytes_written
);
788 DWORD bytesWritten
= 0;
789 if (!WriteFile(file_data_
->GetFileHandle(), data
.data(),
790 static_cast<DWORD
>(data
.size()), &bytesWritten
, NULL
)) {
791 auto lastError
= GetLastError();
792 s
= IOErrorFromWindowsError(
793 "Failed to WriteFile: " + file_data_
->GetName(), lastError
);
795 bytes_written
= bytesWritten
;
800 if (bytes_written
== data
.size()) {
801 // This matters for direct_io cases where
802 // we rely on the fact that next_write_offset_
804 next_write_offset_
+= bytes_written
;
806 s
= IOStatus::IOError("Failed to write all bytes: " +
807 file_data_
->GetName());
814 inline IOStatus
WinWritableImpl::PositionedAppendImpl(const Slice
& data
,
816 if (file_data_
->use_direct_io()) {
817 assert(file_data_
->IsSectorAligned(static_cast<size_t>(offset
)));
818 assert(file_data_
->IsSectorAligned(data
.size()));
819 assert(IsAligned(static_cast<size_t>(GetAlignment()), data
.data()));
822 size_t bytes_written
= 0;
823 IOStatus s
= pwrite(file_data_
, data
, offset
, bytes_written
);
826 if (bytes_written
== data
.size()) {
827 // For sequential write this would be simple
828 // size extension by data.size()
829 uint64_t write_end
= offset
+ bytes_written
;
830 if (write_end
>= next_write_offset_
) {
831 next_write_offset_
= write_end
;
834 s
= IOStatus::IOError("Failed to write all of the requested data: " +
835 file_data_
->GetName());
841 inline IOStatus
WinWritableImpl::TruncateImpl(uint64_t size
) {
842 // It is tempting to check for the size for sector alignment
843 // but truncation may come at the end and there is not a requirement
844 // for this to be sector aligned so long as we do not attempt to write
845 // after that. The interface docs state that the behavior is undefined
848 ftruncate(file_data_
->GetName(), file_data_
->GetFileHandle(), size
);
851 next_write_offset_
= size
;
856 inline IOStatus
WinWritableImpl::CloseImpl() {
859 auto hFile
= file_data_
->GetFileHandle();
860 assert(INVALID_HANDLE_VALUE
!= hFile
);
862 if (!::FlushFileBuffers(hFile
)) {
863 auto lastError
= GetLastError();
864 s
= IOErrorFromWindowsError(
865 "FlushFileBuffers failed at Close() for: " + file_data_
->GetName(),
869 if (!file_data_
->CloseFile() && s
.ok()) {
870 auto lastError
= GetLastError();
871 s
= IOErrorFromWindowsError(
872 "CloseHandle failed for: " + file_data_
->GetName(), lastError
);
877 inline IOStatus
WinWritableImpl::SyncImpl(const IOOptions
& /*options*/,
878 IODebugContext
* /*dbg*/) {
880 if (!::FlushFileBuffers(file_data_
->GetFileHandle())) {
881 auto lastError
= GetLastError();
882 s
= IOErrorFromWindowsError(
883 "FlushFileBuffers failed at Sync() for: " + file_data_
->GetName(),
889 inline IOStatus
WinWritableImpl::AllocateImpl(uint64_t offset
, uint64_t len
) {
891 TEST_KILL_RANDOM("WinWritableFile::Allocate");
893 // Make sure that we reserve an aligned amount of space
894 // since the reservation block size is driven outside so we want
895 // to check if we are ok with reservation here
896 size_t spaceToReserve
= Roundup(static_cast<size_t>(offset
+ len
),
897 static_cast<size_t>(alignment_
));
899 if (spaceToReserve
<= reservedsize_
) {
903 IOSTATS_TIMER_GUARD(allocate_nanos
);
904 status
= PreallocateInternal(spaceToReserve
);
906 reservedsize_
= spaceToReserve
;
911 ////////////////////////////////////////////////////////////////////////////////
914 WinWritableFile::WinWritableFile(const std::string
& fname
, HANDLE hFile
,
915 size_t alignment
, size_t /* capacity */,
916 const FileOptions
& options
)
917 : WinFileData(fname
, hFile
, options
.use_direct_writes
),
918 WinWritableImpl(this, alignment
),
919 FSWritableFile(options
) {
920 assert(!options
.use_mmap_writes
);
923 WinWritableFile::~WinWritableFile() {}
925 // Indicates if the class makes use of direct I/O
926 bool WinWritableFile::use_direct_io() const {
927 return WinFileData::use_direct_io();
930 size_t WinWritableFile::GetRequiredBufferAlignment() const {
931 return static_cast<size_t>(GetAlignment());
934 IOStatus
WinWritableFile::Append(const Slice
& data
,
935 const IOOptions
& /*options*/,
936 IODebugContext
* /*dbg*/) {
937 return AppendImpl(data
);
940 IOStatus
WinWritableFile::PositionedAppend(const Slice
& data
, uint64_t offset
,
941 const IOOptions
& /*options*/,
942 IODebugContext
* /*dbg*/) {
943 return PositionedAppendImpl(data
, offset
);
946 // Need to implement this so the file is truncated correctly
947 // when buffered and unbuffered mode
948 IOStatus
WinWritableFile::Truncate(uint64_t size
, const IOOptions
& /*options*/,
949 IODebugContext
* /*dbg*/) {
950 return TruncateImpl(size
);
953 IOStatus
WinWritableFile::Close(const IOOptions
& /*options*/,
954 IODebugContext
* /*dbg*/) {
958 // write out the cached data to the OS cache
959 // This is now taken care of the WritableFileWriter
960 IOStatus
WinWritableFile::Flush(const IOOptions
& /*options*/,
961 IODebugContext
* /*dbg*/) {
962 return IOStatus::OK();
965 IOStatus
WinWritableFile::Sync(const IOOptions
& options
, IODebugContext
* dbg
) {
966 return SyncImpl(options
, dbg
);
969 IOStatus
WinWritableFile::Fsync(const IOOptions
& options
, IODebugContext
* dbg
) {
970 return SyncImpl(options
, dbg
);
973 bool WinWritableFile::IsSyncThreadSafe() const { return true; }
975 uint64_t WinWritableFile::GetFileSize(const IOOptions
& /*options*/,
976 IODebugContext
* /*dbg*/) {
977 return GetFileNextWriteOffset();
980 IOStatus
WinWritableFile::Allocate(uint64_t offset
, uint64_t len
,
981 const IOOptions
& /*options*/,
982 IODebugContext
* /*dbg*/) {
983 return AllocateImpl(offset
, len
);
986 size_t WinWritableFile::GetUniqueId(char* id
, size_t max_size
) const {
987 return GetUniqueIdFromFile(GetFileHandle(), id
, max_size
);
990 /////////////////////////////////////////////////////////////////////////
993 WinRandomRWFile::WinRandomRWFile(const std::string
& fname
, HANDLE hFile
,
994 size_t alignment
, const FileOptions
& options
)
995 : WinFileData(fname
, hFile
,
996 options
.use_direct_reads
&& options
.use_direct_writes
),
997 WinRandomAccessImpl(this, alignment
, options
),
998 WinWritableImpl(this, alignment
) {}
1000 bool WinRandomRWFile::use_direct_io() const {
1001 return WinFileData::use_direct_io();
1004 size_t WinRandomRWFile::GetRequiredBufferAlignment() const {
1005 assert(WinRandomAccessImpl::GetAlignment() ==
1006 WinWritableImpl::GetAlignment());
1007 return static_cast<size_t>(WinRandomAccessImpl::GetAlignment());
1010 IOStatus
WinRandomRWFile::Write(uint64_t offset
, const Slice
& data
,
1011 const IOOptions
& /*options*/,
1012 IODebugContext
* /*dbg*/) {
1013 return PositionedAppendImpl(data
, offset
);
1016 IOStatus
WinRandomRWFile::Read(uint64_t offset
, size_t n
,
1017 const IOOptions
& /*options*/, Slice
* result
,
1018 char* scratch
, IODebugContext
* /*dbg*/) const {
1019 return ReadImpl(offset
, n
, result
, scratch
);
1022 IOStatus
WinRandomRWFile::Flush(const IOOptions
& /*options*/,
1023 IODebugContext
* /*dbg*/) {
1024 return IOStatus::OK();
1027 IOStatus
WinRandomRWFile::Sync(const IOOptions
& options
, IODebugContext
* dbg
) {
1028 return SyncImpl(options
, dbg
);
1031 IOStatus
WinRandomRWFile::Close(const IOOptions
& /*options*/,
1032 IODebugContext
* /*dbg*/) {
1036 //////////////////////////////////////////////////////////////////////////
1037 /// WinMemoryMappedBufer
1038 WinMemoryMappedBuffer::~WinMemoryMappedBuffer() {
1040 #if defined(_MSC_VER)
1043 __attribute__((__unused__
));
1045 if (base_
!= nullptr) {
1046 ret
= ::UnmapViewOfFile(base_
);
1050 if (map_handle_
!= NULL
&& map_handle_
!= INVALID_HANDLE_VALUE
) {
1051 ret
= ::CloseHandle(map_handle_
);
1055 if (file_handle_
!= NULL
&& file_handle_
!= INVALID_HANDLE_VALUE
) {
1056 ret
= ::CloseHandle(file_handle_
);
1058 file_handle_
= NULL
;
1062 //////////////////////////////////////////////////////////////////////////
1065 IOStatus
WinDirectory::Fsync(const IOOptions
& /*options*/,
1066 IODebugContext
* /*dbg*/) {
1067 return IOStatus::OK();
1070 IOStatus
WinDirectory::Close(const IOOptions
& /*options*/,
1071 IODebugContext
* /*dbg*/) {
1072 IOStatus s
= IOStatus::OK();
1073 BOOL ret
__attribute__((__unused__
));
1074 if (handle_
!= INVALID_HANDLE_VALUE
) {
1075 ret
= ::CloseHandle(handle_
);
1077 auto lastError
= GetLastError();
1078 s
= IOErrorFromWindowsError("Directory closes failed for : " + GetName(),
1086 size_t WinDirectory::GetUniqueId(char* id
, size_t max_size
) const {
1087 return GetUniqueIdFromFile(handle_
, id
, max_size
);
1089 //////////////////////////////////////////////////////////////////////////
1092 WinFileLock::~WinFileLock() {
1093 BOOL ret
__attribute__((__unused__
));
1094 ret
= ::CloseHandle(hFile_
);
1099 } // namespace ROCKSDB_NAMESPACE