1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
10 #include "port/win/io_win.h"
12 #include "monitoring/iostats_context_imp.h"
13 #include "util/aligned_buffer.h"
14 #include "util/coding.h"
15 #include "util/sync_point.h"
// Sector size, in bytes, that IsSectorAligned() checks offsets and sizes
// against.
const size_t kSectorSize = 512;
// Returns true when `alignment` is a power of two, i.e. exactly one bit is
// set. Zero is rejected explicitly: `0 & (0 - 1)` is also 0, so the bare bit
// trick would otherwise report zero as a power of two.
bool IsPowerOfTwo(const size_t alignment) {
  return alignment != 0 && (alignment & (alignment - 1)) == 0;
}
33 bool IsSectorAligned(const size_t off
) {
34 return (off
& (kSectorSize
- 1)) == 0;
// Returns true when the address held by `ptr` is a multiple of `alignment`.
// The mask test is only meaningful when `alignment` is a power of two.
bool IsAligned(size_t alignment, const void* ptr) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
  return (address & (alignment - 1)) == 0;
}
44 std::string
GetWindowsErrSz(DWORD err
) {
46 FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER
| FORMAT_MESSAGE_FROM_SYSTEM
|
47 FORMAT_MESSAGE_IGNORE_INSERTS
,
49 0, // Default language
50 reinterpret_cast<LPSTR
>(&lpMsgBuf
), 0, NULL
);
52 std::string Err
= lpMsgBuf
;
57 // We preserve the original name of this interface to denote the original idea
59 // All reads happen by a specified offset and pwrite interface does not change
60 // the position of the file pointer. Judging from the man page and errno it does
62 // lseek atomically to return the position of the file back where it was.
63 // WriteFile() does not
64 // have this capability. Therefore, for both pread and pwrite the pointer is
65 // advanced to the next position
66 // which is fine for writes because they are (should be) sequential.
67 // Because all the reads/writes happen by the specified offset, the caller in
69 // rely on the current file offset.
70 SSIZE_T
pwrite(HANDLE hFile
, const char* src
, size_t numBytes
,
72 assert(numBytes
<= std::numeric_limits
<DWORD
>::max());
73 OVERLAPPED overlapped
= { 0 };
74 ULARGE_INTEGER offsetUnion
;
75 offsetUnion
.QuadPart
= offset
;
77 overlapped
.Offset
= offsetUnion
.LowPart
;
78 overlapped
.OffsetHigh
= offsetUnion
.HighPart
;
82 unsigned long bytesWritten
= 0;
84 if (FALSE
== WriteFile(hFile
, src
, static_cast<DWORD
>(numBytes
), &bytesWritten
,
88 result
= bytesWritten
;
94 // See comments for pwrite above
95 SSIZE_T
pread(HANDLE hFile
, char* src
, size_t numBytes
, uint64_t offset
) {
96 assert(numBytes
<= std::numeric_limits
<DWORD
>::max());
97 OVERLAPPED overlapped
= { 0 };
98 ULARGE_INTEGER offsetUnion
;
99 offsetUnion
.QuadPart
= offset
;
101 overlapped
.Offset
= offsetUnion
.LowPart
;
102 overlapped
.OffsetHigh
= offsetUnion
.HighPart
;
106 unsigned long bytesRead
= 0;
108 if (FALSE
== ReadFile(hFile
, src
, static_cast<DWORD
>(numBytes
), &bytesRead
,
118 // SetFileInformationByHandle() is capable of fast pre-allocates.
119 // However, this does not change the file end position unless the file is
120 // truncated and the pre-allocated space is not considered filled with zeros.
121 Status
fallocate(const std::string
& filename
, HANDLE hFile
,
125 FILE_ALLOCATION_INFO alloc_info
;
126 alloc_info
.AllocationSize
.QuadPart
= to_size
;
128 if (!SetFileInformationByHandle(hFile
, FileAllocationInfo
, &alloc_info
,
129 sizeof(FILE_ALLOCATION_INFO
))) {
130 auto lastError
= GetLastError();
131 status
= IOErrorFromWindowsError(
132 "Failed to pre-allocate space: " + filename
, lastError
);
138 Status
ftruncate(const std::string
& filename
, HANDLE hFile
,
142 FILE_END_OF_FILE_INFO end_of_file
;
143 end_of_file
.EndOfFile
.QuadPart
= toSize
;
145 if (!SetFileInformationByHandle(hFile
, FileEndOfFileInfo
, &end_of_file
,
146 sizeof(FILE_END_OF_FILE_INFO
))) {
147 auto lastError
= GetLastError();
148 status
= IOErrorFromWindowsError("Failed to Set end of file: " + filename
,
155 size_t GetUniqueIdFromFile(HANDLE hFile
, char* id
, size_t max_size
) {
157 if (max_size
< kMaxVarint64Length
* 3) {
161 // This function has to be re-worked for cases when
162 // ReFS file system introduced on Windows Server 2012 is used
163 BY_HANDLE_FILE_INFORMATION FileInfo
;
165 BOOL result
= GetFileInformationByHandle(hFile
, &FileInfo
);
167 TEST_SYNC_POINT_CALLBACK("GetUniqueIdFromFile:FS_IOC_GETVERSION", &result
);
174 rid
= EncodeVarint64(rid
, uint64_t(FileInfo
.dwVolumeSerialNumber
));
175 rid
= EncodeVarint64(rid
, uint64_t(FileInfo
.nFileIndexHigh
));
176 rid
= EncodeVarint64(rid
, uint64_t(FileInfo
.nFileIndexLow
));
179 return static_cast<size_t>(rid
- id
);
182 ////////////////////////////////////////////////////////////////////////////////////////////////////
183 // WinMmapReadableFile
185 WinMmapReadableFile::WinMmapReadableFile(const std::string
& fileName
,
186 HANDLE hFile
, HANDLE hMap
,
187 const void* mapped_region
,
189 : WinFileData(fileName
, hFile
, false /* use_direct_io */),
191 mapped_region_(mapped_region
),
194 WinMmapReadableFile::~WinMmapReadableFile() {
195 BOOL ret
= ::UnmapViewOfFile(mapped_region_
);
198 ret
= ::CloseHandle(hMap_
);
202 Status
WinMmapReadableFile::Read(uint64_t offset
, size_t n
, Slice
* result
,
203 char* scratch
) const {
206 if (offset
> length_
) {
208 return IOError(filename_
, EINVAL
);
209 } else if (offset
+ n
> length_
) {
210 n
= length_
- offset
;
213 Slice(reinterpret_cast<const char*>(mapped_region_
)+offset
, n
);
217 Status
WinMmapReadableFile::InvalidateCache(size_t offset
, size_t length
) {
221 size_t WinMmapReadableFile::GetUniqueId(char* id
, size_t max_size
) const {
222 return GetUniqueIdFromFile(hFile_
, id
, max_size
);
225 ///////////////////////////////////////////////////////////////////////////////
229 // Can only truncate or reserve to a sector size aligned if
230 // used on files that are opened with Unbuffered I/O
231 Status
WinMmapFile::TruncateFile(uint64_t toSize
) {
232 return ftruncate(filename_
, hFile_
, toSize
);
235 Status
WinMmapFile::UnmapCurrentRegion() {
238 if (mapped_begin_
!= nullptr) {
239 if (!::UnmapViewOfFile(mapped_begin_
)) {
240 status
= IOErrorFromWindowsError(
241 "Failed to unmap file view: " + filename_
, GetLastError());
244 // Move on to the next portion of the file
245 file_offset_
+= view_size_
;
247 // UnmapView automatically sends data to disk but not the metadata
248 // which is good and provides some equivalent of fdatasync() on Linux
249 // therefore, we do not need a separate flag for metadata
250 mapped_begin_
= nullptr;
251 mapped_end_
= nullptr;
254 last_sync_
= nullptr;
255 pending_sync_
= false;
261 Status
WinMmapFile::MapNewRegion() {
265 assert(mapped_begin_
== nullptr);
267 size_t minDiskSize
= file_offset_
+ view_size_
;
269 if (minDiskSize
> reserved_size_
) {
270 status
= Allocate(file_offset_
, view_size_
);
277 if (hMap_
== NULL
|| reserved_size_
> mapping_size_
) {
280 // Unmap the previous one
281 BOOL ret
= ::CloseHandle(hMap_
);
286 ULARGE_INTEGER mappingSize
;
287 mappingSize
.QuadPart
= reserved_size_
;
289 hMap_
= CreateFileMappingA(
291 NULL
, // Security attributes
292 PAGE_READWRITE
, // There is not a write only mode for mapping
293 mappingSize
.HighPart
, // Enable mapping the whole file but the actual
294 // amount mapped is determined by MapViewOfFile
296 NULL
); // Mapping name
299 return IOErrorFromWindowsError(
300 "WindowsMmapFile failed to create file mapping for: " + filename_
,
304 mapping_size_
= reserved_size_
;
307 ULARGE_INTEGER offset
;
308 offset
.QuadPart
= file_offset_
;
310 // View must begin at the granularity aligned offset
311 mapped_begin_
= reinterpret_cast<char*>(
312 MapViewOfFileEx(hMap_
, FILE_MAP_WRITE
, offset
.HighPart
, offset
.LowPart
,
315 if (!mapped_begin_
) {
316 status
= IOErrorFromWindowsError(
317 "WindowsMmapFile failed to map file view: " + filename_
,
320 mapped_end_
= mapped_begin_
+ view_size_
;
321 dst_
= mapped_begin_
;
322 last_sync_
= mapped_begin_
;
323 pending_sync_
= false;
328 Status
WinMmapFile::PreallocateInternal(uint64_t spaceToReserve
) {
329 return fallocate(filename_
, hFile_
, spaceToReserve
);
332 WinMmapFile::WinMmapFile(const std::string
& fname
, HANDLE hFile
, size_t page_size
,
333 size_t allocation_granularity
, const EnvOptions
& options
)
334 : WinFileData(fname
, hFile
, false),
336 page_size_(page_size
),
337 allocation_granularity_(allocation_granularity
),
341 mapped_begin_(nullptr),
342 mapped_end_(nullptr),
346 pending_sync_(false) {
347 // Allocation granularity must be obtained from GetSystemInfo() and must be
349 assert(allocation_granularity
> 0);
350 assert((allocation_granularity
& (allocation_granularity
- 1)) == 0);
352 assert(page_size
> 0);
353 assert((page_size
& (page_size
- 1)) == 0);
355 // Only for memory mapped writes
356 assert(options
.use_mmap_writes
);
358 // View size must be both the multiple of allocation_granularity AND the
359 // page size and the granularity is usually a multiple of a page size.
360 const size_t viewSize
= 32 * 1024; // 32Kb similar to the Windows File Cache in buffered mode
361 view_size_
= Roundup(viewSize
, allocation_granularity_
);
364 WinMmapFile::~WinMmapFile() {
370 Status
WinMmapFile::Append(const Slice
& data
) {
371 const char* src
= data
.data();
372 size_t left
= data
.size();
375 assert(mapped_begin_
<= dst_
);
376 size_t avail
= mapped_end_
- dst_
;
379 Status s
= UnmapCurrentRegion();
388 size_t n
= std::min(left
, avail
);
389 memcpy(dst_
, src
, n
);
393 pending_sync_
= true;
397 // Now make sure that the last partial page is padded with zeros if needed
398 size_t bytesToPad
= Roundup(size_t(dst_
), page_size_
) - size_t(dst_
);
399 if (bytesToPad
> 0) {
400 memset(dst_
, 0, bytesToPad
);
406 // Means Close() will properly take care of truncate
407 // and it does not need any additional information
408 Status
WinMmapFile::Truncate(uint64_t size
) {
412 Status
WinMmapFile::Close() {
415 assert(NULL
!= hFile_
);
417 // We truncate to the precise size so no
418 // uninitialized data at the end. SetEndOfFile
419 // which we use does not write zeros and it is good.
420 uint64_t targetSize
= GetFileSize();
422 if (mapped_begin_
!= nullptr) {
423 // Sync before unmapping to make sure everything
424 // is on disk and there is not a lazy writing
425 // so we are deterministic with the tests
427 s
= UnmapCurrentRegion();
431 BOOL ret
= ::CloseHandle(hMap_
);
432 if (!ret
&& s
.ok()) {
433 auto lastError
= GetLastError();
434 s
= IOErrorFromWindowsError(
435 "Failed to Close mapping for file: " + filename_
, lastError
);
441 if (hFile_
!= NULL
) {
443 TruncateFile(targetSize
);
445 BOOL ret
= ::CloseHandle(hFile_
);
448 if (!ret
&& s
.ok()) {
449 auto lastError
= GetLastError();
450 s
= IOErrorFromWindowsError(
451 "Failed to close file map handle: " + filename_
, lastError
);
458 Status
WinMmapFile::Flush() { return Status::OK(); }
461 Status
WinMmapFile::Sync() {
464 // Some writes occurred since last sync
465 if (dst_
> last_sync_
) {
466 assert(mapped_begin_
);
468 assert(dst_
> mapped_begin_
);
469 assert(dst_
< mapped_end_
);
472 TruncateToPageBoundary(page_size_
, last_sync_
- mapped_begin_
);
474 TruncateToPageBoundary(page_size_
, dst_
- mapped_begin_
- 1);
476 // Flush only an amount that is a multiple of pages
477 if (!::FlushViewOfFile(mapped_begin_
+ page_begin
,
478 (page_end
- page_begin
) + page_size_
)) {
479 s
= IOErrorFromWindowsError("Failed to FlushViewOfFile: " + filename_
,
490 * Flush data as well as metadata to stable storage.
492 Status
WinMmapFile::Fsync() {
496 if (s
.ok() && pending_sync_
) {
497 if (!::FlushFileBuffers(hFile_
)) {
498 s
= IOErrorFromWindowsError("Failed to FlushFileBuffers: " + filename_
,
501 pending_sync_
= false;
508 * Get the size of valid data in the file. This will not match the
509 * size that is returned from the filesystem because we use mmap
510 * to extend file by map_size every time.
512 uint64_t WinMmapFile::GetFileSize() {
513 size_t used
= dst_
- mapped_begin_
;
514 return file_offset_
+ used
;
517 Status
WinMmapFile::InvalidateCache(size_t offset
, size_t length
) {
521 Status
WinMmapFile::Allocate(uint64_t offset
, uint64_t len
) {
523 TEST_KILL_RANDOM("WinMmapFile::Allocate", rocksdb_kill_odds
);
525 // Make sure that we reserve an aligned amount of space
526 // since the reservation block size is driven outside so we want
527 // to check if we are ok with reservation here
528 size_t spaceToReserve
= Roundup(offset
+ len
, view_size_
);
530 if (spaceToReserve
<= reserved_size_
) {
534 IOSTATS_TIMER_GUARD(allocate_nanos
);
535 status
= PreallocateInternal(spaceToReserve
);
537 reserved_size_
= spaceToReserve
;
542 size_t WinMmapFile::GetUniqueId(char* id
, size_t max_size
) const {
543 return GetUniqueIdFromFile(hFile_
, id
, max_size
);
546 //////////////////////////////////////////////////////////////////////////////////
549 WinSequentialFile::WinSequentialFile(const std::string
& fname
, HANDLE f
,
550 const EnvOptions
& options
)
551 : WinFileData(fname
, f
, options
.use_direct_reads
) {}
553 WinSequentialFile::~WinSequentialFile() {
554 assert(hFile_
!= INVALID_HANDLE_VALUE
);
557 Status
WinSequentialFile::Read(size_t n
, Slice
* result
, char* scratch
) {
558 assert(result
!= nullptr && !WinFileData::use_direct_io());
562 // Windows ReadFile API accepts a DWORD.
563 // While it is possible to read in a loop if n is > UINT_MAX
564 // it is a highly unlikely case.
566 return IOErrorFromWindowsError(filename_
, ERROR_INVALID_PARAMETER
);
569 DWORD bytesToRead
= static_cast<DWORD
>(n
); //cast is safe due to the check above
571 BOOL ret
= ReadFile(hFile_
, scratch
, bytesToRead
, &bytesRead
, NULL
);
575 return IOErrorFromWindowsError(filename_
, GetLastError());
578 *result
= Slice(scratch
, r
);
583 SSIZE_T
WinSequentialFile::PositionedReadInternal(char* src
, size_t numBytes
,
584 uint64_t offset
) const {
585 return pread(GetFileHandle(), src
, numBytes
, offset
);
588 Status
WinSequentialFile::PositionedRead(uint64_t offset
, size_t n
, Slice
* result
,
593 assert(WinFileData::use_direct_io());
595 // Windows ReadFile API accepts a DWORD.
596 // While it is possible to read in a loop if n is > UINT_MAX
597 // it is a highly unlikely case.
599 return IOErrorFromWindowsError(GetName(), ERROR_INVALID_PARAMETER
);
602 auto r
= PositionedReadInternal(scratch
, n
, offset
);
605 auto lastError
= GetLastError();
606 // Posix impl wants to treat reads from beyond
607 // the end of the file as OK.
608 if (lastError
!= ERROR_HANDLE_EOF
) {
609 s
= IOErrorFromWindowsError(GetName(), lastError
);
613 *result
= Slice(scratch
, (r
< 0) ? 0 : size_t(r
));
618 Status
WinSequentialFile::Skip(uint64_t n
) {
619 // Can't handle more than signed max as SetFilePointerEx accepts a signed 64-bit
620 // integer. As such it is a highly unlikely case to have n so large.
622 return IOErrorFromWindowsError(filename_
, ERROR_INVALID_PARAMETER
);
626 li
.QuadPart
= static_cast<int64_t>(n
); //cast is safe due to the check above
627 BOOL ret
= SetFilePointerEx(hFile_
, li
, NULL
, FILE_CURRENT
);
629 return IOErrorFromWindowsError(filename_
, GetLastError());
634 Status
WinSequentialFile::InvalidateCache(size_t offset
, size_t length
) {
638 //////////////////////////////////////////////////////////////////////////////////////////////////
639 /// WinRandomAccessBase
642 void CalculateReadParameters(size_t alignment
, uint64_t offset
,
643 size_t bytes_requested
,
644 size_t& actual_bytes_toread
,
645 uint64_t& first_page_start
) {
647 first_page_start
= TruncateToPageBoundary(alignment
, offset
);
648 const uint64_t last_page_start
=
649 TruncateToPageBoundary(alignment
, offset
+ bytes_requested
- 1);
650 actual_bytes_toread
= (last_page_start
- first_page_start
) + alignment
;
653 SSIZE_T
WinRandomAccessImpl::ReadIntoBuffer(uint64_t user_offset
,
654 uint64_t first_page_start
,
655 size_t bytes_to_read
, size_t& left
,
656 AlignedBuffer
& buffer
, char* dest
) const {
657 assert(buffer
.CurrentSize() == 0);
658 assert(buffer
.Capacity() >= bytes_to_read
);
661 PositionedReadInternal(buffer
.Destination(), bytes_to_read
,
667 // Let's figure out how much we read from the users standpoint
668 if ((first_page_start
+ buffer
.CurrentSize()) > user_offset
) {
669 assert(first_page_start
<= user_offset
);
670 size_t buffer_offset
= user_offset
- first_page_start
;
671 read
= buffer
.Read(dest
, buffer_offset
, left
);
680 SSIZE_T
WinRandomAccessImpl::ReadIntoOneShotBuffer(uint64_t user_offset
,
681 uint64_t first_page_start
,
682 size_t bytes_to_read
, size_t& left
,
684 AlignedBuffer bigBuffer
;
685 bigBuffer
.Alignment(buffer_
.Alignment());
686 bigBuffer
.AllocateNewBuffer(bytes_to_read
);
688 return ReadIntoBuffer(user_offset
, first_page_start
, bytes_to_read
, left
,
692 SSIZE_T
WinRandomAccessImpl::ReadIntoInstanceBuffer(uint64_t user_offset
,
693 uint64_t first_page_start
,
694 size_t bytes_to_read
, size_t& left
,
696 SSIZE_T read
= ReadIntoBuffer(user_offset
, first_page_start
, bytes_to_read
,
697 left
, buffer_
, dest
);
700 buffered_start_
= first_page_start
;
706 SSIZE_T
WinRandomAccessImpl::PositionedReadInternal(char* src
,
708 uint64_t offset
) const {
709 return pread(file_base_
->GetFileHandle(), src
, numBytes
, offset
);
713 WinRandomAccessImpl::WinRandomAccessImpl(WinFileData
* file_base
,
715 const EnvOptions
& options
) :
716 file_base_(file_base
),
718 compaction_readahead_size_(options
.compaction_readahead_size
),
719 random_access_max_buffer_size_(options
.random_access_max_buffer_size
),
723 assert(!options
.use_mmap_reads
);
725 // Do not allocate the buffer either until the first request or
726 // until there is a call to allocate a read-ahead buffer
727 buffer_
.Alignment(alignment
);
731 Status
WinRandomAccessImpl::ReadImpl(uint64_t offset
, size_t n
, Slice
* result
,
732 char* scratch
) const {
737 char* dest
= scratch
;
740 *result
= Slice(scratch
, 0);
744 // When in direct I/O mode we need to do the following changes:
745 // - use our own aligned buffer
746 // - always read at an offset that is a multiple of alignment
747 if (file_base_
->use_direct_io()) {
748 uint64_t first_page_start
= 0;
749 size_t actual_bytes_toread
= 0;
750 size_t bytes_requested
= left
;
752 if (!read_ahead_
&& random_access_max_buffer_size_
== 0) {
753 CalculateReadParameters(buffer_
.Alignment(), offset
, bytes_requested
,
757 assert(actual_bytes_toread
> 0);
759 r
= ReadIntoOneShotBuffer(offset
, first_page_start
,
760 actual_bytes_toread
, left
, dest
);
763 std::unique_lock
<std::mutex
> lock(buffer_mut_
);
765 // Let's see if at least some of the requested data is already
767 if (offset
>= buffered_start_
&&
768 offset
< (buffered_start_
+ buffer_
.CurrentSize())) {
769 size_t buffer_offset
= offset
- buffered_start_
;
770 r
= buffer_
.Read(dest
, buffer_offset
, left
);
778 // Still some left or none was buffered
780 // Figure out the start/end offset for reading and amount to read
781 bytes_requested
= left
;
783 if (read_ahead_
&& bytes_requested
< compaction_readahead_size_
) {
784 bytes_requested
= compaction_readahead_size_
;
787 CalculateReadParameters(buffer_
.Alignment(), offset
, bytes_requested
,
791 assert(actual_bytes_toread
> 0);
793 if (buffer_
.Capacity() < actual_bytes_toread
) {
794 // If we are in read-ahead mode or the requested size
795 // exceeds max buffer size then use one-shot
796 // big buffer otherwise reallocate main buffer
798 (actual_bytes_toread
> random_access_max_buffer_size_
)) {
799 // Unlock the mutex since we are not using instance buffer
801 r
= ReadIntoOneShotBuffer(offset
, first_page_start
,
802 actual_bytes_toread
, left
, dest
);
804 buffer_
.AllocateNewBuffer(actual_bytes_toread
);
805 r
= ReadIntoInstanceBuffer(offset
, first_page_start
,
806 actual_bytes_toread
, left
, dest
);
810 r
= ReadIntoInstanceBuffer(offset
, first_page_start
,
811 actual_bytes_toread
, left
, dest
);
816 r
= PositionedReadInternal(scratch
, left
, offset
);
823 auto lastError
= GetLastError();
824 // Posix impl wants to treat reads from beyond
825 // the end of the file as OK.
826 if(lastError
!= ERROR_HANDLE_EOF
) {
827 s
= IOErrorFromWindowsError(file_base_
->GetName(), lastError
);
831 *result
= Slice(scratch
, (r
< 0) ? 0 : n
- left
);
837 void WinRandomAccessImpl::HintImpl(RandomAccessFile::AccessPattern pattern
) {
838 if (pattern
== RandomAccessFile::SEQUENTIAL
&& file_base_
->use_direct_io() &&
839 compaction_readahead_size_
> 0) {
840 std::lock_guard
<std::mutex
> lg(buffer_mut_
);
843 // This would allocate read-ahead size + 2 alignments
844 // - one for memory alignment which added implicitly by AlignedBuffer
845 // - We add one more alignment because we will read one alignment more
847 buffer_
.AllocateNewBuffer(compaction_readahead_size_
+
848 buffer_
.Alignment());
853 ///////////////////////////////////////////////////////////////////////////////////////////////////
854 /// WinRandomAccessFile
856 WinRandomAccessFile::WinRandomAccessFile(const std::string
& fname
, HANDLE hFile
,
858 const EnvOptions
& options
)
859 : WinFileData(fname
, hFile
, options
.use_direct_reads
),
860 WinRandomAccessImpl(this, alignment
, options
) {}
862 WinRandomAccessFile::~WinRandomAccessFile() {
865 Status
WinRandomAccessFile::Read(uint64_t offset
, size_t n
, Slice
* result
,
866 char* scratch
) const {
867 return ReadImpl(offset
, n
, result
, scratch
);
870 void WinRandomAccessFile::EnableReadAhead() {
871 HintImpl(SEQUENTIAL
);
874 bool WinRandomAccessFile::ShouldForwardRawRequest() const {
878 void WinRandomAccessFile::Hint(AccessPattern pattern
) {
882 Status
WinRandomAccessFile::InvalidateCache(size_t offset
, size_t length
) {
886 size_t WinRandomAccessFile::GetUniqueId(char* id
, size_t max_size
) const {
887 return GetUniqueIdFromFile(GetFileHandle(), id
, max_size
);
890 size_t WinRandomAccessFile::GetRequiredBufferAlignment() const {
891 return GetAlignment();
894 /////////////////////////////////////////////////////////////////////////////
899 Status
WinWritableImpl::PreallocateInternal(uint64_t spaceToReserve
) {
900 return fallocate(file_data_
->GetName(), file_data_
->GetFileHandle(), spaceToReserve
);
903 WinWritableImpl::WinWritableImpl(WinFileData
* file_data
, size_t alignment
)
904 : file_data_(file_data
),
905 alignment_(alignment
),
910 Status
WinWritableImpl::AppendImpl(const Slice
& data
) {
914 assert(data
.size() < std::numeric_limits
<DWORD
>::max());
916 uint64_t written
= 0;
918 if (file_data_
->use_direct_io()) {
920 // With no offset specified we are appending
921 // to the end of the file
923 assert(IsSectorAligned(filesize_
));
924 assert(IsSectorAligned(data
.size()));
925 assert(IsAligned(GetAlignement(), data
.data()));
927 SSIZE_T ret
= pwrite(file_data_
->GetFileHandle(), data
.data(),
928 data
.size(), filesize_
);
931 auto lastError
= GetLastError();
932 s
= IOErrorFromWindowsError(
933 "Failed to pwrite for: " + file_data_
->GetName(), lastError
);
941 DWORD bytesWritten
= 0;
942 if (!WriteFile(file_data_
->GetFileHandle(), data
.data(),
943 static_cast<DWORD
>(data
.size()), &bytesWritten
, NULL
)) {
944 auto lastError
= GetLastError();
945 s
= IOErrorFromWindowsError(
946 "Failed to WriteFile: " + file_data_
->GetName(),
950 written
= bytesWritten
;
955 assert(written
== data
.size());
956 filesize_
+= data
.size();
962 Status
WinWritableImpl::PositionedAppendImpl(const Slice
& data
, uint64_t offset
) {
964 if(file_data_
->use_direct_io()) {
965 assert(IsSectorAligned(offset
));
966 assert(IsSectorAligned(data
.size()));
967 assert(IsAligned(GetAlignement(), data
.data()));
972 SSIZE_T ret
= pwrite(file_data_
->GetFileHandle(), data
.data(), data
.size(), offset
);
976 auto lastError
= GetLastError();
977 s
= IOErrorFromWindowsError(
978 "Failed to pwrite for: " + file_data_
->GetName(), lastError
);
981 assert(size_t(ret
) == data
.size());
982 // For sequential write this would be simple
983 // size extension by data.size()
984 uint64_t write_end
= offset
+ data
.size();
985 if (write_end
>= filesize_
) {
986 filesize_
= write_end
;
992 // Need to implement this so the file is truncated correctly
993 // when buffered and unbuffered mode
995 Status
WinWritableImpl::TruncateImpl(uint64_t size
) {
996 Status s
= ftruncate(file_data_
->GetName(), file_data_
->GetFileHandle(),
1004 Status
WinWritableImpl::CloseImpl() {
1008 auto hFile
= file_data_
->GetFileHandle();
1009 assert(INVALID_HANDLE_VALUE
!= hFile
);
1011 if (fsync(hFile
) < 0) {
1012 auto lastError
= GetLastError();
1013 s
= IOErrorFromWindowsError("fsync failed at Close() for: " +
1014 file_data_
->GetName(),
1018 if(!file_data_
->CloseFile()) {
1019 auto lastError
= GetLastError();
1020 s
= IOErrorFromWindowsError("CloseHandle failed for: " + file_data_
->GetName(),
1026 Status
WinWritableImpl::SyncImpl() {
1028 // Calls flush buffers
1029 if (fsync(file_data_
->GetFileHandle()) < 0) {
1030 auto lastError
= GetLastError();
1031 s
= IOErrorFromWindowsError(
1032 "fsync failed at Sync() for: " + file_data_
->GetName(), lastError
);
1038 Status
WinWritableImpl::AllocateImpl(uint64_t offset
, uint64_t len
) {
1040 TEST_KILL_RANDOM("WinWritableFile::Allocate", rocksdb_kill_odds
);
1042 // Make sure that we reserve an aligned amount of space
1043 // since the reservation block size is driven outside so we want
1044 // to check if we are ok with reservation here
1045 size_t spaceToReserve
= Roundup(offset
+ len
, alignment_
);
1047 if (spaceToReserve
<= reservedsize_
) {
1051 IOSTATS_TIMER_GUARD(allocate_nanos
);
1052 status
= PreallocateInternal(spaceToReserve
);
1054 reservedsize_
= spaceToReserve
;
1060 ////////////////////////////////////////////////////////////////////////////////
1063 WinWritableFile::WinWritableFile(const std::string
& fname
, HANDLE hFile
,
1064 size_t alignment
, size_t /* capacity */,
1065 const EnvOptions
& options
)
1066 : WinFileData(fname
, hFile
, options
.use_direct_writes
),
1067 WinWritableImpl(this, alignment
) {
1068 assert(!options
.use_mmap_writes
);
1071 WinWritableFile::~WinWritableFile() {
1074 // Indicates if the class makes use of direct I/O
1075 bool WinWritableFile::use_direct_io() const { return WinFileData::use_direct_io(); }
1077 size_t WinWritableFile::GetRequiredBufferAlignment() const {
1078 return GetAlignement();
1081 Status
WinWritableFile::Append(const Slice
& data
) {
1082 return AppendImpl(data
);
1085 Status
WinWritableFile::PositionedAppend(const Slice
& data
, uint64_t offset
) {
1086 return PositionedAppendImpl(data
, offset
);
1089 // Need to implement this so the file is truncated correctly
1090 // when buffered and unbuffered mode
1091 Status
WinWritableFile::Truncate(uint64_t size
) {
1092 return TruncateImpl(size
);
1095 Status
WinWritableFile::Close() {
1099 // write out the cached data to the OS cache
1100 // This is now taken care of the WritableFileWriter
1101 Status
WinWritableFile::Flush() {
1102 return Status::OK();
1105 Status
WinWritableFile::Sync() {
1109 Status
WinWritableFile::Fsync() { return SyncImpl(); }
1111 uint64_t WinWritableFile::GetFileSize() {
1112 return GetFileSizeImpl();
1115 Status
WinWritableFile::Allocate(uint64_t offset
, uint64_t len
) {
1116 return AllocateImpl(offset
, len
);
1119 size_t WinWritableFile::GetUniqueId(char* id
, size_t max_size
) const {
1120 return GetUniqueIdFromFile(GetFileHandle(), id
, max_size
);
1123 /////////////////////////////////////////////////////////////////////////
1126 WinRandomRWFile::WinRandomRWFile(const std::string
& fname
, HANDLE hFile
,
1127 size_t alignment
, const EnvOptions
& options
)
1128 : WinFileData(fname
, hFile
,
1129 options
.use_direct_reads
&& options
.use_direct_writes
),
1130 WinRandomAccessImpl(this, alignment
, options
),
1131 WinWritableImpl(this, alignment
) {}
1133 bool WinRandomRWFile::use_direct_io() const { return WinFileData::use_direct_io(); }
1135 size_t WinRandomRWFile::GetRequiredBufferAlignment() const {
1136 return GetAlignement();
1139 bool WinRandomRWFile::ShouldForwardRawRequest() const {
1143 void WinRandomRWFile::EnableReadAhead() {
1144 HintImpl(RandomAccessFile::SEQUENTIAL
);
1147 Status
WinRandomRWFile::Write(uint64_t offset
, const Slice
& data
) {
1148 return PositionedAppendImpl(data
, offset
);
1151 Status
WinRandomRWFile::Read(uint64_t offset
, size_t n
, Slice
* result
,
1152 char* scratch
) const {
1153 return ReadImpl(offset
, n
, result
, scratch
);
1156 Status
WinRandomRWFile::Flush() {
1157 return Status::OK();
1160 Status
WinRandomRWFile::Sync() {
1164 Status
WinRandomRWFile::Close() {
1168 //////////////////////////////////////////////////////////////////////////
1171 Status
WinDirectory::Fsync() { return Status::OK(); }
1173 //////////////////////////////////////////////////////////////////////////
1176 WinFileLock::~WinFileLock() {
1177 BOOL ret
= ::CloseHandle(hFile_
);