]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | #pragma once | |
10 | ||
11 | #include <stdint.h> | |
12 | #include <mutex> | |
13 | #include <string> | |
14 | ||
11fdf7f2 | 15 | #include "rocksdb/status.h" |
7c673cae FG |
16 | #include "rocksdb/env.h" |
17 | #include "util/aligned_buffer.h" | |
18 | ||
11fdf7f2 | 19 | #include <windows.h> |
7c673cae FG |
20 | |
21 | ||
22 | namespace rocksdb { | |
23 | namespace port { | |
24 | ||
// Produces the message text for the Windows error code `err`. The helpers
// below pass its result as the second argument of the Status they build.
// Defined outside this header.
std::string GetWindowsErrSz(DWORD err);
26 | ||
27 | inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) { | |
28 | return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL)) | |
29 | ? Status::NoSpace(context, GetWindowsErrSz(err)) | |
494da23a TL |
30 | : ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND)) |
31 | ? Status::PathNotFound(context, GetWindowsErrSz(err)) | |
32 | : Status::IOError(context, GetWindowsErrSz(err)); | |
7c673cae FG |
33 | } |
34 | ||
35 | inline Status IOErrorFromLastWindowsError(const std::string& context) { | |
36 | return IOErrorFromWindowsError(context, GetLastError()); | |
37 | } | |
38 | ||
39 | inline Status IOError(const std::string& context, int err_number) { | |
40 | return (err_number == ENOSPC) | |
41 | ? Status::NoSpace(context, strerror(err_number)) | |
494da23a TL |
42 | : (err_number == ENOENT) |
43 | ? Status::PathNotFound(context, strerror(err_number)) | |
44 | : Status::IOError(context, strerror(err_number)); | |
7c673cae FG |
45 | } |
46 | ||
// Forward declaration; the class itself is defined further down in this
// header. The helpers below only need a pointer to it.
class WinFileData;

// Positional write helper, defined outside this header.
// NOTE(review): judging by the signature, `data` is written at byte `offset`
// and the number of bytes written is reported through `bytes_written` -
// confirm against the definition.
Status pwrite(const WinFileData* file_data, const Slice& data,
              uint64_t offset, size_t& bytes_written);

// Positional read helper, defined outside this header.
// NOTE(review): judging by the signature, up to `num_bytes` bytes at byte
// `offset` are read into `src` and the count is reported through
// `bytes_read` - confirm against the definition.
Status pread(const WinFileData* file_data, char* src, size_t num_bytes,
             uint64_t offset, size_t& bytes_read);

// Defined outside this header.
// NOTE(review): presumably reserves space so that `hFile` spans `to_size`
// bytes, with `filename` used for error reporting - confirm.
Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);

// Defined outside this header.
// NOTE(review): presumably sets the size of `hFile` to `toSize` bytes, with
// `filename` used for error reporting - confirm.
Status ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);

// Defined outside this header.
// NOTE(review): presumably writes an identifier for `hFile` into `id` (at
// most `max_size` bytes) and returns the number of bytes used - confirm.
size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
60 | ||
61 | class WinFileData { | |
62 | protected: | |
63 | const std::string filename_; | |
64 | HANDLE hFile_; | |
494da23a | 65 | // If true, the I/O issued would be direct I/O which the buffer |
7c673cae FG |
66 | // will need to be aligned (not sure there is a guarantee that the buffer |
67 | // passed in is aligned). | |
68 | const bool use_direct_io_; | |
69 | ||
70 | public: | |
71 | // We want this class be usable both for inheritance (prive | |
72 | // or protected) and for containment so __ctor and __dtor public | |
11fdf7f2 TL |
73 | WinFileData(const std::string& filename, HANDLE hFile, bool direct_io) |
74 | : filename_(filename), hFile_(hFile), use_direct_io_(direct_io) {} | |
7c673cae FG |
75 | |
76 | virtual ~WinFileData() { this->CloseFile(); } | |
77 | ||
78 | bool CloseFile() { | |
79 | bool result = true; | |
80 | ||
81 | if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) { | |
82 | result = ::CloseHandle(hFile_); | |
83 | assert(result); | |
84 | hFile_ = NULL; | |
85 | } | |
86 | return result; | |
87 | } | |
88 | ||
89 | const std::string& GetName() const { return filename_; } | |
90 | ||
91 | HANDLE GetFileHandle() const { return hFile_; } | |
92 | ||
93 | bool use_direct_io() const { return use_direct_io_; } | |
94 | ||
95 | WinFileData(const WinFileData&) = delete; | |
96 | WinFileData& operator=(const WinFileData&) = delete; | |
97 | }; | |
98 | ||
99 | class WinSequentialFile : protected WinFileData, public SequentialFile { | |
100 | ||
101 | // Override for behavior change when creating a custom env | |
11fdf7f2 TL |
102 | virtual Status PositionedReadInternal(char* src, size_t numBytes, |
103 | uint64_t offset, size_t& bytes_read) const; | |
7c673cae FG |
104 | |
105 | public: | |
106 | WinSequentialFile(const std::string& fname, HANDLE f, | |
107 | const EnvOptions& options); | |
108 | ||
109 | ~WinSequentialFile(); | |
110 | ||
111 | WinSequentialFile(const WinSequentialFile&) = delete; | |
112 | WinSequentialFile& operator=(const WinSequentialFile&) = delete; | |
113 | ||
114 | virtual Status Read(size_t n, Slice* result, char* scratch) override; | |
115 | virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result, | |
116 | char* scratch) override; | |
117 | ||
118 | virtual Status Skip(uint64_t n) override; | |
119 | ||
120 | virtual Status InvalidateCache(size_t offset, size_t length) override; | |
121 | ||
122 | virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); } | |
123 | }; | |
124 | ||
125 | // mmap() based random-access | |
126 | class WinMmapReadableFile : private WinFileData, public RandomAccessFile { | |
127 | HANDLE hMap_; | |
128 | ||
129 | const void* mapped_region_; | |
130 | const size_t length_; | |
131 | ||
132 | public: | |
133 | // mapped_region_[0,length-1] contains the mmapped contents of the file. | |
134 | WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap, | |
135 | const void* mapped_region, size_t length); | |
136 | ||
137 | ~WinMmapReadableFile(); | |
138 | ||
139 | WinMmapReadableFile(const WinMmapReadableFile&) = delete; | |
140 | WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete; | |
141 | ||
142 | virtual Status Read(uint64_t offset, size_t n, Slice* result, | |
143 | char* scratch) const override; | |
144 | ||
145 | virtual Status InvalidateCache(size_t offset, size_t length) override; | |
146 | ||
147 | virtual size_t GetUniqueId(char* id, size_t max_size) const override; | |
148 | }; | |
149 | ||
150 | // We preallocate and use memcpy to append new | |
151 | // data to the file. This is safe since we either properly close the | |
152 | // file before reading from it, or for log files, the reading code | |
153 | // knows enough to skip zero suffixes. | |
154 | class WinMmapFile : private WinFileData, public WritableFile { | |
155 | private: | |
156 | HANDLE hMap_; | |
157 | ||
158 | const size_t page_size_; // We flush the mapping view in page_size | |
159 | // increments. We may decide if this is a memory | |
160 | // page size or SSD page size | |
161 | const size_t | |
162 | allocation_granularity_; // View must start at such a granularity | |
163 | ||
164 | size_t reserved_size_; // Preallocated size | |
165 | ||
166 | size_t mapping_size_; // The max size of the mapping object | |
167 | // we want to guess the final file size to minimize the remapping | |
168 | size_t view_size_; // How much memory to map into a view at a time | |
169 | ||
170 | char* mapped_begin_; // Must begin at the file offset that is aligned with | |
171 | // allocation_granularity_ | |
172 | char* mapped_end_; | |
173 | char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_]) | |
174 | char* last_sync_; // Where have we synced up to | |
175 | ||
176 | uint64_t file_offset_; // Offset of mapped_begin_ in file | |
177 | ||
178 | // Do we have unsynced writes? | |
179 | bool pending_sync_; | |
180 | ||
181 | // Can only truncate or reserve to a sector size aligned if | |
182 | // used on files that are opened with Unbuffered I/O | |
183 | Status TruncateFile(uint64_t toSize); | |
184 | ||
185 | Status UnmapCurrentRegion(); | |
186 | ||
187 | Status MapNewRegion(); | |
188 | ||
189 | virtual Status PreallocateInternal(uint64_t spaceToReserve); | |
190 | ||
191 | public: | |
192 | WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size, | |
193 | size_t allocation_granularity, const EnvOptions& options); | |
194 | ||
195 | ~WinMmapFile(); | |
196 | ||
197 | WinMmapFile(const WinMmapFile&) = delete; | |
198 | WinMmapFile& operator=(const WinMmapFile&) = delete; | |
199 | ||
200 | virtual Status Append(const Slice& data) override; | |
201 | ||
202 | // Means Close() will properly take care of truncate | |
203 | // and it does not need any additional information | |
204 | virtual Status Truncate(uint64_t size) override; | |
205 | ||
206 | virtual Status Close() override; | |
207 | ||
208 | virtual Status Flush() override; | |
209 | ||
210 | // Flush only data | |
211 | virtual Status Sync() override; | |
212 | ||
213 | /** | |
214 | * Flush data as well as metadata to stable storage. | |
215 | */ | |
216 | virtual Status Fsync() override; | |
217 | ||
218 | /** | |
219 | * Get the size of valid data in the file. This will not match the | |
220 | * size that is returned from the filesystem because we use mmap | |
221 | * to extend file by map_size every time. | |
222 | */ | |
223 | virtual uint64_t GetFileSize() override; | |
224 | ||
225 | virtual Status InvalidateCache(size_t offset, size_t length) override; | |
226 | ||
227 | virtual Status Allocate(uint64_t offset, uint64_t len) override; | |
228 | ||
229 | virtual size_t GetUniqueId(char* id, size_t max_size) const override; | |
230 | }; | |
231 | ||
232 | class WinRandomAccessImpl { | |
233 | protected: | |
234 | WinFileData* file_base_; | |
11fdf7f2 | 235 | size_t alignment_; |
7c673cae FG |
236 | |
237 | // Override for behavior change when creating a custom env | |
11fdf7f2 TL |
238 | virtual Status PositionedReadInternal(char* src, size_t numBytes, |
239 | uint64_t offset, size_t& bytes_read) const; | |
7c673cae FG |
240 | |
241 | WinRandomAccessImpl(WinFileData* file_base, size_t alignment, | |
242 | const EnvOptions& options); | |
243 | ||
244 | virtual ~WinRandomAccessImpl() {} | |
245 | ||
246 | Status ReadImpl(uint64_t offset, size_t n, Slice* result, | |
247 | char* scratch) const; | |
248 | ||
11fdf7f2 | 249 | size_t GetAlignment() const { return alignment_; } |
7c673cae FG |
250 | |
251 | public: | |
252 | ||
253 | WinRandomAccessImpl(const WinRandomAccessImpl&) = delete; | |
254 | WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete; | |
255 | }; | |
256 | ||
257 | // pread() based random-access | |
258 | class WinRandomAccessFile | |
259 | : private WinFileData, | |
260 | protected WinRandomAccessImpl, // Want to be able to override | |
261 | // PositionedReadInternal | |
262 | public RandomAccessFile { | |
263 | public: | |
264 | WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment, | |
265 | const EnvOptions& options); | |
266 | ||
267 | ~WinRandomAccessFile(); | |
268 | ||
269 | virtual Status Read(uint64_t offset, size_t n, Slice* result, | |
270 | char* scratch) const override; | |
271 | ||
7c673cae FG |
272 | virtual size_t GetUniqueId(char* id, size_t max_size) const override; |
273 | ||
7c673cae FG |
274 | virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); } |
275 | ||
276 | virtual Status InvalidateCache(size_t offset, size_t length) override; | |
277 | ||
278 | virtual size_t GetRequiredBufferAlignment() const override; | |
279 | }; | |
280 | ||
281 | // This is a sequential write class. It has been mimicked (as others) after | |
282 | // the original Posix class. We add support for unbuffered I/O on windows as | |
283 | // well | |
284 | // we utilize the original buffer as an alignment buffer to write directly to | |
285 | // file with no buffering. | |
286 | // No buffering requires that the provided buffer is aligned to the physical | |
287 | // sector size (SSD page size) and | |
288 | // that all SetFilePointer() operations to occur with such an alignment. | |
289 | // We thus always write in sector/page size increments to the drive and leave | |
290 | // the tail for the next write OR for Close() at which point we pad with zeros. | |
291 | // No padding is required for | |
292 | // buffered access. | |
293 | class WinWritableImpl { | |
294 | protected: | |
295 | WinFileData* file_data_; | |
296 | const uint64_t alignment_; | |
11fdf7f2 | 297 | uint64_t next_write_offset_; // Needed because Windows does not support O_APPEND |
7c673cae FG |
298 | uint64_t reservedsize_; // how far we have reserved space |
299 | ||
300 | virtual Status PreallocateInternal(uint64_t spaceToReserve); | |
301 | ||
302 | WinWritableImpl(WinFileData* file_data, size_t alignment); | |
303 | ||
304 | ~WinWritableImpl() {} | |
305 | ||
306 | uint64_t GetAlignement() const { return alignment_; } | |
307 | ||
308 | Status AppendImpl(const Slice& data); | |
309 | ||
310 | // Requires that the data is aligned as specified by | |
311 | // GetRequiredBufferAlignment() | |
312 | Status PositionedAppendImpl(const Slice& data, uint64_t offset); | |
313 | ||
314 | Status TruncateImpl(uint64_t size); | |
315 | ||
316 | Status CloseImpl(); | |
317 | ||
318 | Status SyncImpl(); | |
319 | ||
11fdf7f2 | 320 | uint64_t GetFileNextWriteOffset() { |
7c673cae FG |
321 | // Double accounting now here with WritableFileWriter |
322 | // and this size will be wrong when unbuffered access is used | |
323 | // but tests implement their own writable files and do not use | |
324 | // WritableFileWrapper | |
325 | // so we need to squeeze a square peg through | |
326 | // a round hole here. | |
11fdf7f2 | 327 | return next_write_offset_; |
7c673cae FG |
328 | } |
329 | ||
330 | Status AllocateImpl(uint64_t offset, uint64_t len); | |
331 | ||
332 | public: | |
333 | WinWritableImpl(const WinWritableImpl&) = delete; | |
334 | WinWritableImpl& operator=(const WinWritableImpl&) = delete; | |
335 | }; | |
336 | ||
337 | class WinWritableFile : private WinFileData, | |
338 | protected WinWritableImpl, | |
339 | public WritableFile { | |
340 | public: | |
341 | WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment, | |
342 | size_t capacity, const EnvOptions& options); | |
343 | ||
344 | ~WinWritableFile(); | |
345 | ||
346 | virtual Status Append(const Slice& data) override; | |
347 | ||
348 | // Requires that the data is aligned as specified by | |
349 | // GetRequiredBufferAlignment() | |
350 | virtual Status PositionedAppend(const Slice& data, uint64_t offset) override; | |
351 | ||
352 | // Need to implement this so the file is truncated correctly | |
353 | // when buffered and unbuffered mode | |
354 | virtual Status Truncate(uint64_t size) override; | |
355 | ||
356 | virtual Status Close() override; | |
357 | ||
358 | // write out the cached data to the OS cache | |
359 | // This is now taken care of the WritableFileWriter | |
360 | virtual Status Flush() override; | |
361 | ||
362 | virtual Status Sync() override; | |
363 | ||
364 | virtual Status Fsync() override; | |
365 | ||
11fdf7f2 TL |
366 | virtual bool IsSyncThreadSafe() const override; |
367 | ||
7c673cae FG |
368 | // Indicates if the class makes use of direct I/O |
369 | // Use PositionedAppend | |
370 | virtual bool use_direct_io() const override; | |
371 | ||
372 | virtual size_t GetRequiredBufferAlignment() const override; | |
373 | ||
374 | virtual uint64_t GetFileSize() override; | |
375 | ||
376 | virtual Status Allocate(uint64_t offset, uint64_t len) override; | |
377 | ||
378 | virtual size_t GetUniqueId(char* id, size_t max_size) const override; | |
379 | }; | |
380 | ||
381 | class WinRandomRWFile : private WinFileData, | |
382 | protected WinRandomAccessImpl, | |
383 | protected WinWritableImpl, | |
384 | public RandomRWFile { | |
385 | public: | |
386 | WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment, | |
387 | const EnvOptions& options); | |
388 | ||
389 | ~WinRandomRWFile() {} | |
390 | ||
391 | // Indicates if the class makes use of direct I/O | |
392 | // If false you must pass aligned buffer to Write() | |
393 | virtual bool use_direct_io() const override; | |
394 | ||
395 | // Use the returned alignment value to allocate aligned | |
396 | // buffer for Write() when use_direct_io() returns true | |
397 | virtual size_t GetRequiredBufferAlignment() const override; | |
398 | ||
7c673cae FG |
399 | // Write bytes in `data` at offset `offset`, Returns Status::OK() on success. |
400 | // Pass aligned buffer when use_direct_io() returns true. | |
401 | virtual Status Write(uint64_t offset, const Slice& data) override; | |
402 | ||
403 | // Read up to `n` bytes starting from offset `offset` and store them in | |
404 | // result, provided `scratch` size should be at least `n`. | |
405 | // Returns Status::OK() on success. | |
406 | virtual Status Read(uint64_t offset, size_t n, Slice* result, | |
407 | char* scratch) const override; | |
408 | ||
409 | virtual Status Flush() override; | |
410 | ||
411 | virtual Status Sync() override; | |
412 | ||
413 | virtual Status Fsync() { return Sync(); } | |
414 | ||
415 | virtual Status Close() override; | |
416 | }; | |
417 | ||
11fdf7f2 TL |
418 | class WinMemoryMappedBuffer : public MemoryMappedFileBuffer { |
419 | private: | |
420 | HANDLE file_handle_; | |
421 | HANDLE map_handle_; | |
422 | public: | |
423 | WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base, size_t size) : | |
424 | MemoryMappedFileBuffer(base, size), | |
425 | file_handle_(file_handle), | |
426 | map_handle_(map_handle) {} | |
427 | ~WinMemoryMappedBuffer() override; | |
428 | }; | |
429 | ||
7c673cae | 430 | class WinDirectory : public Directory { |
11fdf7f2 | 431 | HANDLE handle_; |
7c673cae | 432 | public: |
494da23a | 433 | explicit WinDirectory(HANDLE h) noexcept : handle_(h) { |
11fdf7f2 TL |
434 | assert(handle_ != INVALID_HANDLE_VALUE); |
435 | } | |
436 | ~WinDirectory() { | |
437 | ::CloseHandle(handle_); | |
438 | } | |
7c673cae | 439 | virtual Status Fsync() override; |
11fdf7f2 TL |
440 | |
441 | size_t GetUniqueId(char* id, size_t max_size) const override; | |
7c673cae FG |
442 | }; |
443 | ||
444 | class WinFileLock : public FileLock { | |
445 | public: | |
446 | explicit WinFileLock(HANDLE hFile) : hFile_(hFile) { | |
447 | assert(hFile != NULL); | |
448 | assert(hFile != INVALID_HANDLE_VALUE); | |
449 | } | |
450 | ||
451 | ~WinFileLock(); | |
452 | ||
453 | private: | |
454 | HANDLE hFile_; | |
455 | }; | |
456 | } | |
457 | } |