git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/env/io_posix.h
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / env / io_posix.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 #pragma once
10 #include <errno.h>
11 #include <unistd.h>
12 #include <atomic>
13 #include <string>
14 #include "rocksdb/env.h"
15
// On platforms other than Linux, Cygwin and AIX the following macros are
// defined only as placeholders. Each definition is guarded individually so
// that a value already supplied by a system header is never redefined.
#if !(defined OS_LINUX) && !(defined CYGWIN) && !(defined OS_AIX)
#ifndef POSIX_FADV_NORMAL
#define POSIX_FADV_NORMAL 0 /* [MC1] no further special treatment */
#endif
#ifndef POSIX_FADV_RANDOM
#define POSIX_FADV_RANDOM 1 /* [MC1] expect random page refs */
#endif
#ifndef POSIX_FADV_SEQUENTIAL
#define POSIX_FADV_SEQUENTIAL 2 /* [MC1] expect sequential page refs */
#endif
#ifndef POSIX_FADV_WILLNEED
#define POSIX_FADV_WILLNEED 3 /* [MC1] will need these pages */
#endif
#ifndef POSIX_FADV_DONTNEED
#define POSIX_FADV_DONTNEED 4 /* [MC1] dont need these pages */
#endif
#endif
25
26 namespace rocksdb {
// Builds the message text used for I/O error statuses.
//
// Returns `context` unchanged when `file_name` is empty, otherwise
// "<context>: <file_name>".
//
// Declared `inline` rather than `static`: a `static` function defined in a
// header gives every including translation unit its own private copy and
// triggers an unused-function warning wherever the helper is not called.
inline std::string IOErrorMsg(const std::string& context,
                              const std::string& file_name) {
  if (file_name.empty()) {
    return context;
  }
  return context + ": " + file_name;
}
34
35 // file_name can be left empty if it is not unkown.
36 static Status IOError(const std::string& context, const std::string& file_name,
37 int err_number) {
38 switch (err_number) {
39 case ENOSPC:
40 return Status::NoSpace(IOErrorMsg(context, file_name),
41 strerror(err_number));
42 case ESTALE:
43 return Status::IOError(Status::kStaleFile);
44 case ENOENT:
45 return Status::PathNotFound(IOErrorMsg(context, file_name),
46 strerror(err_number));
47 default:
48 return Status::IOError(IOErrorMsg(context, file_name),
49 strerror(err_number));
50 }
51 }
52
// Holder for static POSIX helper routines shared by the file classes.
class PosixHelper {
 public:
  // Declared here, defined elsewhere. Takes a file descriptor, an output
  // buffer `id` and that buffer's capacity `max_size`.
  // NOTE(review): presumably writes an identifier for the file into `id` and
  // returns the number of bytes written -- confirm against the definition.
  static size_t GetUniqueIdFromFile(int fd, char* id, size_t max_size);
};
57
58 class PosixSequentialFile : public SequentialFile {
59 private:
60 std::string filename_;
61 FILE* file_;
62 int fd_;
63 bool use_direct_io_;
64 size_t logical_sector_size_;
65
66 public:
67 PosixSequentialFile(const std::string& fname, FILE* file, int fd,
68 const EnvOptions& options);
69 virtual ~PosixSequentialFile();
70
71 virtual Status Read(size_t n, Slice* result, char* scratch) override;
72 virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result,
73 char* scratch) override;
74 virtual Status Skip(uint64_t n) override;
75 virtual Status InvalidateCache(size_t offset, size_t length) override;
76 virtual bool use_direct_io() const override { return use_direct_io_; }
77 virtual size_t GetRequiredBufferAlignment() const override {
78 return logical_sector_size_;
79 }
80 };
81
82 class PosixRandomAccessFile : public RandomAccessFile {
83 protected:
84 std::string filename_;
85 int fd_;
86 bool use_direct_io_;
87 size_t logical_sector_size_;
88
89 public:
90 PosixRandomAccessFile(const std::string& fname, int fd,
91 const EnvOptions& options);
92 virtual ~PosixRandomAccessFile();
93
94 virtual Status Read(uint64_t offset, size_t n, Slice* result,
95 char* scratch) const override;
96
97 virtual Status Prefetch(uint64_t offset, size_t n) override;
98
99 #if defined(OS_LINUX) || defined(OS_MACOSX) || defined(OS_AIX)
100 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
101 #endif
102 virtual void Hint(AccessPattern pattern) override;
103 virtual Status InvalidateCache(size_t offset, size_t length) override;
104 virtual bool use_direct_io() const override { return use_direct_io_; }
105 virtual size_t GetRequiredBufferAlignment() const override {
106 return logical_sector_size_;
107 }
108 };
109
110 class PosixWritableFile : public WritableFile {
111 protected:
112 const std::string filename_;
113 const bool use_direct_io_;
114 int fd_;
115 uint64_t filesize_;
116 size_t logical_sector_size_;
117 #ifdef ROCKSDB_FALLOCATE_PRESENT
118 bool allow_fallocate_;
119 bool fallocate_with_keep_size_;
120 #endif
121
122 public:
123 explicit PosixWritableFile(const std::string& fname, int fd,
124 const EnvOptions& options);
125 virtual ~PosixWritableFile();
126
127 // Need to implement this so the file is truncated correctly
128 // with direct I/O
129 virtual Status Truncate(uint64_t size) override;
130 virtual Status Close() override;
131 virtual Status Append(const Slice& data) override;
132 virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
133 virtual Status Flush() override;
134 virtual Status Sync() override;
135 virtual Status Fsync() override;
136 virtual bool IsSyncThreadSafe() const override;
137 virtual bool use_direct_io() const override { return use_direct_io_; }
138 virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override;
139 virtual uint64_t GetFileSize() override;
140 virtual Status InvalidateCache(size_t offset, size_t length) override;
141 virtual size_t GetRequiredBufferAlignment() const override {
142 return logical_sector_size_;
143 }
144 #ifdef ROCKSDB_FALLOCATE_PRESENT
145 virtual Status Allocate(uint64_t offset, uint64_t len) override;
146 #endif
147 #ifdef ROCKSDB_RANGESYNC_PRESENT
148 virtual Status RangeSync(uint64_t offset, uint64_t nbytes) override;
149 #endif
150 #ifdef OS_LINUX
151 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
152 #endif
153 };
154
155 // mmap() based random-access
156 class PosixMmapReadableFile : public RandomAccessFile {
157 private:
158 int fd_;
159 std::string filename_;
160 void* mmapped_region_;
161 size_t length_;
162
163 public:
164 PosixMmapReadableFile(const int fd, const std::string& fname, void* base,
165 size_t length, const EnvOptions& options);
166 virtual ~PosixMmapReadableFile();
167 virtual Status Read(uint64_t offset, size_t n, Slice* result,
168 char* scratch) const override;
169 virtual Status InvalidateCache(size_t offset, size_t length) override;
170 };
171
172 class PosixMmapFile : public WritableFile {
173 private:
174 std::string filename_;
175 int fd_;
176 size_t page_size_;
177 size_t map_size_; // How much extra memory to map at a time
178 char* base_; // The mapped region
179 char* limit_; // Limit of the mapped region
180 char* dst_; // Where to write next (in range [base_,limit_])
181 char* last_sync_; // Where have we synced up to
182 uint64_t file_offset_; // Offset of base_ in file
183 #ifdef ROCKSDB_FALLOCATE_PRESENT
184 bool allow_fallocate_; // If false, fallocate calls are bypassed
185 bool fallocate_with_keep_size_;
186 #endif
187
188 // Roundup x to a multiple of y
189 static size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; }
190
191 size_t TruncateToPageBoundary(size_t s) {
192 s -= (s & (page_size_ - 1));
193 assert((s % page_size_) == 0);
194 return s;
195 }
196
197 Status MapNewRegion();
198 Status UnmapCurrentRegion();
199 Status Msync();
200
201 public:
202 PosixMmapFile(const std::string& fname, int fd, size_t page_size,
203 const EnvOptions& options);
204 ~PosixMmapFile();
205
206 // Means Close() will properly take care of truncate
207 // and it does not need any additional information
208 virtual Status Truncate(uint64_t /*size*/) override { return Status::OK(); }
209 virtual Status Close() override;
210 virtual Status Append(const Slice& data) override;
211 virtual Status Flush() override;
212 virtual Status Sync() override;
213 virtual Status Fsync() override;
214 virtual uint64_t GetFileSize() override;
215 virtual Status InvalidateCache(size_t offset, size_t length) override;
216 #ifdef ROCKSDB_FALLOCATE_PRESENT
217 virtual Status Allocate(uint64_t offset, uint64_t len) override;
218 #endif
219 };
220
221 class PosixRandomRWFile : public RandomRWFile {
222 public:
223 explicit PosixRandomRWFile(const std::string& fname, int fd,
224 const EnvOptions& options);
225 virtual ~PosixRandomRWFile();
226
227 virtual Status Write(uint64_t offset, const Slice& data) override;
228
229 virtual Status Read(uint64_t offset, size_t n, Slice* result,
230 char* scratch) const override;
231
232 virtual Status Flush() override;
233 virtual Status Sync() override;
234 virtual Status Fsync() override;
235 virtual Status Close() override;
236
237 private:
238 const std::string filename_;
239 int fd_;
240 };
241
242 struct PosixMemoryMappedFileBuffer : public MemoryMappedFileBuffer {
243 PosixMemoryMappedFileBuffer(void* _base, size_t _length)
244 : MemoryMappedFileBuffer(_base, _length) {}
245 virtual ~PosixMemoryMappedFileBuffer();
246 };
247
248 class PosixDirectory : public Directory {
249 public:
250 explicit PosixDirectory(int fd) : fd_(fd) {}
251 ~PosixDirectory();
252 virtual Status Fsync() override;
253
254 private:
255 int fd_;
256 };
257
258 } // namespace rocksdb