// Retrieved from git.proxmox.com, repository ceph.git:
//   ceph/src/rocksdb/env/io_posix.h
// Commit subject: "add subtree-ish sources for 12.0.3"
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under the BSD-style license found in the
//  LICENSE file in the root directory of this source tree. An additional grant
//  of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
9 #pragma once
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <atomic>
#include <string>
#include "rocksdb/env.h"
15
// Placeholder definitions of the posix_fadvise() advice constants.
// They are only defined when the build targets something other than Linux,
// Cygwin or AIX (i.e. none of OS_LINUX / CYGWIN / OS_AIX is defined), so that
// code naming these constants still compiles there.
#if !(defined OS_LINUX) && !(defined CYGWIN) && !(defined OS_AIX)
#define POSIX_FADV_NORMAL 0     /* [MC1] no further special treatment */
#define POSIX_FADV_RANDOM 1     /* [MC1] expect random page refs */
#define POSIX_FADV_SEQUENTIAL 2 /* [MC1] expect sequential page refs */
#define POSIX_FADV_WILLNEED 3   /* [MC1] will need these pages */
#define POSIX_FADV_DONTNEED 4   /* [MC1] dont need these pages */
#endif
25
26 namespace rocksdb {
27
28 static Status IOError(const std::string& context, int err_number) {
29 switch (err_number) {
30 case ENOSPC:
31 return Status::NoSpace(context, strerror(err_number));
32 case ESTALE:
33 return Status::IOError(Status::kStaleFile);
34 default:
35 return Status::IOError(context, strerror(err_number));
36 }
37 }
38
// Namespace-like holder for helpers shared by the POSIX file classes.
class PosixHelper {
 public:
  // Declared here, defined elsewhere.
  //   fd        file descriptor to derive the id from.
  //   id        output buffer.
  //   max_size  capacity of `id` in bytes.
  // NOTE(review): presumably returns the number of bytes written to `id`
  // (0 when no id could be produced) -- confirm against the definition.
  static size_t GetUniqueIdFromFile(int fd, char* id, size_t max_size);
};
43
44 class PosixSequentialFile : public SequentialFile {
45 private:
46 std::string filename_;
47 FILE* file_;
48 int fd_;
49 bool use_direct_io_;
50 size_t logical_sector_size_;
51
52 public:
53 PosixSequentialFile(const std::string& fname, FILE* file, int fd,
54 const EnvOptions& options);
55 virtual ~PosixSequentialFile();
56
57 virtual Status Read(size_t n, Slice* result, char* scratch) override;
58 virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result,
59 char* scratch) override;
60 virtual Status Skip(uint64_t n) override;
61 virtual Status InvalidateCache(size_t offset, size_t length) override;
62 virtual bool use_direct_io() const override { return use_direct_io_; }
63 virtual size_t GetRequiredBufferAlignment() const override {
64 return logical_sector_size_;
65 }
66 };
67
68 class PosixRandomAccessFile : public RandomAccessFile {
69 protected:
70 std::string filename_;
71 int fd_;
72 bool use_direct_io_;
73 size_t logical_sector_size_;
74
75 public:
76 PosixRandomAccessFile(const std::string& fname, int fd,
77 const EnvOptions& options);
78 virtual ~PosixRandomAccessFile();
79
80 virtual Status Read(uint64_t offset, size_t n, Slice* result,
81 char* scratch) const override;
82
83 virtual Status Prefetch(uint64_t offset, size_t n) override;
84
85 #if defined(OS_LINUX) || defined(OS_MACOSX) || defined(OS_AIX)
86 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
87 #endif
88 virtual void Hint(AccessPattern pattern) override;
89 virtual Status InvalidateCache(size_t offset, size_t length) override;
90 virtual bool use_direct_io() const override { return use_direct_io_; }
91 virtual size_t GetRequiredBufferAlignment() const override {
92 return logical_sector_size_;
93 }
94 };
95
96 class PosixWritableFile : public WritableFile {
97 protected:
98 const std::string filename_;
99 const bool use_direct_io_;
100 int fd_;
101 uint64_t filesize_;
102 size_t logical_sector_size_;
103 #ifdef ROCKSDB_FALLOCATE_PRESENT
104 bool allow_fallocate_;
105 bool fallocate_with_keep_size_;
106 #endif
107
108 public:
109 explicit PosixWritableFile(const std::string& fname, int fd,
110 const EnvOptions& options);
111 virtual ~PosixWritableFile();
112
113 // Need to implement this so the file is truncated correctly
114 // with direct I/O
115 virtual Status Truncate(uint64_t size) override;
116 virtual Status Close() override;
117 virtual Status Append(const Slice& data) override;
118 virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
119 virtual Status Flush() override;
120 virtual Status Sync() override;
121 virtual Status Fsync() override;
122 virtual bool IsSyncThreadSafe() const override;
123 virtual bool use_direct_io() const override { return use_direct_io_; }
124 virtual uint64_t GetFileSize() override;
125 virtual Status InvalidateCache(size_t offset, size_t length) override;
126 virtual size_t GetRequiredBufferAlignment() const override {
127 return logical_sector_size_;
128 }
129 #ifdef ROCKSDB_FALLOCATE_PRESENT
130 virtual Status Allocate(uint64_t offset, uint64_t len) override;
131 #endif
132 #ifdef ROCKSDB_RANGESYNC_PRESENT
133 virtual Status RangeSync(uint64_t offset, uint64_t nbytes) override;
134 #endif
135 #ifdef OS_LINUX
136 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
137 #endif
138 };
139
140 // mmap() based random-access
141 class PosixMmapReadableFile : public RandomAccessFile {
142 private:
143 int fd_;
144 std::string filename_;
145 void* mmapped_region_;
146 size_t length_;
147
148 public:
149 PosixMmapReadableFile(const int fd, const std::string& fname, void* base,
150 size_t length, const EnvOptions& options);
151 virtual ~PosixMmapReadableFile();
152 virtual Status Read(uint64_t offset, size_t n, Slice* result,
153 char* scratch) const override;
154 virtual Status InvalidateCache(size_t offset, size_t length) override;
155 };
156
157 class PosixMmapFile : public WritableFile {
158 private:
159 std::string filename_;
160 int fd_;
161 size_t page_size_;
162 size_t map_size_; // How much extra memory to map at a time
163 char* base_; // The mapped region
164 char* limit_; // Limit of the mapped region
165 char* dst_; // Where to write next (in range [base_,limit_])
166 char* last_sync_; // Where have we synced up to
167 uint64_t file_offset_; // Offset of base_ in file
168 #ifdef ROCKSDB_FALLOCATE_PRESENT
169 bool allow_fallocate_; // If false, fallocate calls are bypassed
170 bool fallocate_with_keep_size_;
171 #endif
172
173 // Roundup x to a multiple of y
174 static size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; }
175
176 size_t TruncateToPageBoundary(size_t s) {
177 s -= (s & (page_size_ - 1));
178 assert((s % page_size_) == 0);
179 return s;
180 }
181
182 Status MapNewRegion();
183 Status UnmapCurrentRegion();
184 Status Msync();
185
186 public:
187 PosixMmapFile(const std::string& fname, int fd, size_t page_size,
188 const EnvOptions& options);
189 ~PosixMmapFile();
190
191 // Means Close() will properly take care of truncate
192 // and it does not need any additional information
193 virtual Status Truncate(uint64_t size) override { return Status::OK(); }
194 virtual Status Close() override;
195 virtual Status Append(const Slice& data) override;
196 virtual Status Flush() override;
197 virtual Status Sync() override;
198 virtual Status Fsync() override;
199 virtual uint64_t GetFileSize() override;
200 virtual Status InvalidateCache(size_t offset, size_t length) override;
201 #ifdef ROCKSDB_FALLOCATE_PRESENT
202 virtual Status Allocate(uint64_t offset, uint64_t len) override;
203 #endif
204 };
205
206 class PosixRandomRWFile : public RandomRWFile {
207 public:
208 explicit PosixRandomRWFile(const std::string& fname, int fd,
209 const EnvOptions& options);
210 virtual ~PosixRandomRWFile();
211
212 virtual Status Write(uint64_t offset, const Slice& data) override;
213
214 virtual Status Read(uint64_t offset, size_t n, Slice* result,
215 char* scratch) const override;
216
217 virtual Status Flush() override;
218 virtual Status Sync() override;
219 virtual Status Fsync() override;
220 virtual Status Close() override;
221
222 private:
223 const std::string filename_;
224 int fd_;
225 };
226
227 class PosixDirectory : public Directory {
228 public:
229 explicit PosixDirectory(int fd) : fd_(fd) {}
230 ~PosixDirectory();
231 virtual Status Fsync() override;
232
233 private:
234 int fd_;
235 };
236
237 } // namespace rocksdb