]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #pragma once | |
11 | #include <algorithm> | |
12 | #include <set> | |
13 | #include <utility> | |
14 | #include <vector> | |
15 | #include <string> | |
16 | #include "rocksdb/cache.h" | |
17 | #include "db/dbformat.h" | |
18 | #include "util/arena.h" | |
19 | #include "util/autovector.h" | |
20 | ||
21 | namespace rocksdb { | |
22 | ||
23 | class VersionSet; | |
24 | ||
25 | const uint64_t kFileNumberMask = 0x3FFFFFFFFFFFFFFF; | |
26 | ||
27 | extern uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id); | |
28 | ||
29 | // A copyable structure contains information needed to read data from an SST | |
11fdf7f2 | 30 | // file. It can contain a pointer to a table reader opened for the file, or |
7c673cae FG |
31 | // file number and size, which can be used to create a new table reader for it. |
32 | // The behavior is undefined when a copied of the structure is used when the | |
33 | // file is not in any live version any more. | |
34 | struct FileDescriptor { | |
35 | // Table reader in table_reader_handle | |
36 | TableReader* table_reader; | |
37 | uint64_t packed_number_and_path_id; | |
38 | uint64_t file_size; // File size in bytes | |
11fdf7f2 TL |
39 | SequenceNumber smallest_seqno; // The smallest seqno in this file |
40 | SequenceNumber largest_seqno; // The largest seqno in this file | |
7c673cae FG |
41 | |
42 | FileDescriptor() : FileDescriptor(0, 0, 0) {} | |
43 | ||
44 | FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size) | |
11fdf7f2 TL |
45 | : FileDescriptor(number, path_id, _file_size, kMaxSequenceNumber, 0) {} |
46 | ||
47 | FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size, | |
48 | SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno) | |
7c673cae FG |
49 | : table_reader(nullptr), |
50 | packed_number_and_path_id(PackFileNumberAndPathId(number, path_id)), | |
11fdf7f2 TL |
51 | file_size(_file_size), |
52 | smallest_seqno(_smallest_seqno), | |
53 | largest_seqno(_largest_seqno) {} | |
7c673cae FG |
54 | |
55 | FileDescriptor& operator=(const FileDescriptor& fd) { | |
56 | table_reader = fd.table_reader; | |
57 | packed_number_and_path_id = fd.packed_number_and_path_id; | |
58 | file_size = fd.file_size; | |
11fdf7f2 TL |
59 | smallest_seqno = fd.smallest_seqno; |
60 | largest_seqno = fd.largest_seqno; | |
7c673cae FG |
61 | return *this; |
62 | } | |
63 | ||
64 | uint64_t GetNumber() const { | |
65 | return packed_number_and_path_id & kFileNumberMask; | |
66 | } | |
67 | uint32_t GetPathId() const { | |
68 | return static_cast<uint32_t>( | |
69 | packed_number_and_path_id / (kFileNumberMask + 1)); | |
70 | } | |
71 | uint64_t GetFileSize() const { return file_size; } | |
72 | }; | |
73 | ||
11fdf7f2 TL |
74 | struct FileSampledStats { |
75 | FileSampledStats() : num_reads_sampled(0) {} | |
76 | FileSampledStats(const FileSampledStats& other) { *this = other; } | |
77 | FileSampledStats& operator=(const FileSampledStats& other) { | |
78 | num_reads_sampled = other.num_reads_sampled.load(); | |
79 | return *this; | |
80 | } | |
81 | ||
82 | // number of user reads to this file. | |
83 | mutable std::atomic<uint64_t> num_reads_sampled; | |
84 | }; | |
85 | ||
7c673cae | 86 | struct FileMetaData { |
7c673cae FG |
87 | FileDescriptor fd; |
88 | InternalKey smallest; // Smallest internal key served by table | |
89 | InternalKey largest; // Largest internal key served by table | |
7c673cae FG |
90 | |
91 | // Needs to be disposed when refs becomes 0. | |
92 | Cache::Handle* table_reader_handle; | |
93 | ||
11fdf7f2 TL |
94 | FileSampledStats stats; |
95 | ||
7c673cae FG |
96 | // Stats for compensating deletion entries during compaction |
97 | ||
98 | // File size compensated by deletion entry. | |
99 | // This is updated in Version::UpdateAccumulatedStats() first time when the | |
100 | // file is created or loaded. After it is updated (!= 0), it is immutable. | |
101 | uint64_t compensated_file_size; | |
102 | // These values can mutate, but they can only be read or written from | |
103 | // single-threaded LogAndApply thread | |
104 | uint64_t num_entries; // the number of entries. | |
105 | uint64_t num_deletions; // the number of deletion entries. | |
106 | uint64_t raw_key_size; // total uncompressed key size. | |
107 | uint64_t raw_value_size; // total uncompressed value size. | |
11fdf7f2 TL |
108 | |
109 | int refs; // Reference count | |
110 | ||
111 | bool being_compacted; // Is this file undergoing compaction? | |
7c673cae FG |
112 | bool init_stats_from_file; // true if the data-entry stats of this file |
113 | // has initialized from file. | |
114 | ||
115 | bool marked_for_compaction; // True if client asked us nicely to compact this | |
116 | // file. | |
117 | ||
118 | FileMetaData() | |
11fdf7f2 | 119 | : table_reader_handle(nullptr), |
7c673cae FG |
120 | compensated_file_size(0), |
121 | num_entries(0), | |
122 | num_deletions(0), | |
123 | raw_key_size(0), | |
124 | raw_value_size(0), | |
11fdf7f2 TL |
125 | refs(0), |
126 | being_compacted(false), | |
7c673cae FG |
127 | init_stats_from_file(false), |
128 | marked_for_compaction(false) {} | |
129 | ||
130 | // REQUIRED: Keys must be given to the function in sorted order (it expects | |
131 | // the last key to be the largest). | |
132 | void UpdateBoundaries(const Slice& key, SequenceNumber seqno) { | |
133 | if (smallest.size() == 0) { | |
134 | smallest.DecodeFrom(key); | |
135 | } | |
136 | largest.DecodeFrom(key); | |
11fdf7f2 TL |
137 | fd.smallest_seqno = std::min(fd.smallest_seqno, seqno); |
138 | fd.largest_seqno = std::max(fd.largest_seqno, seqno); | |
139 | } | |
140 | ||
141 | // Unlike UpdateBoundaries, ranges do not need to be presented in any | |
142 | // particular order. | |
143 | void UpdateBoundariesForRange(const InternalKey& start, | |
144 | const InternalKey& end, SequenceNumber seqno, | |
145 | const InternalKeyComparator& icmp) { | |
146 | if (smallest.size() == 0 || icmp.Compare(start, smallest) < 0) { | |
147 | smallest = start; | |
148 | } | |
149 | if (largest.size() == 0 || icmp.Compare(largest, end) < 0) { | |
150 | largest = end; | |
151 | } | |
152 | fd.smallest_seqno = std::min(fd.smallest_seqno, seqno); | |
153 | fd.largest_seqno = std::max(fd.largest_seqno, seqno); | |
7c673cae FG |
154 | } |
155 | }; | |
156 | ||
11fdf7f2 TL |
157 | // A compressed copy of file meta data that just contain minimum data needed |
158 | // to server read operations, while still keeping the pointer to full metadata | |
159 | // of the file in case it is needed. | |
7c673cae FG |
160 | struct FdWithKeyRange { |
161 | FileDescriptor fd; | |
11fdf7f2 | 162 | FileMetaData* file_metadata; // Point to all metadata |
7c673cae FG |
163 | Slice smallest_key; // slice that contain smallest key |
164 | Slice largest_key; // slice that contain largest key | |
165 | ||
166 | FdWithKeyRange() | |
167 | : fd(), | |
11fdf7f2 | 168 | file_metadata(nullptr), |
7c673cae FG |
169 | smallest_key(), |
170 | largest_key() { | |
171 | } | |
172 | ||
11fdf7f2 TL |
173 | FdWithKeyRange(FileDescriptor _fd, Slice _smallest_key, Slice _largest_key, |
174 | FileMetaData* _file_metadata) | |
175 | : fd(_fd), | |
176 | file_metadata(_file_metadata), | |
177 | smallest_key(_smallest_key), | |
178 | largest_key(_largest_key) {} | |
7c673cae FG |
179 | }; |
180 | ||
181 | // Data structure to store an array of FdWithKeyRange in one level | |
182 | // Actual data is guaranteed to be stored closely | |
183 | struct LevelFilesBrief { | |
184 | size_t num_files; | |
185 | FdWithKeyRange* files; | |
186 | LevelFilesBrief() { | |
187 | num_files = 0; | |
188 | files = nullptr; | |
189 | } | |
190 | }; | |
191 | ||
192 | class VersionEdit { | |
193 | public: | |
194 | VersionEdit() { Clear(); } | |
195 | ~VersionEdit() { } | |
196 | ||
197 | void Clear(); | |
198 | ||
199 | void SetComparatorName(const Slice& name) { | |
200 | has_comparator_ = true; | |
201 | comparator_ = name.ToString(); | |
202 | } | |
203 | void SetLogNumber(uint64_t num) { | |
204 | has_log_number_ = true; | |
205 | log_number_ = num; | |
206 | } | |
207 | void SetPrevLogNumber(uint64_t num) { | |
208 | has_prev_log_number_ = true; | |
209 | prev_log_number_ = num; | |
210 | } | |
211 | void SetNextFile(uint64_t num) { | |
212 | has_next_file_number_ = true; | |
213 | next_file_number_ = num; | |
214 | } | |
215 | void SetLastSequence(SequenceNumber seq) { | |
216 | has_last_sequence_ = true; | |
217 | last_sequence_ = seq; | |
218 | } | |
219 | void SetMaxColumnFamily(uint32_t max_column_family) { | |
220 | has_max_column_family_ = true; | |
221 | max_column_family_ = max_column_family; | |
222 | } | |
11fdf7f2 TL |
223 | void SetMinLogNumberToKeep(uint64_t num) { |
224 | has_min_log_number_to_keep_ = true; | |
225 | min_log_number_to_keep_ = num; | |
226 | } | |
227 | ||
228 | bool has_log_number() { return has_log_number_; } | |
229 | ||
230 | uint64_t log_number() { return log_number_; } | |
7c673cae | 231 | |
494da23a TL |
232 | bool has_next_file_number() const { return has_next_file_number_; } |
233 | ||
234 | uint64_t next_file_number() const { return next_file_number_; } | |
235 | ||
7c673cae FG |
236 | // Add the specified file at the specified number. |
237 | // REQUIRES: This version has not been saved (see VersionSet::SaveTo) | |
238 | // REQUIRES: "smallest" and "largest" are smallest and largest keys in file | |
239 | void AddFile(int level, uint64_t file, uint32_t file_path_id, | |
240 | uint64_t file_size, const InternalKey& smallest, | |
241 | const InternalKey& largest, const SequenceNumber& smallest_seqno, | |
242 | const SequenceNumber& largest_seqno, | |
243 | bool marked_for_compaction) { | |
244 | assert(smallest_seqno <= largest_seqno); | |
245 | FileMetaData f; | |
11fdf7f2 TL |
246 | f.fd = FileDescriptor(file, file_path_id, file_size, smallest_seqno, |
247 | largest_seqno); | |
7c673cae FG |
248 | f.smallest = smallest; |
249 | f.largest = largest; | |
11fdf7f2 TL |
250 | f.fd.smallest_seqno = smallest_seqno; |
251 | f.fd.largest_seqno = largest_seqno; | |
7c673cae FG |
252 | f.marked_for_compaction = marked_for_compaction; |
253 | new_files_.emplace_back(level, std::move(f)); | |
254 | } | |
255 | ||
256 | void AddFile(int level, const FileMetaData& f) { | |
11fdf7f2 | 257 | assert(f.fd.smallest_seqno <= f.fd.largest_seqno); |
7c673cae FG |
258 | new_files_.emplace_back(level, f); |
259 | } | |
260 | ||
261 | // Delete the specified "file" from the specified "level". | |
262 | void DeleteFile(int level, uint64_t file) { | |
263 | deleted_files_.insert({level, file}); | |
264 | } | |
265 | ||
266 | // Number of edits | |
267 | size_t NumEntries() { return new_files_.size() + deleted_files_.size(); } | |
268 | ||
269 | bool IsColumnFamilyManipulation() { | |
270 | return is_column_family_add_ || is_column_family_drop_; | |
271 | } | |
272 | ||
273 | void SetColumnFamily(uint32_t column_family_id) { | |
274 | column_family_ = column_family_id; | |
275 | } | |
276 | ||
277 | // set column family ID by calling SetColumnFamily() | |
278 | void AddColumnFamily(const std::string& name) { | |
279 | assert(!is_column_family_drop_); | |
280 | assert(!is_column_family_add_); | |
281 | assert(NumEntries() == 0); | |
282 | is_column_family_add_ = true; | |
283 | column_family_name_ = name; | |
284 | } | |
285 | ||
286 | // set column family ID by calling SetColumnFamily() | |
287 | void DropColumnFamily() { | |
288 | assert(!is_column_family_drop_); | |
289 | assert(!is_column_family_add_); | |
290 | assert(NumEntries() == 0); | |
291 | is_column_family_drop_ = true; | |
292 | } | |
293 | ||
294 | // return true on success. | |
295 | bool EncodeTo(std::string* dst) const; | |
296 | Status DecodeFrom(const Slice& src); | |
297 | ||
298 | const char* DecodeNewFile4From(Slice* input); | |
299 | ||
300 | typedef std::set<std::pair<int, uint64_t>> DeletedFileSet; | |
301 | ||
302 | const DeletedFileSet& GetDeletedFiles() { return deleted_files_; } | |
303 | const std::vector<std::pair<int, FileMetaData>>& GetNewFiles() { | |
304 | return new_files_; | |
305 | } | |
306 | ||
11fdf7f2 TL |
307 | void MarkAtomicGroup(uint32_t remaining_entries) { |
308 | is_in_atomic_group_ = true; | |
309 | remaining_entries_ = remaining_entries; | |
310 | } | |
311 | ||
7c673cae FG |
312 | std::string DebugString(bool hex_key = false) const; |
313 | std::string DebugJSON(int edit_num, bool hex_key = false) const; | |
314 | ||
315 | private: | |
494da23a | 316 | friend class ReactiveVersionSet; |
7c673cae FG |
317 | friend class VersionSet; |
318 | friend class Version; | |
319 | ||
320 | bool GetLevel(Slice* input, int* level, const char** msg); | |
321 | ||
322 | int max_level_; | |
323 | std::string comparator_; | |
324 | uint64_t log_number_; | |
325 | uint64_t prev_log_number_; | |
326 | uint64_t next_file_number_; | |
327 | uint32_t max_column_family_; | |
11fdf7f2 TL |
328 | // The most recent WAL log number that is deleted |
329 | uint64_t min_log_number_to_keep_; | |
7c673cae FG |
330 | SequenceNumber last_sequence_; |
331 | bool has_comparator_; | |
332 | bool has_log_number_; | |
333 | bool has_prev_log_number_; | |
334 | bool has_next_file_number_; | |
335 | bool has_last_sequence_; | |
336 | bool has_max_column_family_; | |
11fdf7f2 | 337 | bool has_min_log_number_to_keep_; |
7c673cae FG |
338 | |
339 | DeletedFileSet deleted_files_; | |
340 | std::vector<std::pair<int, FileMetaData>> new_files_; | |
341 | ||
11fdf7f2 | 342 | // Each version edit record should have column_family_ set |
7c673cae FG |
343 | // If it's not set, it is default (0) |
344 | uint32_t column_family_; | |
345 | // a version edit can be either column_family add or | |
346 | // column_family drop. If it's column family add, | |
347 | // it also includes column family name. | |
348 | bool is_column_family_drop_; | |
349 | bool is_column_family_add_; | |
350 | std::string column_family_name_; | |
11fdf7f2 TL |
351 | |
352 | bool is_in_atomic_group_; | |
353 | uint32_t remaining_entries_; | |
7c673cae FG |
354 | }; |
355 | ||
356 | } // namespace rocksdb |