]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #pragma once | |
11 | #include "db/version_set.h" | |
12 | #include "options/cf_options.h" | |
13 | #include "util/arena.h" | |
14 | #include "util/autovector.h" | |
15 | ||
16 | namespace rocksdb { | |
17 | ||
494da23a TL |
18 | // Utility for comparing sstable boundary keys. Returns -1 if either a or b is |
19 | // null which provides the property that a==null indicates a key that is less | |
20 | // than any key and b==null indicates a key that is greater than any key. Note | |
21 | // that the comparison is performed primarily on the user-key portion of the | |
22 | // key. If the user-keys compare equal, an additional test is made to sort | |
23 | // range tombstone sentinel keys before other keys with the same user-key. The | |
24 | // result is that 2 user-keys will compare equal if they differ purely on | |
25 | // their sequence number and value, but the range tombstone sentinel for that | |
26 | // user-key will compare not equal. This is necessary because the range | |
27 | // tombstone sentinel key is set as the largest key for an sstable even though | |
28 | // that key never appears in the database. We don't want adjacent sstables to | |
29 | // be considered overlapping if they are separated by the range tombstone | |
30 | // sentinel. | |
31 | int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a, | |
32 | const InternalKey& b); | |
33 | int sstableKeyCompare(const Comparator* user_cmp, const InternalKey* a, | |
34 | const InternalKey& b); | |
35 | int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a, | |
36 | const InternalKey* b); | |
37 | ||
38 | // An AtomicCompactionUnitBoundary represents a range of keys [smallest, | |
39 | // largest] that exactly spans one ore more neighbouring SSTs on the same | |
40 | // level. Every pair of SSTs in this range "overlap" (i.e., the largest | |
41 | // user key of one file is the smallest user key of the next file). These | |
42 | // boundaries are propagated down to RangeDelAggregator during compaction | |
43 | // to provide safe truncation boundaries for range tombstones. | |
44 | struct AtomicCompactionUnitBoundary { | |
45 | const InternalKey* smallest = nullptr; | |
46 | const InternalKey* largest = nullptr; | |
47 | }; | |
48 | ||
7c673cae FG |
49 | // The structure that manages compaction input files associated |
50 | // with the same physical level. | |
51 | struct CompactionInputFiles { | |
52 | int level; | |
53 | std::vector<FileMetaData*> files; | |
494da23a | 54 | std::vector<AtomicCompactionUnitBoundary> atomic_compaction_unit_boundaries; |
7c673cae FG |
55 | inline bool empty() const { return files.empty(); } |
56 | inline size_t size() const { return files.size(); } | |
57 | inline void clear() { files.clear(); } | |
58 | inline FileMetaData* operator[](size_t i) const { return files[i]; } | |
59 | }; | |
60 | ||
61 | class Version; | |
62 | class ColumnFamilyData; | |
63 | class VersionStorageInfo; | |
64 | class CompactionFilter; | |
65 | ||
66 | // A Compaction encapsulates information about a compaction. | |
67 | class Compaction { | |
68 | public: | |
69 | Compaction(VersionStorageInfo* input_version, | |
70 | const ImmutableCFOptions& immutable_cf_options, | |
71 | const MutableCFOptions& mutable_cf_options, | |
72 | std::vector<CompactionInputFiles> inputs, int output_level, | |
73 | uint64_t target_file_size, uint64_t max_compaction_bytes, | |
74 | uint32_t output_path_id, CompressionType compression, | |
11fdf7f2 | 75 | CompressionOptions compression_opts, uint32_t max_subcompactions, |
7c673cae FG |
76 | std::vector<FileMetaData*> grandparents, |
77 | bool manual_compaction = false, double score = -1, | |
78 | bool deletion_compaction = false, | |
79 | CompactionReason compaction_reason = CompactionReason::kUnknown); | |
80 | ||
81 | // No copying allowed | |
82 | Compaction(const Compaction&) = delete; | |
83 | void operator=(const Compaction&) = delete; | |
84 | ||
85 | ~Compaction(); | |
86 | ||
87 | // Returns the level associated to the specified compaction input level. | |
88 | // If compaction_input_level is not specified, then input_level is set to 0. | |
89 | int level(size_t compaction_input_level = 0) const { | |
90 | return inputs_[compaction_input_level].level; | |
91 | } | |
92 | ||
93 | int start_level() const { return start_level_; } | |
94 | ||
95 | // Outputs will go to this level | |
96 | int output_level() const { return output_level_; } | |
97 | ||
98 | // Returns the number of input levels in this compaction. | |
99 | size_t num_input_levels() const { return inputs_.size(); } | |
100 | ||
101 | // Return the object that holds the edits to the descriptor done | |
102 | // by this compaction. | |
103 | VersionEdit* edit() { return &edit_; } | |
104 | ||
105 | // Returns the number of input files associated to the specified | |
106 | // compaction input level. | |
107 | // The function will return 0 if when "compaction_input_level" < 0 | |
108 | // or "compaction_input_level" >= "num_input_levels()". | |
109 | size_t num_input_files(size_t compaction_input_level) const { | |
110 | if (compaction_input_level < inputs_.size()) { | |
111 | return inputs_[compaction_input_level].size(); | |
112 | } | |
113 | return 0; | |
114 | } | |
115 | ||
116 | // Returns input version of the compaction | |
117 | Version* input_version() const { return input_version_; } | |
118 | ||
119 | // Returns the ColumnFamilyData associated with the compaction. | |
120 | ColumnFamilyData* column_family_data() const { return cfd_; } | |
121 | ||
122 | // Returns the file meta data of the 'i'th input file at the | |
123 | // specified compaction input level. | |
124 | // REQUIREMENT: "compaction_input_level" must be >= 0 and | |
125 | // < "input_levels()" | |
126 | FileMetaData* input(size_t compaction_input_level, size_t i) const { | |
127 | assert(compaction_input_level < inputs_.size()); | |
128 | return inputs_[compaction_input_level][i]; | |
129 | } | |
130 | ||
494da23a TL |
131 | const std::vector<AtomicCompactionUnitBoundary>* boundaries( |
132 | size_t compaction_input_level) const { | |
133 | assert(compaction_input_level < inputs_.size()); | |
134 | return &inputs_[compaction_input_level].atomic_compaction_unit_boundaries; | |
135 | } | |
136 | ||
7c673cae FG |
137 | // Returns the list of file meta data of the specified compaction |
138 | // input level. | |
139 | // REQUIREMENT: "compaction_input_level" must be >= 0 and | |
140 | // < "input_levels()" | |
11fdf7f2 TL |
141 | const std::vector<FileMetaData*>* inputs( |
142 | size_t compaction_input_level) const { | |
7c673cae FG |
143 | assert(compaction_input_level < inputs_.size()); |
144 | return &inputs_[compaction_input_level].files; | |
145 | } | |
146 | ||
147 | const std::vector<CompactionInputFiles>* inputs() { return &inputs_; } | |
148 | ||
149 | // Returns the LevelFilesBrief of the specified compaction input level. | |
150 | const LevelFilesBrief* input_levels(size_t compaction_input_level) const { | |
151 | return &input_levels_[compaction_input_level]; | |
152 | } | |
153 | ||
154 | // Maximum size of files to build during this compaction. | |
155 | uint64_t max_output_file_size() const { return max_output_file_size_; } | |
156 | ||
157 | // What compression for output | |
158 | CompressionType output_compression() const { return output_compression_; } | |
159 | ||
11fdf7f2 TL |
160 | // What compression options for output |
161 | CompressionOptions output_compression_opts() const { | |
162 | return output_compression_opts_; | |
163 | } | |
164 | ||
7c673cae FG |
165 | // Whether need to write output file to second DB path. |
166 | uint32_t output_path_id() const { return output_path_id_; } | |
167 | ||
168 | // Is this a trivial compaction that can be implemented by just | |
169 | // moving a single input file to the next level (no merging or splitting) | |
170 | bool IsTrivialMove() const; | |
171 | ||
172 | // If true, then the compaction can be done by simply deleting input files. | |
173 | bool deletion_compaction() const { return deletion_compaction_; } | |
174 | ||
175 | // Add all inputs to this compaction as delete operations to *edit. | |
176 | void AddInputDeletions(VersionEdit* edit); | |
177 | ||
178 | // Returns true if the available information we have guarantees that | |
179 | // the input "user_key" does not exist in any level beyond "output_level()". | |
180 | bool KeyNotExistsBeyondOutputLevel(const Slice& user_key, | |
181 | std::vector<size_t>* level_ptrs) const; | |
182 | ||
183 | // Clear all files to indicate that they are not being compacted | |
184 | // Delete this compaction from the list of running compactions. | |
185 | // | |
186 | // Requirement: DB mutex held | |
187 | void ReleaseCompactionFiles(Status status); | |
188 | ||
189 | // Returns the summary of the compaction in "output" with maximum "len" | |
190 | // in bytes. The caller is responsible for the memory management of | |
191 | // "output". | |
192 | void Summary(char* output, int len); | |
193 | ||
194 | // Return the score that was used to pick this compaction run. | |
195 | double score() const { return score_; } | |
196 | ||
197 | // Is this compaction creating a file in the bottom most level? | |
198 | bool bottommost_level() const { return bottommost_level_; } | |
199 | ||
200 | // Does this compaction include all sst files? | |
201 | bool is_full_compaction() const { return is_full_compaction_; } | |
202 | ||
203 | // Was this compaction triggered manually by the client? | |
204 | bool is_manual_compaction() const { return is_manual_compaction_; } | |
205 | ||
206 | // Used when allow_trivial_move option is set in | |
207 | // Universal compaction. If all the input files are | |
208 | // non overlapping, then is_trivial_move_ variable | |
209 | // will be set true, else false | |
210 | void set_is_trivial_move(bool trivial_move) { | |
211 | is_trivial_move_ = trivial_move; | |
212 | } | |
213 | ||
214 | // Used when allow_trivial_move option is set in | |
215 | // Universal compaction. Returns true, if the input files | |
216 | // are non-overlapping and can be trivially moved. | |
217 | bool is_trivial_move() const { return is_trivial_move_; } | |
218 | ||
219 | // How many total levels are there? | |
220 | int number_levels() const { return number_levels_; } | |
221 | ||
222 | // Return the ImmutableCFOptions that should be used throughout the compaction | |
223 | // procedure | |
224 | const ImmutableCFOptions* immutable_cf_options() const { | |
225 | return &immutable_cf_options_; | |
226 | } | |
227 | ||
228 | // Return the MutableCFOptions that should be used throughout the compaction | |
229 | // procedure | |
230 | const MutableCFOptions* mutable_cf_options() const { | |
231 | return &mutable_cf_options_; | |
232 | } | |
233 | ||
234 | // Returns the size in bytes that the output file should be preallocated to. | |
235 | // In level compaction, that is max_file_size_. In universal compaction, that | |
236 | // is the sum of all input file sizes. | |
237 | uint64_t OutputFilePreallocationSize() const; | |
238 | ||
239 | void SetInputVersion(Version* input_version); | |
240 | ||
241 | struct InputLevelSummaryBuffer { | |
242 | char buffer[128]; | |
243 | }; | |
244 | ||
245 | const char* InputLevelSummary(InputLevelSummaryBuffer* scratch) const; | |
246 | ||
247 | uint64_t CalculateTotalInputSize() const; | |
248 | ||
249 | // In case of compaction error, reset the nextIndex that is used | |
250 | // to pick up the next file to be compacted from files_by_size_ | |
251 | void ResetNextCompactionIndex(); | |
252 | ||
253 | // Create a CompactionFilter from compaction_filter_factory | |
254 | std::unique_ptr<CompactionFilter> CreateCompactionFilter() const; | |
255 | ||
256 | // Is the input level corresponding to output_level_ empty? | |
257 | bool IsOutputLevelEmpty() const; | |
258 | ||
259 | // Should this compaction be broken up into smaller ones run in parallel? | |
260 | bool ShouldFormSubcompactions() const; | |
261 | ||
262 | // test function to validate the functionality of IsBottommostLevel() | |
263 | // function -- determines if compaction with inputs and storage is bottommost | |
264 | static bool TEST_IsBottommostLevel( | |
265 | int output_level, VersionStorageInfo* vstorage, | |
266 | const std::vector<CompactionInputFiles>& inputs); | |
267 | ||
268 | TablePropertiesCollection GetOutputTableProperties() const { | |
269 | return output_table_properties_; | |
270 | } | |
271 | ||
272 | void SetOutputTableProperties(TablePropertiesCollection tp) { | |
273 | output_table_properties_ = std::move(tp); | |
274 | } | |
275 | ||
276 | Slice GetSmallestUserKey() const { return smallest_user_key_; } | |
277 | ||
278 | Slice GetLargestUserKey() const { return largest_user_key_; } | |
279 | ||
11fdf7f2 TL |
280 | int GetInputBaseLevel() const; |
281 | ||
7c673cae FG |
282 | CompactionReason compaction_reason() { return compaction_reason_; } |
283 | ||
284 | const std::vector<FileMetaData*>& grandparents() const { | |
285 | return grandparents_; | |
286 | } | |
287 | ||
288 | uint64_t max_compaction_bytes() const { return max_compaction_bytes_; } | |
289 | ||
11fdf7f2 TL |
290 | uint32_t max_subcompactions() const { return max_subcompactions_; } |
291 | ||
292 | uint64_t MaxInputFileCreationTime() const; | |
293 | ||
7c673cae FG |
294 | private: |
295 | // mark (or clear) all files that are being compacted | |
296 | void MarkFilesBeingCompacted(bool mark_as_compacted); | |
297 | ||
298 | // get the smallest and largest key present in files to be compacted | |
299 | static void GetBoundaryKeys(VersionStorageInfo* vstorage, | |
300 | const std::vector<CompactionInputFiles>& inputs, | |
301 | Slice* smallest_key, Slice* largest_key); | |
302 | ||
494da23a TL |
303 | // Get the atomic file boundaries for all files in the compaction. Necessary |
304 | // in order to avoid the scenario described in | |
305 | // https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and plumb | |
306 | // down appropriate key boundaries to RangeDelAggregator during compaction. | |
307 | static std::vector<CompactionInputFiles> PopulateWithAtomicBoundaries( | |
308 | VersionStorageInfo* vstorage, std::vector<CompactionInputFiles> inputs); | |
309 | ||
7c673cae FG |
310 | // helper function to determine if compaction with inputs and storage is |
311 | // bottommost | |
312 | static bool IsBottommostLevel( | |
313 | int output_level, VersionStorageInfo* vstorage, | |
314 | const std::vector<CompactionInputFiles>& inputs); | |
315 | ||
316 | static bool IsFullCompaction(VersionStorageInfo* vstorage, | |
317 | const std::vector<CompactionInputFiles>& inputs); | |
318 | ||
319 | VersionStorageInfo* input_vstorage_; | |
320 | ||
321 | const int start_level_; // the lowest level to be compacted | |
322 | const int output_level_; // levels to which output files are stored | |
323 | uint64_t max_output_file_size_; | |
324 | uint64_t max_compaction_bytes_; | |
11fdf7f2 | 325 | uint32_t max_subcompactions_; |
7c673cae FG |
326 | const ImmutableCFOptions immutable_cf_options_; |
327 | const MutableCFOptions mutable_cf_options_; | |
328 | Version* input_version_; | |
329 | VersionEdit edit_; | |
330 | const int number_levels_; | |
331 | ColumnFamilyData* cfd_; | |
332 | Arena arena_; // Arena used to allocate space for file_levels_ | |
333 | ||
334 | const uint32_t output_path_id_; | |
335 | CompressionType output_compression_; | |
11fdf7f2 | 336 | CompressionOptions output_compression_opts_; |
7c673cae FG |
337 | // If true, then the comaction can be done by simply deleting input files. |
338 | const bool deletion_compaction_; | |
339 | ||
340 | // Compaction input files organized by level. Constant after construction | |
341 | const std::vector<CompactionInputFiles> inputs_; | |
342 | ||
343 | // A copy of inputs_, organized more closely in memory | |
344 | autovector<LevelFilesBrief, 2> input_levels_; | |
345 | ||
346 | // State used to check for number of overlapping grandparent files | |
347 | // (grandparent == "output_level_ + 1") | |
348 | std::vector<FileMetaData*> grandparents_; | |
349 | const double score_; // score that was used to pick this compaction. | |
350 | ||
351 | // Is this compaction creating a file in the bottom most level? | |
352 | const bool bottommost_level_; | |
353 | // Does this compaction include all sst files? | |
354 | const bool is_full_compaction_; | |
355 | ||
356 | // Is this compaction requested by the client? | |
357 | const bool is_manual_compaction_; | |
358 | ||
359 | // True if we can do trivial move in Universal multi level | |
360 | // compaction | |
361 | bool is_trivial_move_; | |
362 | ||
363 | // Does input compression match the output compression? | |
364 | bool InputCompressionMatchesOutput() const; | |
365 | ||
366 | // table properties of output files | |
367 | TablePropertiesCollection output_table_properties_; | |
368 | ||
369 | // smallest user keys in compaction | |
370 | Slice smallest_user_key_; | |
371 | ||
372 | // largest user keys in compaction | |
373 | Slice largest_user_key_; | |
374 | ||
375 | // Reason for compaction | |
376 | CompactionReason compaction_reason_; | |
377 | }; | |
378 | ||
379 | // Utility function | |
380 | extern uint64_t TotalFileSize(const std::vector<FileMetaData*>& files); | |
381 | ||
382 | } // namespace rocksdb |