]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/db/compaction.h
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / db / compaction.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #pragma once
11 #include "db/version_set.h"
12 #include "options/cf_options.h"
13 #include "util/arena.h"
14 #include "util/autovector.h"
15
16 namespace rocksdb {
17
18 // Utility for comparing sstable boundary keys. Returns -1 if either a or b is
19 // null which provides the property that a==null indicates a key that is less
20 // than any key and b==null indicates a key that is greater than any key. Note
21 // that the comparison is performed primarily on the user-key portion of the
22 // key. If the user-keys compare equal, an additional test is made to sort
23 // range tombstone sentinel keys before other keys with the same user-key. The
24 // result is that 2 user-keys will compare equal if they differ purely on
25 // their sequence number and value, but the range tombstone sentinel for that
26 // user-key will compare not equal. This is necessary because the range
27 // tombstone sentinel key is set as the largest key for an sstable even though
28 // that key never appears in the database. We don't want adjacent sstables to
29 // be considered overlapping if they are separated by the range tombstone
30 // sentinel.
31 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
32 const InternalKey& b);
33 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey* a,
34 const InternalKey& b);
35 int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
36 const InternalKey* b);
37
38 // An AtomicCompactionUnitBoundary represents a range of keys [smallest,
39 // largest] that exactly spans one ore more neighbouring SSTs on the same
40 // level. Every pair of SSTs in this range "overlap" (i.e., the largest
41 // user key of one file is the smallest user key of the next file). These
42 // boundaries are propagated down to RangeDelAggregator during compaction
43 // to provide safe truncation boundaries for range tombstones.
44 struct AtomicCompactionUnitBoundary {
45 const InternalKey* smallest = nullptr;
46 const InternalKey* largest = nullptr;
47 };
48
49 // The structure that manages compaction input files associated
50 // with the same physical level.
51 struct CompactionInputFiles {
52 int level;
53 std::vector<FileMetaData*> files;
54 std::vector<AtomicCompactionUnitBoundary> atomic_compaction_unit_boundaries;
55 inline bool empty() const { return files.empty(); }
56 inline size_t size() const { return files.size(); }
57 inline void clear() { files.clear(); }
58 inline FileMetaData* operator[](size_t i) const { return files[i]; }
59 };
60
61 class Version;
62 class ColumnFamilyData;
63 class VersionStorageInfo;
64 class CompactionFilter;
65
66 // A Compaction encapsulates information about a compaction.
67 class Compaction {
68 public:
69 Compaction(VersionStorageInfo* input_version,
70 const ImmutableCFOptions& immutable_cf_options,
71 const MutableCFOptions& mutable_cf_options,
72 std::vector<CompactionInputFiles> inputs, int output_level,
73 uint64_t target_file_size, uint64_t max_compaction_bytes,
74 uint32_t output_path_id, CompressionType compression,
75 CompressionOptions compression_opts, uint32_t max_subcompactions,
76 std::vector<FileMetaData*> grandparents,
77 bool manual_compaction = false, double score = -1,
78 bool deletion_compaction = false,
79 CompactionReason compaction_reason = CompactionReason::kUnknown);
80
81 // No copying allowed
82 Compaction(const Compaction&) = delete;
83 void operator=(const Compaction&) = delete;
84
85 ~Compaction();
86
87 // Returns the level associated to the specified compaction input level.
88 // If compaction_input_level is not specified, then input_level is set to 0.
89 int level(size_t compaction_input_level = 0) const {
90 return inputs_[compaction_input_level].level;
91 }
92
93 int start_level() const { return start_level_; }
94
95 // Outputs will go to this level
96 int output_level() const { return output_level_; }
97
98 // Returns the number of input levels in this compaction.
99 size_t num_input_levels() const { return inputs_.size(); }
100
101 // Return the object that holds the edits to the descriptor done
102 // by this compaction.
103 VersionEdit* edit() { return &edit_; }
104
105 // Returns the number of input files associated to the specified
106 // compaction input level.
107 // The function will return 0 if when "compaction_input_level" < 0
108 // or "compaction_input_level" >= "num_input_levels()".
109 size_t num_input_files(size_t compaction_input_level) const {
110 if (compaction_input_level < inputs_.size()) {
111 return inputs_[compaction_input_level].size();
112 }
113 return 0;
114 }
115
116 // Returns input version of the compaction
117 Version* input_version() const { return input_version_; }
118
119 // Returns the ColumnFamilyData associated with the compaction.
120 ColumnFamilyData* column_family_data() const { return cfd_; }
121
122 // Returns the file meta data of the 'i'th input file at the
123 // specified compaction input level.
124 // REQUIREMENT: "compaction_input_level" must be >= 0 and
125 // < "input_levels()"
126 FileMetaData* input(size_t compaction_input_level, size_t i) const {
127 assert(compaction_input_level < inputs_.size());
128 return inputs_[compaction_input_level][i];
129 }
130
131 const std::vector<AtomicCompactionUnitBoundary>* boundaries(
132 size_t compaction_input_level) const {
133 assert(compaction_input_level < inputs_.size());
134 return &inputs_[compaction_input_level].atomic_compaction_unit_boundaries;
135 }
136
137 // Returns the list of file meta data of the specified compaction
138 // input level.
139 // REQUIREMENT: "compaction_input_level" must be >= 0 and
140 // < "input_levels()"
141 const std::vector<FileMetaData*>* inputs(
142 size_t compaction_input_level) const {
143 assert(compaction_input_level < inputs_.size());
144 return &inputs_[compaction_input_level].files;
145 }
146
147 const std::vector<CompactionInputFiles>* inputs() { return &inputs_; }
148
149 // Returns the LevelFilesBrief of the specified compaction input level.
150 const LevelFilesBrief* input_levels(size_t compaction_input_level) const {
151 return &input_levels_[compaction_input_level];
152 }
153
154 // Maximum size of files to build during this compaction.
155 uint64_t max_output_file_size() const { return max_output_file_size_; }
156
157 // What compression for output
158 CompressionType output_compression() const { return output_compression_; }
159
160 // What compression options for output
161 CompressionOptions output_compression_opts() const {
162 return output_compression_opts_;
163 }
164
165 // Whether need to write output file to second DB path.
166 uint32_t output_path_id() const { return output_path_id_; }
167
168 // Is this a trivial compaction that can be implemented by just
169 // moving a single input file to the next level (no merging or splitting)
170 bool IsTrivialMove() const;
171
172 // If true, then the compaction can be done by simply deleting input files.
173 bool deletion_compaction() const { return deletion_compaction_; }
174
175 // Add all inputs to this compaction as delete operations to *edit.
176 void AddInputDeletions(VersionEdit* edit);
177
178 // Returns true if the available information we have guarantees that
179 // the input "user_key" does not exist in any level beyond "output_level()".
180 bool KeyNotExistsBeyondOutputLevel(const Slice& user_key,
181 std::vector<size_t>* level_ptrs) const;
182
183 // Clear all files to indicate that they are not being compacted
184 // Delete this compaction from the list of running compactions.
185 //
186 // Requirement: DB mutex held
187 void ReleaseCompactionFiles(Status status);
188
189 // Returns the summary of the compaction in "output" with maximum "len"
190 // in bytes. The caller is responsible for the memory management of
191 // "output".
192 void Summary(char* output, int len);
193
194 // Return the score that was used to pick this compaction run.
195 double score() const { return score_; }
196
197 // Is this compaction creating a file in the bottom most level?
198 bool bottommost_level() const { return bottommost_level_; }
199
200 // Does this compaction include all sst files?
201 bool is_full_compaction() const { return is_full_compaction_; }
202
203 // Was this compaction triggered manually by the client?
204 bool is_manual_compaction() const { return is_manual_compaction_; }
205
206 // Used when allow_trivial_move option is set in
207 // Universal compaction. If all the input files are
208 // non overlapping, then is_trivial_move_ variable
209 // will be set true, else false
210 void set_is_trivial_move(bool trivial_move) {
211 is_trivial_move_ = trivial_move;
212 }
213
214 // Used when allow_trivial_move option is set in
215 // Universal compaction. Returns true, if the input files
216 // are non-overlapping and can be trivially moved.
217 bool is_trivial_move() const { return is_trivial_move_; }
218
219 // How many total levels are there?
220 int number_levels() const { return number_levels_; }
221
222 // Return the ImmutableCFOptions that should be used throughout the compaction
223 // procedure
224 const ImmutableCFOptions* immutable_cf_options() const {
225 return &immutable_cf_options_;
226 }
227
228 // Return the MutableCFOptions that should be used throughout the compaction
229 // procedure
230 const MutableCFOptions* mutable_cf_options() const {
231 return &mutable_cf_options_;
232 }
233
234 // Returns the size in bytes that the output file should be preallocated to.
235 // In level compaction, that is max_file_size_. In universal compaction, that
236 // is the sum of all input file sizes.
237 uint64_t OutputFilePreallocationSize() const;
238
239 void SetInputVersion(Version* input_version);
240
241 struct InputLevelSummaryBuffer {
242 char buffer[128];
243 };
244
245 const char* InputLevelSummary(InputLevelSummaryBuffer* scratch) const;
246
247 uint64_t CalculateTotalInputSize() const;
248
249 // In case of compaction error, reset the nextIndex that is used
250 // to pick up the next file to be compacted from files_by_size_
251 void ResetNextCompactionIndex();
252
253 // Create a CompactionFilter from compaction_filter_factory
254 std::unique_ptr<CompactionFilter> CreateCompactionFilter() const;
255
256 // Is the input level corresponding to output_level_ empty?
257 bool IsOutputLevelEmpty() const;
258
259 // Should this compaction be broken up into smaller ones run in parallel?
260 bool ShouldFormSubcompactions() const;
261
262 // test function to validate the functionality of IsBottommostLevel()
263 // function -- determines if compaction with inputs and storage is bottommost
264 static bool TEST_IsBottommostLevel(
265 int output_level, VersionStorageInfo* vstorage,
266 const std::vector<CompactionInputFiles>& inputs);
267
268 TablePropertiesCollection GetOutputTableProperties() const {
269 return output_table_properties_;
270 }
271
272 void SetOutputTableProperties(TablePropertiesCollection tp) {
273 output_table_properties_ = std::move(tp);
274 }
275
276 Slice GetSmallestUserKey() const { return smallest_user_key_; }
277
278 Slice GetLargestUserKey() const { return largest_user_key_; }
279
280 int GetInputBaseLevel() const;
281
282 CompactionReason compaction_reason() { return compaction_reason_; }
283
284 const std::vector<FileMetaData*>& grandparents() const {
285 return grandparents_;
286 }
287
288 uint64_t max_compaction_bytes() const { return max_compaction_bytes_; }
289
290 uint32_t max_subcompactions() const { return max_subcompactions_; }
291
292 uint64_t MaxInputFileCreationTime() const;
293
294 private:
295 // mark (or clear) all files that are being compacted
296 void MarkFilesBeingCompacted(bool mark_as_compacted);
297
298 // get the smallest and largest key present in files to be compacted
299 static void GetBoundaryKeys(VersionStorageInfo* vstorage,
300 const std::vector<CompactionInputFiles>& inputs,
301 Slice* smallest_key, Slice* largest_key);
302
303 // Get the atomic file boundaries for all files in the compaction. Necessary
304 // in order to avoid the scenario described in
305 // https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and plumb
306 // down appropriate key boundaries to RangeDelAggregator during compaction.
307 static std::vector<CompactionInputFiles> PopulateWithAtomicBoundaries(
308 VersionStorageInfo* vstorage, std::vector<CompactionInputFiles> inputs);
309
310 // helper function to determine if compaction with inputs and storage is
311 // bottommost
312 static bool IsBottommostLevel(
313 int output_level, VersionStorageInfo* vstorage,
314 const std::vector<CompactionInputFiles>& inputs);
315
316 static bool IsFullCompaction(VersionStorageInfo* vstorage,
317 const std::vector<CompactionInputFiles>& inputs);
318
319 VersionStorageInfo* input_vstorage_;
320
321 const int start_level_; // the lowest level to be compacted
322 const int output_level_; // levels to which output files are stored
323 uint64_t max_output_file_size_;
324 uint64_t max_compaction_bytes_;
325 uint32_t max_subcompactions_;
326 const ImmutableCFOptions immutable_cf_options_;
327 const MutableCFOptions mutable_cf_options_;
328 Version* input_version_;
329 VersionEdit edit_;
330 const int number_levels_;
331 ColumnFamilyData* cfd_;
332 Arena arena_; // Arena used to allocate space for file_levels_
333
334 const uint32_t output_path_id_;
335 CompressionType output_compression_;
336 CompressionOptions output_compression_opts_;
337 // If true, then the comaction can be done by simply deleting input files.
338 const bool deletion_compaction_;
339
340 // Compaction input files organized by level. Constant after construction
341 const std::vector<CompactionInputFiles> inputs_;
342
343 // A copy of inputs_, organized more closely in memory
344 autovector<LevelFilesBrief, 2> input_levels_;
345
346 // State used to check for number of overlapping grandparent files
347 // (grandparent == "output_level_ + 1")
348 std::vector<FileMetaData*> grandparents_;
349 const double score_; // score that was used to pick this compaction.
350
351 // Is this compaction creating a file in the bottom most level?
352 const bool bottommost_level_;
353 // Does this compaction include all sst files?
354 const bool is_full_compaction_;
355
356 // Is this compaction requested by the client?
357 const bool is_manual_compaction_;
358
359 // True if we can do trivial move in Universal multi level
360 // compaction
361 bool is_trivial_move_;
362
363 // Does input compression match the output compression?
364 bool InputCompressionMatchesOutput() const;
365
366 // table properties of output files
367 TablePropertiesCollection output_table_properties_;
368
369 // smallest user keys in compaction
370 Slice smallest_user_key_;
371
372 // largest user keys in compaction
373 Slice largest_user_key_;
374
375 // Reason for compaction
376 CompactionReason compaction_reason_;
377 };
378
379 // Utility function
380 extern uint64_t TotalFileSize(const std::vector<FileMetaData*>& files);
381
382 } // namespace rocksdb