1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
10 #include "db/compaction.h"
12 #ifndef __STDC_FORMAT_MACROS
13 #define __STDC_FORMAT_MACROS
19 #include "db/column_family.h"
20 #include "rocksdb/compaction_filter.h"
21 #include "util/string_util.h"
22 #include "util/sync_point.h"
26 uint64_t TotalFileSize(const std::vector
<FileMetaData
*>& files
) {
28 for (size_t i
= 0; i
< files
.size() && files
[i
]; i
++) {
29 sum
+= files
[i
]->fd
.GetFileSize();
34 void Compaction::SetInputVersion(Version
* _input_version
) {
35 input_version_
= _input_version
;
36 cfd_
= input_version_
->cfd();
39 input_version_
->Ref();
40 edit_
.SetColumnFamily(cfd_
->GetID());
43 void Compaction::GetBoundaryKeys(
44 VersionStorageInfo
* vstorage
,
45 const std::vector
<CompactionInputFiles
>& inputs
, Slice
* smallest_user_key
,
46 Slice
* largest_user_key
) {
47 bool initialized
= false;
48 const Comparator
* ucmp
= vstorage
->InternalComparator()->user_comparator();
49 for (size_t i
= 0; i
< inputs
.size(); ++i
) {
50 if (inputs
[i
].files
.empty()) {
53 if (inputs
[i
].level
== 0) {
54 // we need to consider all files on level 0
55 for (const auto* f
: inputs
[i
].files
) {
56 const Slice
& start_user_key
= f
->smallest
.user_key();
58 ucmp
->Compare(start_user_key
, *smallest_user_key
) < 0) {
59 *smallest_user_key
= start_user_key
;
61 const Slice
& end_user_key
= f
->largest
.user_key();
63 ucmp
->Compare(end_user_key
, *largest_user_key
) > 0) {
64 *largest_user_key
= end_user_key
;
69 // we only need to consider the first and last file
70 const Slice
& start_user_key
= inputs
[i
].files
[0]->smallest
.user_key();
72 ucmp
->Compare(start_user_key
, *smallest_user_key
) < 0) {
73 *smallest_user_key
= start_user_key
;
75 const Slice
& end_user_key
= inputs
[i
].files
.back()->largest
.user_key();
76 if (!initialized
|| ucmp
->Compare(end_user_key
, *largest_user_key
) > 0) {
77 *largest_user_key
= end_user_key
;
84 // helper function to determine if compaction is creating files at the
86 bool Compaction::IsBottommostLevel(
87 int output_level
, VersionStorageInfo
* vstorage
,
88 const std::vector
<CompactionInputFiles
>& inputs
) {
89 if (inputs
[0].level
== 0 &&
90 inputs
[0].files
.back() != vstorage
->LevelFiles(0).back()) {
94 Slice smallest_key
, largest_key
;
95 GetBoundaryKeys(vstorage
, inputs
, &smallest_key
, &largest_key
);
97 // Checks whether there are files living beyond the output_level.
98 // If lower levels have files, it checks for overlap between files
99 // if the compaction process and those files.
100 // Bottomlevel optimizations can be made if there are no files in
101 // lower levels or if there is no overlap with the files in
103 for (int i
= output_level
+ 1; i
< vstorage
->num_levels(); i
++) {
104 // It is not the bottommost level if there are files in higher
105 // levels when the output level is 0 or if there are files in
106 // higher levels which overlap with files to be compacted.
107 // output_level == 0 means that we want it to be considered
108 // s the bottommost level only if the last file on the level
109 // is a part of the files to be compacted - this is verified by
110 // the first if condition in this function
111 if (vstorage
->NumLevelFiles(i
) > 0 &&
112 (output_level
== 0 ||
113 vstorage
->OverlapInLevel(i
, &smallest_key
, &largest_key
))) {
120 // test function to validate the functionality of IsBottommostLevel()
121 // function -- determines if compaction with inputs and storage is bottommost
122 bool Compaction::TEST_IsBottommostLevel(
123 int output_level
, VersionStorageInfo
* vstorage
,
124 const std::vector
<CompactionInputFiles
>& inputs
) {
125 return IsBottommostLevel(output_level
, vstorage
, inputs
);
128 bool Compaction::IsFullCompaction(
129 VersionStorageInfo
* vstorage
,
130 const std::vector
<CompactionInputFiles
>& inputs
) {
131 size_t num_files_in_compaction
= 0;
132 size_t total_num_files
= 0;
133 for (int l
= 0; l
< vstorage
->num_levels(); l
++) {
134 total_num_files
+= vstorage
->NumLevelFiles(l
);
136 for (size_t i
= 0; i
< inputs
.size(); i
++) {
137 num_files_in_compaction
+= inputs
[i
].size();
139 return num_files_in_compaction
== total_num_files
;
142 Compaction::Compaction(VersionStorageInfo
* vstorage
,
143 const ImmutableCFOptions
& _immutable_cf_options
,
144 const MutableCFOptions
& _mutable_cf_options
,
145 std::vector
<CompactionInputFiles
> _inputs
,
146 int _output_level
, uint64_t _target_file_size
,
147 uint64_t _max_compaction_bytes
, uint32_t _output_path_id
,
148 CompressionType _compression
,
149 std::vector
<FileMetaData
*> _grandparents
,
150 bool _manual_compaction
, double _score
,
151 bool _deletion_compaction
,
152 CompactionReason _compaction_reason
)
153 : input_vstorage_(vstorage
),
154 start_level_(_inputs
[0].level
),
155 output_level_(_output_level
),
156 max_output_file_size_(_target_file_size
),
157 max_compaction_bytes_(_max_compaction_bytes
),
158 immutable_cf_options_(_immutable_cf_options
),
159 mutable_cf_options_(_mutable_cf_options
),
160 input_version_(nullptr),
161 number_levels_(vstorage
->num_levels()),
163 output_path_id_(_output_path_id
),
164 output_compression_(_compression
),
165 deletion_compaction_(_deletion_compaction
),
166 inputs_(std::move(_inputs
)),
167 grandparents_(std::move(_grandparents
)),
169 bottommost_level_(IsBottommostLevel(output_level_
, vstorage
, inputs_
)),
170 is_full_compaction_(IsFullCompaction(vstorage
, inputs_
)),
171 is_manual_compaction_(_manual_compaction
),
172 compaction_reason_(_compaction_reason
) {
173 MarkFilesBeingCompacted(true);
174 if (is_manual_compaction_
) {
175 compaction_reason_
= CompactionReason::kManualCompaction
;
179 for (size_t i
= 1; i
< inputs_
.size(); ++i
) {
180 assert(inputs_
[i
].level
> inputs_
[i
- 1].level
);
184 // setup input_levels_
186 input_levels_
.resize(num_input_levels());
187 for (size_t which
= 0; which
< num_input_levels(); which
++) {
188 DoGenerateLevelFilesBrief(&input_levels_
[which
], inputs_
[which
].files
,
193 GetBoundaryKeys(vstorage
, inputs_
, &smallest_user_key_
, &largest_user_key_
);
196 Compaction::~Compaction() {
197 if (input_version_
!= nullptr) {
198 input_version_
->Unref();
200 if (cfd_
!= nullptr) {
207 bool Compaction::InputCompressionMatchesOutput() const {
208 int base_level
= input_vstorage_
->base_level();
209 bool matches
= (GetCompressionType(immutable_cf_options_
, input_vstorage_
,
210 mutable_cf_options_
, start_level_
,
211 base_level
) == output_compression_
);
213 TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:Matches");
216 TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:DidntMatch");
220 bool Compaction::IsTrivialMove() const {
221 // Avoid a move if there is lots of overlapping grandparent data.
222 // Otherwise, the move could create a parent file that will require
223 // a very expensive merge later on.
224 // If start_level_== output_level_, the purpose is to force compaction
225 // filter to be applied to that level, and thus cannot be a trivial move.
227 // Check if start level have files with overlapping ranges
228 if (start_level_
== 0 && input_vstorage_
->level0_non_overlapping() == false) {
229 // We cannot move files from L0 to L1 if the files are overlapping
233 if (is_manual_compaction_
&&
234 (immutable_cf_options_
.compaction_filter
!= nullptr ||
235 immutable_cf_options_
.compaction_filter_factory
!= nullptr)) {
236 // This is a manual compaction and we have a compaction filter that should
237 // be executed, we cannot do a trivial move
241 // Used in universal compaction, where trivial move can be done if the
242 // input files are non overlapping
243 if ((immutable_cf_options_
.compaction_options_universal
.allow_trivial_move
) &&
244 (output_level_
!= 0)) {
245 return is_trivial_move_
;
248 if (!(start_level_
!= output_level_
&& num_input_levels() == 1 &&
249 input(0, 0)->fd
.GetPathId() == output_path_id() &&
250 InputCompressionMatchesOutput())) {
254 // assert inputs_.size() == 1
256 for (const auto& file
: inputs_
.front().files
) {
257 std::vector
<FileMetaData
*> file_grand_parents
;
258 if (output_level_
+ 1 >= number_levels_
) {
261 input_vstorage_
->GetOverlappingInputs(output_level_
+ 1, &file
->smallest
,
262 &file
->largest
, &file_grand_parents
);
263 const auto compaction_size
=
264 file
->fd
.GetFileSize() + TotalFileSize(file_grand_parents
);
265 if (compaction_size
> max_compaction_bytes_
) {
273 void Compaction::AddInputDeletions(VersionEdit
* out_edit
) {
274 for (size_t which
= 0; which
< num_input_levels(); which
++) {
275 for (size_t i
= 0; i
< inputs_
[which
].size(); i
++) {
276 out_edit
->DeleteFile(level(which
), inputs_
[which
][i
]->fd
.GetNumber());
281 bool Compaction::KeyNotExistsBeyondOutputLevel(
282 const Slice
& user_key
, std::vector
<size_t>* level_ptrs
) const {
283 assert(input_version_
!= nullptr);
284 assert(level_ptrs
!= nullptr);
285 assert(level_ptrs
->size() == static_cast<size_t>(number_levels_
));
286 assert(cfd_
->ioptions()->compaction_style
!= kCompactionStyleFIFO
);
287 if (cfd_
->ioptions()->compaction_style
== kCompactionStyleUniversal
) {
288 return bottommost_level_
;
290 // Maybe use binary search to find right entry instead of linear search?
291 const Comparator
* user_cmp
= cfd_
->user_comparator();
292 for (int lvl
= output_level_
+ 1; lvl
< number_levels_
; lvl
++) {
293 const std::vector
<FileMetaData
*>& files
= input_vstorage_
->LevelFiles(lvl
);
294 for (; level_ptrs
->at(lvl
) < files
.size(); level_ptrs
->at(lvl
)++) {
295 auto* f
= files
[level_ptrs
->at(lvl
)];
296 if (user_cmp
->Compare(user_key
, f
->largest
.user_key()) <= 0) {
297 // We've advanced far enough
298 if (user_cmp
->Compare(user_key
, f
->smallest
.user_key()) >= 0) {
299 // Key falls in this file's range, so definitely
300 // exists beyond output level
310 // Mark (or clear) each file that is being compacted
311 void Compaction::MarkFilesBeingCompacted(bool mark_as_compacted
) {
312 for (size_t i
= 0; i
< num_input_levels(); i
++) {
313 for (size_t j
= 0; j
< inputs_
[i
].size(); j
++) {
314 assert(mark_as_compacted
? !inputs_
[i
][j
]->being_compacted
315 : inputs_
[i
][j
]->being_compacted
);
316 inputs_
[i
][j
]->being_compacted
= mark_as_compacted
;
322 // If compacting 3 L0 files, 2 L3 files and 1 L4 file, and outputting to L5,
323 // print: "3@0 + 2@3 + 1@4 files to L5"
324 const char* Compaction::InputLevelSummary(
325 InputLevelSummaryBuffer
* scratch
) const {
327 bool is_first
= true;
328 for (auto& input_level
: inputs_
) {
329 if (input_level
.empty()) {
334 snprintf(scratch
->buffer
+ len
, sizeof(scratch
->buffer
) - len
, " + ");
338 len
+= snprintf(scratch
->buffer
+ len
, sizeof(scratch
->buffer
) - len
,
339 "%" ROCKSDB_PRIszt
"@%d", input_level
.size(),
342 snprintf(scratch
->buffer
+ len
, sizeof(scratch
->buffer
) - len
,
343 " files to L%d", output_level());
345 return scratch
->buffer
;
348 uint64_t Compaction::CalculateTotalInputSize() const {
350 for (auto& input_level
: inputs_
) {
351 for (auto f
: input_level
.files
) {
352 size
+= f
->fd
.GetFileSize();
358 void Compaction::ReleaseCompactionFiles(Status status
) {
359 MarkFilesBeingCompacted(false);
360 cfd_
->compaction_picker()->ReleaseCompactionFiles(this, status
);
363 void Compaction::ResetNextCompactionIndex() {
364 assert(input_version_
!= nullptr);
365 input_vstorage_
->ResetNextCompactionIndex(start_level_
);
369 int InputSummary(const std::vector
<FileMetaData
*>& files
, char* output
,
373 for (size_t i
= 0; i
< files
.size(); i
++) {
374 int sz
= len
- write
;
377 AppendHumanBytes(files
.at(i
)->fd
.GetFileSize(), sztxt
, 16);
378 ret
= snprintf(output
+ write
, sz
, "%" PRIu64
"(%s) ",
379 files
.at(i
)->fd
.GetNumber(), sztxt
);
380 if (ret
< 0 || ret
>= sz
) break;
383 // if files.size() is non-zero, overwrite the last space
384 return write
- !!files
.size();
388 void Compaction::Summary(char* output
, int len
) {
390 snprintf(output
, len
, "Base version %" PRIu64
" Base level %d, inputs: [",
391 input_version_
->GetVersionNumber(), start_level_
);
392 if (write
< 0 || write
>= len
) {
396 for (size_t level_iter
= 0; level_iter
< num_input_levels(); ++level_iter
) {
397 if (level_iter
> 0) {
398 write
+= snprintf(output
+ write
, len
- write
, "], [");
399 if (write
< 0 || write
>= len
) {
404 InputSummary(inputs_
[level_iter
].files
, output
+ write
, len
- write
);
405 if (write
< 0 || write
>= len
) {
410 snprintf(output
+ write
, len
- write
, "]");
413 uint64_t Compaction::OutputFilePreallocationSize() const {
414 uint64_t preallocation_size
= 0;
416 if (cfd_
->ioptions()->compaction_style
== kCompactionStyleLevel
||
417 output_level() > 0) {
418 preallocation_size
= max_output_file_size_
;
420 // output_level() == 0
421 assert(num_input_levels() > 0);
422 for (const auto& f
: inputs_
[0].files
) {
423 preallocation_size
+= f
->fd
.GetFileSize();
426 // Over-estimate slightly so we don't end up just barely crossing
428 return preallocation_size
+ (preallocation_size
/ 10);
431 std::unique_ptr
<CompactionFilter
> Compaction::CreateCompactionFilter() const {
432 if (!cfd_
->ioptions()->compaction_filter_factory
) {
436 CompactionFilter::Context context
;
437 context
.is_full_compaction
= is_full_compaction_
;
438 context
.is_manual_compaction
= is_manual_compaction_
;
439 context
.column_family_id
= cfd_
->GetID();
440 return cfd_
->ioptions()->compaction_filter_factory
->CreateCompactionFilter(
444 bool Compaction::IsOutputLevelEmpty() const {
445 return inputs_
.back().level
!= output_level_
|| inputs_
.back().empty();
448 bool Compaction::ShouldFormSubcompactions() const {
449 if (immutable_cf_options_
.max_subcompactions
<= 1 || cfd_
== nullptr) {
452 if (cfd_
->ioptions()->compaction_style
== kCompactionStyleLevel
) {
453 return start_level_
== 0 && output_level_
> 0 && !IsOutputLevelEmpty();
454 } else if (cfd_
->ioptions()->compaction_style
== kCompactionStyleUniversal
) {
455 return number_levels_
> 1 && output_level_
> 0;
461 } // namespace rocksdb