]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/db/compaction.cc
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / rocksdb / db / compaction.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include "db/compaction.h"
11
12 #ifndef __STDC_FORMAT_MACROS
13 #define __STDC_FORMAT_MACROS
14 #endif
15
16 #include <inttypes.h>
17 #include <vector>
18
19 #include "db/column_family.h"
20 #include "rocksdb/compaction_filter.h"
21 #include "util/string_util.h"
22 #include "util/sync_point.h"
23
24 namespace rocksdb {
25
26 uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
27 uint64_t sum = 0;
28 for (size_t i = 0; i < files.size() && files[i]; i++) {
29 sum += files[i]->fd.GetFileSize();
30 }
31 return sum;
32 }
33
34 void Compaction::SetInputVersion(Version* _input_version) {
35 input_version_ = _input_version;
36 cfd_ = input_version_->cfd();
37
38 cfd_->Ref();
39 input_version_->Ref();
40 edit_.SetColumnFamily(cfd_->GetID());
41 }
42
43 void Compaction::GetBoundaryKeys(
44 VersionStorageInfo* vstorage,
45 const std::vector<CompactionInputFiles>& inputs, Slice* smallest_user_key,
46 Slice* largest_user_key) {
47 bool initialized = false;
48 const Comparator* ucmp = vstorage->InternalComparator()->user_comparator();
49 for (size_t i = 0; i < inputs.size(); ++i) {
50 if (inputs[i].files.empty()) {
51 continue;
52 }
53 if (inputs[i].level == 0) {
54 // we need to consider all files on level 0
55 for (const auto* f : inputs[i].files) {
56 const Slice& start_user_key = f->smallest.user_key();
57 if (!initialized ||
58 ucmp->Compare(start_user_key, *smallest_user_key) < 0) {
59 *smallest_user_key = start_user_key;
60 }
61 const Slice& end_user_key = f->largest.user_key();
62 if (!initialized ||
63 ucmp->Compare(end_user_key, *largest_user_key) > 0) {
64 *largest_user_key = end_user_key;
65 }
66 initialized = true;
67 }
68 } else {
69 // we only need to consider the first and last file
70 const Slice& start_user_key = inputs[i].files[0]->smallest.user_key();
71 if (!initialized ||
72 ucmp->Compare(start_user_key, *smallest_user_key) < 0) {
73 *smallest_user_key = start_user_key;
74 }
75 const Slice& end_user_key = inputs[i].files.back()->largest.user_key();
76 if (!initialized || ucmp->Compare(end_user_key, *largest_user_key) > 0) {
77 *largest_user_key = end_user_key;
78 }
79 initialized = true;
80 }
81 }
82 }
83
84 // helper function to determine if compaction is creating files at the
85 // bottommost level
86 bool Compaction::IsBottommostLevel(
87 int output_level, VersionStorageInfo* vstorage,
88 const std::vector<CompactionInputFiles>& inputs) {
89 if (inputs[0].level == 0 &&
90 inputs[0].files.back() != vstorage->LevelFiles(0).back()) {
91 return false;
92 }
93
94 Slice smallest_key, largest_key;
95 GetBoundaryKeys(vstorage, inputs, &smallest_key, &largest_key);
96
97 // Checks whether there are files living beyond the output_level.
98 // If lower levels have files, it checks for overlap between files
99 // if the compaction process and those files.
100 // Bottomlevel optimizations can be made if there are no files in
101 // lower levels or if there is no overlap with the files in
102 // the lower levels.
103 for (int i = output_level + 1; i < vstorage->num_levels(); i++) {
104 // It is not the bottommost level if there are files in higher
105 // levels when the output level is 0 or if there are files in
106 // higher levels which overlap with files to be compacted.
107 // output_level == 0 means that we want it to be considered
108 // s the bottommost level only if the last file on the level
109 // is a part of the files to be compacted - this is verified by
110 // the first if condition in this function
111 if (vstorage->NumLevelFiles(i) > 0 &&
112 (output_level == 0 ||
113 vstorage->OverlapInLevel(i, &smallest_key, &largest_key))) {
114 return false;
115 }
116 }
117 return true;
118 }
119
120 // test function to validate the functionality of IsBottommostLevel()
121 // function -- determines if compaction with inputs and storage is bottommost
122 bool Compaction::TEST_IsBottommostLevel(
123 int output_level, VersionStorageInfo* vstorage,
124 const std::vector<CompactionInputFiles>& inputs) {
125 return IsBottommostLevel(output_level, vstorage, inputs);
126 }
127
128 bool Compaction::IsFullCompaction(
129 VersionStorageInfo* vstorage,
130 const std::vector<CompactionInputFiles>& inputs) {
131 size_t num_files_in_compaction = 0;
132 size_t total_num_files = 0;
133 for (int l = 0; l < vstorage->num_levels(); l++) {
134 total_num_files += vstorage->NumLevelFiles(l);
135 }
136 for (size_t i = 0; i < inputs.size(); i++) {
137 num_files_in_compaction += inputs[i].size();
138 }
139 return num_files_in_compaction == total_num_files;
140 }
141
142 Compaction::Compaction(VersionStorageInfo* vstorage,
143 const ImmutableCFOptions& _immutable_cf_options,
144 const MutableCFOptions& _mutable_cf_options,
145 std::vector<CompactionInputFiles> _inputs,
146 int _output_level, uint64_t _target_file_size,
147 uint64_t _max_compaction_bytes, uint32_t _output_path_id,
148 CompressionType _compression,
149 std::vector<FileMetaData*> _grandparents,
150 bool _manual_compaction, double _score,
151 bool _deletion_compaction,
152 CompactionReason _compaction_reason)
153 : input_vstorage_(vstorage),
154 start_level_(_inputs[0].level),
155 output_level_(_output_level),
156 max_output_file_size_(_target_file_size),
157 max_compaction_bytes_(_max_compaction_bytes),
158 immutable_cf_options_(_immutable_cf_options),
159 mutable_cf_options_(_mutable_cf_options),
160 input_version_(nullptr),
161 number_levels_(vstorage->num_levels()),
162 cfd_(nullptr),
163 output_path_id_(_output_path_id),
164 output_compression_(_compression),
165 deletion_compaction_(_deletion_compaction),
166 inputs_(std::move(_inputs)),
167 grandparents_(std::move(_grandparents)),
168 score_(_score),
169 bottommost_level_(IsBottommostLevel(output_level_, vstorage, inputs_)),
170 is_full_compaction_(IsFullCompaction(vstorage, inputs_)),
171 is_manual_compaction_(_manual_compaction),
172 compaction_reason_(_compaction_reason) {
173 MarkFilesBeingCompacted(true);
174 if (is_manual_compaction_) {
175 compaction_reason_ = CompactionReason::kManualCompaction;
176 }
177
178 #ifndef NDEBUG
179 for (size_t i = 1; i < inputs_.size(); ++i) {
180 assert(inputs_[i].level > inputs_[i - 1].level);
181 }
182 #endif
183
184 // setup input_levels_
185 {
186 input_levels_.resize(num_input_levels());
187 for (size_t which = 0; which < num_input_levels(); which++) {
188 DoGenerateLevelFilesBrief(&input_levels_[which], inputs_[which].files,
189 &arena_);
190 }
191 }
192
193 GetBoundaryKeys(vstorage, inputs_, &smallest_user_key_, &largest_user_key_);
194 }
195
196 Compaction::~Compaction() {
197 if (input_version_ != nullptr) {
198 input_version_->Unref();
199 }
200 if (cfd_ != nullptr) {
201 if (cfd_->Unref()) {
202 delete cfd_;
203 }
204 }
205 }
206
207 bool Compaction::InputCompressionMatchesOutput() const {
208 int base_level = input_vstorage_->base_level();
209 bool matches = (GetCompressionType(immutable_cf_options_, input_vstorage_,
210 mutable_cf_options_, start_level_,
211 base_level) == output_compression_);
212 if (matches) {
213 TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:Matches");
214 return true;
215 }
216 TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:DidntMatch");
217 return matches;
218 }
219
220 bool Compaction::IsTrivialMove() const {
221 // Avoid a move if there is lots of overlapping grandparent data.
222 // Otherwise, the move could create a parent file that will require
223 // a very expensive merge later on.
224 // If start_level_== output_level_, the purpose is to force compaction
225 // filter to be applied to that level, and thus cannot be a trivial move.
226
227 // Check if start level have files with overlapping ranges
228 if (start_level_ == 0 && input_vstorage_->level0_non_overlapping() == false) {
229 // We cannot move files from L0 to L1 if the files are overlapping
230 return false;
231 }
232
233 if (is_manual_compaction_ &&
234 (immutable_cf_options_.compaction_filter != nullptr ||
235 immutable_cf_options_.compaction_filter_factory != nullptr)) {
236 // This is a manual compaction and we have a compaction filter that should
237 // be executed, we cannot do a trivial move
238 return false;
239 }
240
241 // Used in universal compaction, where trivial move can be done if the
242 // input files are non overlapping
243 if ((immutable_cf_options_.compaction_options_universal.allow_trivial_move) &&
244 (output_level_ != 0)) {
245 return is_trivial_move_;
246 }
247
248 if (!(start_level_ != output_level_ && num_input_levels() == 1 &&
249 input(0, 0)->fd.GetPathId() == output_path_id() &&
250 InputCompressionMatchesOutput())) {
251 return false;
252 }
253
254 // assert inputs_.size() == 1
255
256 for (const auto& file : inputs_.front().files) {
257 std::vector<FileMetaData*> file_grand_parents;
258 if (output_level_ + 1 >= number_levels_) {
259 continue;
260 }
261 input_vstorage_->GetOverlappingInputs(output_level_ + 1, &file->smallest,
262 &file->largest, &file_grand_parents);
263 const auto compaction_size =
264 file->fd.GetFileSize() + TotalFileSize(file_grand_parents);
265 if (compaction_size > max_compaction_bytes_) {
266 return false;
267 }
268 }
269
270 return true;
271 }
272
273 void Compaction::AddInputDeletions(VersionEdit* out_edit) {
274 for (size_t which = 0; which < num_input_levels(); which++) {
275 for (size_t i = 0; i < inputs_[which].size(); i++) {
276 out_edit->DeleteFile(level(which), inputs_[which][i]->fd.GetNumber());
277 }
278 }
279 }
280
281 bool Compaction::KeyNotExistsBeyondOutputLevel(
282 const Slice& user_key, std::vector<size_t>* level_ptrs) const {
283 assert(input_version_ != nullptr);
284 assert(level_ptrs != nullptr);
285 assert(level_ptrs->size() == static_cast<size_t>(number_levels_));
286 assert(cfd_->ioptions()->compaction_style != kCompactionStyleFIFO);
287 if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) {
288 return bottommost_level_;
289 }
290 // Maybe use binary search to find right entry instead of linear search?
291 const Comparator* user_cmp = cfd_->user_comparator();
292 for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
293 const std::vector<FileMetaData*>& files = input_vstorage_->LevelFiles(lvl);
294 for (; level_ptrs->at(lvl) < files.size(); level_ptrs->at(lvl)++) {
295 auto* f = files[level_ptrs->at(lvl)];
296 if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
297 // We've advanced far enough
298 if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) {
299 // Key falls in this file's range, so definitely
300 // exists beyond output level
301 return false;
302 }
303 break;
304 }
305 }
306 }
307 return true;
308 }
309
310 // Mark (or clear) each file that is being compacted
311 void Compaction::MarkFilesBeingCompacted(bool mark_as_compacted) {
312 for (size_t i = 0; i < num_input_levels(); i++) {
313 for (size_t j = 0; j < inputs_[i].size(); j++) {
314 assert(mark_as_compacted ? !inputs_[i][j]->being_compacted
315 : inputs_[i][j]->being_compacted);
316 inputs_[i][j]->being_compacted = mark_as_compacted;
317 }
318 }
319 }
320
321 // Sample output:
322 // If compacting 3 L0 files, 2 L3 files and 1 L4 file, and outputting to L5,
323 // print: "3@0 + 2@3 + 1@4 files to L5"
324 const char* Compaction::InputLevelSummary(
325 InputLevelSummaryBuffer* scratch) const {
326 int len = 0;
327 bool is_first = true;
328 for (auto& input_level : inputs_) {
329 if (input_level.empty()) {
330 continue;
331 }
332 if (!is_first) {
333 len +=
334 snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, " + ");
335 } else {
336 is_first = false;
337 }
338 len += snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len,
339 "%" ROCKSDB_PRIszt "@%d", input_level.size(),
340 input_level.level);
341 }
342 snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len,
343 " files to L%d", output_level());
344
345 return scratch->buffer;
346 }
347
348 uint64_t Compaction::CalculateTotalInputSize() const {
349 uint64_t size = 0;
350 for (auto& input_level : inputs_) {
351 for (auto f : input_level.files) {
352 size += f->fd.GetFileSize();
353 }
354 }
355 return size;
356 }
357
358 void Compaction::ReleaseCompactionFiles(Status status) {
359 MarkFilesBeingCompacted(false);
360 cfd_->compaction_picker()->ReleaseCompactionFiles(this, status);
361 }
362
363 void Compaction::ResetNextCompactionIndex() {
364 assert(input_version_ != nullptr);
365 input_vstorage_->ResetNextCompactionIndex(start_level_);
366 }
367
368 namespace {
369 int InputSummary(const std::vector<FileMetaData*>& files, char* output,
370 int len) {
371 *output = '\0';
372 int write = 0;
373 for (size_t i = 0; i < files.size(); i++) {
374 int sz = len - write;
375 int ret;
376 char sztxt[16];
377 AppendHumanBytes(files.at(i)->fd.GetFileSize(), sztxt, 16);
378 ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ",
379 files.at(i)->fd.GetNumber(), sztxt);
380 if (ret < 0 || ret >= sz) break;
381 write += ret;
382 }
383 // if files.size() is non-zero, overwrite the last space
384 return write - !!files.size();
385 }
386 } // namespace
387
388 void Compaction::Summary(char* output, int len) {
389 int write =
390 snprintf(output, len, "Base version %" PRIu64 " Base level %d, inputs: [",
391 input_version_->GetVersionNumber(), start_level_);
392 if (write < 0 || write >= len) {
393 return;
394 }
395
396 for (size_t level_iter = 0; level_iter < num_input_levels(); ++level_iter) {
397 if (level_iter > 0) {
398 write += snprintf(output + write, len - write, "], [");
399 if (write < 0 || write >= len) {
400 return;
401 }
402 }
403 write +=
404 InputSummary(inputs_[level_iter].files, output + write, len - write);
405 if (write < 0 || write >= len) {
406 return;
407 }
408 }
409
410 snprintf(output + write, len - write, "]");
411 }
412
413 uint64_t Compaction::OutputFilePreallocationSize() const {
414 uint64_t preallocation_size = 0;
415
416 if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel ||
417 output_level() > 0) {
418 preallocation_size = max_output_file_size_;
419 } else {
420 // output_level() == 0
421 assert(num_input_levels() > 0);
422 for (const auto& f : inputs_[0].files) {
423 preallocation_size += f->fd.GetFileSize();
424 }
425 }
426 // Over-estimate slightly so we don't end up just barely crossing
427 // the threshold
428 return preallocation_size + (preallocation_size / 10);
429 }
430
431 std::unique_ptr<CompactionFilter> Compaction::CreateCompactionFilter() const {
432 if (!cfd_->ioptions()->compaction_filter_factory) {
433 return nullptr;
434 }
435
436 CompactionFilter::Context context;
437 context.is_full_compaction = is_full_compaction_;
438 context.is_manual_compaction = is_manual_compaction_;
439 context.column_family_id = cfd_->GetID();
440 return cfd_->ioptions()->compaction_filter_factory->CreateCompactionFilter(
441 context);
442 }
443
444 bool Compaction::IsOutputLevelEmpty() const {
445 return inputs_.back().level != output_level_ || inputs_.back().empty();
446 }
447
448 bool Compaction::ShouldFormSubcompactions() const {
449 if (immutable_cf_options_.max_subcompactions <= 1 || cfd_ == nullptr) {
450 return false;
451 }
452 if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel) {
453 return start_level_ == 0 && output_level_ > 0 && !IsOutputLevelEmpty();
454 } else if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) {
455 return number_levels_ > 1 && output_level_ > 0;
456 } else {
457 return false;
458 }
459 }
460
461 } // namespace rocksdb