]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/table/index_builder.cc
bump version to 15.2.11-pve1
[ceph.git] / ceph / src / rocksdb / table / index_builder.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10#include "table/index_builder.h"
11#include <assert.h>
12#include <inttypes.h>
13
14#include <list>
15#include <string>
16
17#include "rocksdb/comparator.h"
18#include "rocksdb/flush_block_policy.h"
19#include "table/format.h"
20#include "table/partitioned_filter_block.h"
21
22// Without anonymous namespace here, we fail the warning -Wmissing-prototypes
23namespace rocksdb {
24// using namespace rocksdb;
25// Create a index builder based on its type.
26IndexBuilder* IndexBuilder::CreateIndexBuilder(
27 BlockBasedTableOptions::IndexType index_type,
28 const InternalKeyComparator* comparator,
29 const InternalKeySliceTransform* int_key_slice_transform,
11fdf7f2 30 const bool use_value_delta_encoding,
7c673cae 31 const BlockBasedTableOptions& table_opt) {
11fdf7f2 32 IndexBuilder* result = nullptr;
7c673cae
FG
33 switch (index_type) {
34 case BlockBasedTableOptions::kBinarySearch: {
11fdf7f2
TL
35 result = new ShortenedIndexBuilder(
36 comparator, table_opt.index_block_restart_interval,
37 table_opt.format_version, use_value_delta_encoding);
7c673cae 38 }
11fdf7f2 39 break;
7c673cae 40 case BlockBasedTableOptions::kHashSearch: {
11fdf7f2
TL
41 result = new HashIndexBuilder(comparator, int_key_slice_transform,
42 table_opt.index_block_restart_interval,
43 table_opt.format_version,
44 use_value_delta_encoding);
7c673cae 45 }
11fdf7f2 46 break;
7c673cae 47 case BlockBasedTableOptions::kTwoLevelIndexSearch: {
11fdf7f2
TL
48 result = PartitionedIndexBuilder::CreateIndexBuilder(
49 comparator, use_value_delta_encoding, table_opt);
7c673cae 50 }
11fdf7f2 51 break;
7c673cae
FG
52 default: {
53 assert(!"Do not recognize the index type ");
7c673cae 54 }
11fdf7f2 55 break;
7c673cae 56 }
11fdf7f2 57 return result;
7c673cae
FG
58}
59
60PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder(
61 const InternalKeyComparator* comparator,
11fdf7f2 62 const bool use_value_delta_encoding,
7c673cae 63 const BlockBasedTableOptions& table_opt) {
11fdf7f2
TL
64 return new PartitionedIndexBuilder(comparator, table_opt,
65 use_value_delta_encoding);
7c673cae
FG
66}
67
68PartitionedIndexBuilder::PartitionedIndexBuilder(
69 const InternalKeyComparator* comparator,
11fdf7f2
TL
70 const BlockBasedTableOptions& table_opt,
71 const bool use_value_delta_encoding)
7c673cae 72 : IndexBuilder(comparator),
11fdf7f2
TL
73 index_block_builder_(table_opt.index_block_restart_interval,
74 true /*use_delta_encoding*/,
75 use_value_delta_encoding),
76 index_block_builder_without_seq_(table_opt.index_block_restart_interval,
77 true /*use_delta_encoding*/,
78 use_value_delta_encoding),
7c673cae 79 sub_index_builder_(nullptr),
11fdf7f2
TL
80 table_opt_(table_opt),
81 // We start by false. After each partition we revise the value based on
82 // what the sub_index_builder has decided. If the feature is disabled
83 // entirely, this will be set to true after switching the first
84 // sub_index_builder. Otherwise, it could be set to true even one of the
85 // sub_index_builders could not safely exclude seq from the keys, then it
86 // wil be enforced on all sub_index_builders on ::Finish.
87 seperator_is_key_plus_seq_(false),
88 use_value_delta_encoding_(use_value_delta_encoding) {}
7c673cae
FG
89
90PartitionedIndexBuilder::~PartitionedIndexBuilder() {
91 delete sub_index_builder_;
92}
93
94void PartitionedIndexBuilder::MakeNewSubIndexBuilder() {
95 assert(sub_index_builder_ == nullptr);
96 sub_index_builder_ = new ShortenedIndexBuilder(
11fdf7f2
TL
97 comparator_, table_opt_.index_block_restart_interval,
98 table_opt_.format_version, use_value_delta_encoding_);
7c673cae
FG
99 flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
100 table_opt_.metadata_block_size, table_opt_.block_size_deviation,
11fdf7f2
TL
101 // Note: this is sub-optimal since sub_index_builder_ could later reset
102 // seperator_is_key_plus_seq_ but the probability of that is low.
103 sub_index_builder_->seperator_is_key_plus_seq_
104 ? sub_index_builder_->index_block_builder_
105 : sub_index_builder_->index_block_builder_without_seq_));
106 partition_cut_requested_ = false;
107}
108
109void PartitionedIndexBuilder::RequestPartitionCut() {
110 partition_cut_requested_ = true;
7c673cae
FG
111}
112
113void PartitionedIndexBuilder::AddIndexEntry(
114 std::string* last_key_in_current_block,
115 const Slice* first_key_in_next_block, const BlockHandle& block_handle) {
116 // Note: to avoid two consecuitive flush in the same method call, we do not
117 // check flush policy when adding the last key
118 if (UNLIKELY(first_key_in_next_block == nullptr)) { // no more keys
119 if (sub_index_builder_ == nullptr) {
120 MakeNewSubIndexBuilder();
121 }
122 sub_index_builder_->AddIndexEntry(last_key_in_current_block,
123 first_key_in_next_block, block_handle);
11fdf7f2
TL
124 if (sub_index_builder_->seperator_is_key_plus_seq_) {
125 // then we need to apply it to all sub-index builders
126 seperator_is_key_plus_seq_ = true;
127 }
7c673cae
FG
128 sub_index_last_key_ = std::string(*last_key_in_current_block);
129 entries_.push_back(
130 {sub_index_last_key_,
131 std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)});
132 sub_index_builder_ = nullptr;
133 cut_filter_block = true;
134 } else {
135 // apply flush policy only to non-empty sub_index_builder_
136 if (sub_index_builder_ != nullptr) {
137 std::string handle_encoding;
138 block_handle.EncodeTo(&handle_encoding);
139 bool do_flush =
11fdf7f2 140 partition_cut_requested_ ||
7c673cae
FG
141 flush_policy_->Update(*last_key_in_current_block, handle_encoding);
142 if (do_flush) {
143 entries_.push_back(
144 {sub_index_last_key_,
145 std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)});
146 cut_filter_block = true;
147 sub_index_builder_ = nullptr;
148 }
149 }
150 if (sub_index_builder_ == nullptr) {
151 MakeNewSubIndexBuilder();
152 }
153 sub_index_builder_->AddIndexEntry(last_key_in_current_block,
154 first_key_in_next_block, block_handle);
155 sub_index_last_key_ = std::string(*last_key_in_current_block);
11fdf7f2
TL
156 if (sub_index_builder_->seperator_is_key_plus_seq_) {
157 // then we need to apply it to all sub-index builders
158 seperator_is_key_plus_seq_ = true;
159 }
7c673cae
FG
160 }
161}
162
163Status PartitionedIndexBuilder::Finish(
164 IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) {
11fdf7f2
TL
165 if (partition_cnt_ == 0) {
166 partition_cnt_ = entries_.size();
167 }
7c673cae
FG
168 // It must be set to null after last key is added
169 assert(sub_index_builder_ == nullptr);
170 if (finishing_indexes == true) {
171 Entry& last_entry = entries_.front();
172 std::string handle_encoding;
173 last_partition_block_handle.EncodeTo(&handle_encoding);
11fdf7f2
TL
174 std::string handle_delta_encoding;
175 PutVarsignedint64(
176 &handle_delta_encoding,
177 last_partition_block_handle.size() - last_encoded_handle_.size());
178 last_encoded_handle_ = last_partition_block_handle;
179 const Slice handle_delta_encoding_slice(handle_delta_encoding);
180 index_block_builder_.Add(last_entry.key, handle_encoding,
181 &handle_delta_encoding_slice);
182 if (!seperator_is_key_plus_seq_) {
183 index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key),
184 handle_encoding,
185 &handle_delta_encoding_slice);
186 }
7c673cae
FG
187 entries_.pop_front();
188 }
189 // If there is no sub_index left, then return the 2nd level index.
190 if (UNLIKELY(entries_.empty())) {
11fdf7f2
TL
191 if (seperator_is_key_plus_seq_) {
192 index_blocks->index_block_contents = index_block_builder_.Finish();
193 } else {
194 index_blocks->index_block_contents =
195 index_block_builder_without_seq_.Finish();
196 }
197 top_level_index_size_ = index_blocks->index_block_contents.size();
198 index_size_ += top_level_index_size_;
7c673cae
FG
199 return Status::OK();
200 } else {
201 // Finish the next partition index in line and Incomplete() to indicate we
202 // expect more calls to Finish
203 Entry& entry = entries_.front();
11fdf7f2
TL
204 // Apply the policy to all sub-indexes
205 entry.value->seperator_is_key_plus_seq_ = seperator_is_key_plus_seq_;
7c673cae 206 auto s = entry.value->Finish(index_blocks);
11fdf7f2 207 index_size_ += index_blocks->index_block_contents.size();
7c673cae
FG
208 finishing_indexes = true;
209 return s.ok() ? Status::Incomplete() : s;
210 }
211}
212
11fdf7f2 213size_t PartitionedIndexBuilder::NumPartitions() const { return partition_cnt_; }
7c673cae 214} // namespace rocksdb