]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/table/block_based/index_builder.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / table / block_based / index_builder.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
f67539c2
TL
10#include "table/block_based/index_builder.h"
11
7c673cae 12#include <assert.h>
f67539c2 13#include <cinttypes>
7c673cae
FG
14
15#include <list>
16#include <string>
17
18#include "rocksdb/comparator.h"
19#include "rocksdb/flush_block_policy.h"
f67539c2 20#include "table/block_based/partitioned_filter_block.h"
7c673cae 21#include "table/format.h"
7c673cae
FG
22
23// Without anonymous namespace here, we fail the warning -Wmissing-prototypes
f67539c2 24namespace ROCKSDB_NAMESPACE {
7c673cae
FG
25// using namespace rocksdb;
26// Create a index builder based on its type.
27IndexBuilder* IndexBuilder::CreateIndexBuilder(
28 BlockBasedTableOptions::IndexType index_type,
29 const InternalKeyComparator* comparator,
30 const InternalKeySliceTransform* int_key_slice_transform,
11fdf7f2 31 const bool use_value_delta_encoding,
7c673cae 32 const BlockBasedTableOptions& table_opt) {
11fdf7f2 33 IndexBuilder* result = nullptr;
7c673cae
FG
34 switch (index_type) {
35 case BlockBasedTableOptions::kBinarySearch: {
11fdf7f2
TL
36 result = new ShortenedIndexBuilder(
37 comparator, table_opt.index_block_restart_interval,
f67539c2
TL
38 table_opt.format_version, use_value_delta_encoding,
39 table_opt.index_shortening, /* include_first_key */ false);
20effc67
TL
40 break;
41 }
7c673cae 42 case BlockBasedTableOptions::kHashSearch: {
f67539c2
TL
43 // Currently kHashSearch is incompatible with index_block_restart_interval
44 // > 1
45 assert(table_opt.index_block_restart_interval == 1);
46 result = new HashIndexBuilder(
47 comparator, int_key_slice_transform,
48 table_opt.index_block_restart_interval, table_opt.format_version,
49 use_value_delta_encoding, table_opt.index_shortening);
20effc67
TL
50 break;
51 }
7c673cae 52 case BlockBasedTableOptions::kTwoLevelIndexSearch: {
11fdf7f2
TL
53 result = PartitionedIndexBuilder::CreateIndexBuilder(
54 comparator, use_value_delta_encoding, table_opt);
20effc67
TL
55 break;
56 }
f67539c2
TL
57 case BlockBasedTableOptions::kBinarySearchWithFirstKey: {
58 result = new ShortenedIndexBuilder(
59 comparator, table_opt.index_block_restart_interval,
60 table_opt.format_version, use_value_delta_encoding,
61 table_opt.index_shortening, /* include_first_key */ true);
20effc67
TL
62 break;
63 }
7c673cae
FG
64 default: {
65 assert(!"Do not recognize the index type ");
20effc67
TL
66 break;
67 }
7c673cae 68 }
11fdf7f2 69 return result;
7c673cae
FG
70}
71
72PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder(
73 const InternalKeyComparator* comparator,
11fdf7f2 74 const bool use_value_delta_encoding,
7c673cae 75 const BlockBasedTableOptions& table_opt) {
11fdf7f2
TL
76 return new PartitionedIndexBuilder(comparator, table_opt,
77 use_value_delta_encoding);
7c673cae
FG
78}
79
80PartitionedIndexBuilder::PartitionedIndexBuilder(
81 const InternalKeyComparator* comparator,
11fdf7f2
TL
82 const BlockBasedTableOptions& table_opt,
83 const bool use_value_delta_encoding)
7c673cae 84 : IndexBuilder(comparator),
11fdf7f2
TL
85 index_block_builder_(table_opt.index_block_restart_interval,
86 true /*use_delta_encoding*/,
87 use_value_delta_encoding),
88 index_block_builder_without_seq_(table_opt.index_block_restart_interval,
89 true /*use_delta_encoding*/,
90 use_value_delta_encoding),
7c673cae 91 sub_index_builder_(nullptr),
11fdf7f2
TL
92 table_opt_(table_opt),
93 // We start by false. After each partition we revise the value based on
94 // what the sub_index_builder has decided. If the feature is disabled
95 // entirely, this will be set to true after switching the first
96 // sub_index_builder. Otherwise, it could be set to true even one of the
97 // sub_index_builders could not safely exclude seq from the keys, then it
98 // wil be enforced on all sub_index_builders on ::Finish.
99 seperator_is_key_plus_seq_(false),
100 use_value_delta_encoding_(use_value_delta_encoding) {}
7c673cae
FG
101
102PartitionedIndexBuilder::~PartitionedIndexBuilder() {
103 delete sub_index_builder_;
104}
105
106void PartitionedIndexBuilder::MakeNewSubIndexBuilder() {
107 assert(sub_index_builder_ == nullptr);
108 sub_index_builder_ = new ShortenedIndexBuilder(
11fdf7f2 109 comparator_, table_opt_.index_block_restart_interval,
f67539c2
TL
110 table_opt_.format_version, use_value_delta_encoding_,
111 table_opt_.index_shortening, /* include_first_key */ false);
20effc67
TL
112
113 // Set sub_index_builder_->seperator_is_key_plus_seq_ to true if
114 // seperator_is_key_plus_seq_ is true (internal-key mode) (set to false by
115 // default on Creation) so that flush policy can point to
116 // sub_index_builder_->index_block_builder_
117 if (seperator_is_key_plus_seq_) {
118 sub_index_builder_->seperator_is_key_plus_seq_ = true;
119 }
120
7c673cae
FG
121 flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
122 table_opt_.metadata_block_size, table_opt_.block_size_deviation,
11fdf7f2
TL
123 // Note: this is sub-optimal since sub_index_builder_ could later reset
124 // seperator_is_key_plus_seq_ but the probability of that is low.
125 sub_index_builder_->seperator_is_key_plus_seq_
126 ? sub_index_builder_->index_block_builder_
127 : sub_index_builder_->index_block_builder_without_seq_));
128 partition_cut_requested_ = false;
129}
130
131void PartitionedIndexBuilder::RequestPartitionCut() {
132 partition_cut_requested_ = true;
7c673cae
FG
133}
134
135void PartitionedIndexBuilder::AddIndexEntry(
136 std::string* last_key_in_current_block,
137 const Slice* first_key_in_next_block, const BlockHandle& block_handle) {
138 // Note: to avoid two consecuitive flush in the same method call, we do not
139 // check flush policy when adding the last key
140 if (UNLIKELY(first_key_in_next_block == nullptr)) { // no more keys
141 if (sub_index_builder_ == nullptr) {
142 MakeNewSubIndexBuilder();
143 }
144 sub_index_builder_->AddIndexEntry(last_key_in_current_block,
145 first_key_in_next_block, block_handle);
20effc67
TL
146 if (!seperator_is_key_plus_seq_ &&
147 sub_index_builder_->seperator_is_key_plus_seq_) {
148 // then we need to apply it to all sub-index builders and reset
149 // flush_policy to point to Block Builder of sub_index_builder_ that store
150 // internal keys.
11fdf7f2 151 seperator_is_key_plus_seq_ = true;
20effc67
TL
152 flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
153 table_opt_.metadata_block_size, table_opt_.block_size_deviation,
154 sub_index_builder_->index_block_builder_));
11fdf7f2 155 }
7c673cae
FG
156 sub_index_last_key_ = std::string(*last_key_in_current_block);
157 entries_.push_back(
158 {sub_index_last_key_,
159 std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)});
160 sub_index_builder_ = nullptr;
161 cut_filter_block = true;
162 } else {
163 // apply flush policy only to non-empty sub_index_builder_
164 if (sub_index_builder_ != nullptr) {
165 std::string handle_encoding;
166 block_handle.EncodeTo(&handle_encoding);
167 bool do_flush =
11fdf7f2 168 partition_cut_requested_ ||
7c673cae
FG
169 flush_policy_->Update(*last_key_in_current_block, handle_encoding);
170 if (do_flush) {
171 entries_.push_back(
172 {sub_index_last_key_,
173 std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)});
174 cut_filter_block = true;
175 sub_index_builder_ = nullptr;
176 }
177 }
178 if (sub_index_builder_ == nullptr) {
179 MakeNewSubIndexBuilder();
180 }
181 sub_index_builder_->AddIndexEntry(last_key_in_current_block,
182 first_key_in_next_block, block_handle);
183 sub_index_last_key_ = std::string(*last_key_in_current_block);
20effc67
TL
184 if (!seperator_is_key_plus_seq_ &&
185 sub_index_builder_->seperator_is_key_plus_seq_) {
186 // then we need to apply it to all sub-index builders and reset
187 // flush_policy to point to Block Builder of sub_index_builder_ that store
188 // internal keys.
11fdf7f2 189 seperator_is_key_plus_seq_ = true;
20effc67
TL
190 flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
191 table_opt_.metadata_block_size, table_opt_.block_size_deviation,
192 sub_index_builder_->index_block_builder_));
11fdf7f2 193 }
7c673cae
FG
194 }
195}
196
197Status PartitionedIndexBuilder::Finish(
198 IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) {
11fdf7f2
TL
199 if (partition_cnt_ == 0) {
200 partition_cnt_ = entries_.size();
201 }
7c673cae
FG
202 // It must be set to null after last key is added
203 assert(sub_index_builder_ == nullptr);
204 if (finishing_indexes == true) {
205 Entry& last_entry = entries_.front();
206 std::string handle_encoding;
207 last_partition_block_handle.EncodeTo(&handle_encoding);
11fdf7f2
TL
208 std::string handle_delta_encoding;
209 PutVarsignedint64(
210 &handle_delta_encoding,
211 last_partition_block_handle.size() - last_encoded_handle_.size());
212 last_encoded_handle_ = last_partition_block_handle;
213 const Slice handle_delta_encoding_slice(handle_delta_encoding);
214 index_block_builder_.Add(last_entry.key, handle_encoding,
215 &handle_delta_encoding_slice);
216 if (!seperator_is_key_plus_seq_) {
217 index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key),
218 handle_encoding,
219 &handle_delta_encoding_slice);
220 }
7c673cae
FG
221 entries_.pop_front();
222 }
223 // If there is no sub_index left, then return the 2nd level index.
224 if (UNLIKELY(entries_.empty())) {
11fdf7f2
TL
225 if (seperator_is_key_plus_seq_) {
226 index_blocks->index_block_contents = index_block_builder_.Finish();
227 } else {
228 index_blocks->index_block_contents =
229 index_block_builder_without_seq_.Finish();
230 }
231 top_level_index_size_ = index_blocks->index_block_contents.size();
232 index_size_ += top_level_index_size_;
7c673cae
FG
233 return Status::OK();
234 } else {
235 // Finish the next partition index in line and Incomplete() to indicate we
236 // expect more calls to Finish
237 Entry& entry = entries_.front();
11fdf7f2
TL
238 // Apply the policy to all sub-indexes
239 entry.value->seperator_is_key_plus_seq_ = seperator_is_key_plus_seq_;
7c673cae 240 auto s = entry.value->Finish(index_blocks);
11fdf7f2 241 index_size_ += index_blocks->index_block_contents.size();
7c673cae
FG
242 finishing_indexes = true;
243 return s.ok() ? Status::Incomplete() : s;
244 }
245}
246
11fdf7f2 247size_t PartitionedIndexBuilder::NumPartitions() const { return partition_cnt_; }
f67539c2 248} // namespace ROCKSDB_NAMESPACE