]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
f67539c2 TL |
10 | #include "table/block_based/index_builder.h" |
11 | ||
7c673cae | 12 | #include <assert.h> |
7c673cae | 13 | |
1e59de90 | 14 | #include <cinttypes> |
7c673cae FG |
15 | #include <list> |
16 | #include <string> | |
17 | ||
1e59de90 | 18 | #include "db/dbformat.h" |
7c673cae FG |
19 | #include "rocksdb/comparator.h" |
20 | #include "rocksdb/flush_block_policy.h" | |
f67539c2 | 21 | #include "table/block_based/partitioned_filter_block.h" |
7c673cae | 22 | #include "table/format.h" |
7c673cae | 23 | |
f67539c2 | 24 | namespace ROCKSDB_NAMESPACE { |
1e59de90 | 25 | |
7c673cae FG |
26 | // Create a index builder based on its type. |
27 | IndexBuilder* IndexBuilder::CreateIndexBuilder( | |
28 | BlockBasedTableOptions::IndexType index_type, | |
29 | const InternalKeyComparator* comparator, | |
30 | const InternalKeySliceTransform* int_key_slice_transform, | |
11fdf7f2 | 31 | const bool use_value_delta_encoding, |
7c673cae | 32 | const BlockBasedTableOptions& table_opt) { |
11fdf7f2 | 33 | IndexBuilder* result = nullptr; |
7c673cae FG |
34 | switch (index_type) { |
35 | case BlockBasedTableOptions::kBinarySearch: { | |
11fdf7f2 TL |
36 | result = new ShortenedIndexBuilder( |
37 | comparator, table_opt.index_block_restart_interval, | |
f67539c2 TL |
38 | table_opt.format_version, use_value_delta_encoding, |
39 | table_opt.index_shortening, /* include_first_key */ false); | |
20effc67 TL |
40 | break; |
41 | } | |
7c673cae | 42 | case BlockBasedTableOptions::kHashSearch: { |
f67539c2 TL |
43 | // Currently kHashSearch is incompatible with index_block_restart_interval |
44 | // > 1 | |
45 | assert(table_opt.index_block_restart_interval == 1); | |
46 | result = new HashIndexBuilder( | |
47 | comparator, int_key_slice_transform, | |
48 | table_opt.index_block_restart_interval, table_opt.format_version, | |
49 | use_value_delta_encoding, table_opt.index_shortening); | |
20effc67 TL |
50 | break; |
51 | } | |
7c673cae | 52 | case BlockBasedTableOptions::kTwoLevelIndexSearch: { |
11fdf7f2 TL |
53 | result = PartitionedIndexBuilder::CreateIndexBuilder( |
54 | comparator, use_value_delta_encoding, table_opt); | |
20effc67 TL |
55 | break; |
56 | } | |
f67539c2 TL |
57 | case BlockBasedTableOptions::kBinarySearchWithFirstKey: { |
58 | result = new ShortenedIndexBuilder( | |
59 | comparator, table_opt.index_block_restart_interval, | |
60 | table_opt.format_version, use_value_delta_encoding, | |
61 | table_opt.index_shortening, /* include_first_key */ true); | |
20effc67 TL |
62 | break; |
63 | } | |
7c673cae FG |
64 | default: { |
65 | assert(!"Do not recognize the index type "); | |
20effc67 TL |
66 | break; |
67 | } | |
7c673cae | 68 | } |
11fdf7f2 | 69 | return result; |
7c673cae FG |
70 | } |
71 | ||
1e59de90 TL |
72 | void ShortenedIndexBuilder::FindShortestInternalKeySeparator( |
73 | const Comparator& comparator, std::string* start, const Slice& limit) { | |
74 | // Attempt to shorten the user portion of the key | |
75 | Slice user_start = ExtractUserKey(*start); | |
76 | Slice user_limit = ExtractUserKey(limit); | |
77 | std::string tmp(user_start.data(), user_start.size()); | |
78 | comparator.FindShortestSeparator(&tmp, user_limit); | |
79 | if (tmp.size() <= user_start.size() && | |
80 | comparator.Compare(user_start, tmp) < 0) { | |
81 | // User key has become shorter physically, but larger logically. | |
82 | // Tack on the earliest possible number to the shortened user key. | |
83 | PutFixed64(&tmp, | |
84 | PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek)); | |
85 | assert(InternalKeyComparator(&comparator).Compare(*start, tmp) < 0); | |
86 | assert(InternalKeyComparator(&comparator).Compare(tmp, limit) < 0); | |
87 | start->swap(tmp); | |
88 | } | |
89 | } | |
90 | ||
91 | void ShortenedIndexBuilder::FindShortInternalKeySuccessor( | |
92 | const Comparator& comparator, std::string* key) { | |
93 | Slice user_key = ExtractUserKey(*key); | |
94 | std::string tmp(user_key.data(), user_key.size()); | |
95 | comparator.FindShortSuccessor(&tmp); | |
96 | if (tmp.size() <= user_key.size() && comparator.Compare(user_key, tmp) < 0) { | |
97 | // User key has become shorter physically, but larger logically. | |
98 | // Tack on the earliest possible number to the shortened user key. | |
99 | PutFixed64(&tmp, | |
100 | PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek)); | |
101 | assert(InternalKeyComparator(&comparator).Compare(*key, tmp) < 0); | |
102 | key->swap(tmp); | |
103 | } | |
104 | } | |
105 | ||
7c673cae FG |
106 | PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder( |
107 | const InternalKeyComparator* comparator, | |
11fdf7f2 | 108 | const bool use_value_delta_encoding, |
7c673cae | 109 | const BlockBasedTableOptions& table_opt) { |
11fdf7f2 TL |
110 | return new PartitionedIndexBuilder(comparator, table_opt, |
111 | use_value_delta_encoding); | |
7c673cae FG |
112 | } |
113 | ||
114 | PartitionedIndexBuilder::PartitionedIndexBuilder( | |
115 | const InternalKeyComparator* comparator, | |
11fdf7f2 TL |
116 | const BlockBasedTableOptions& table_opt, |
117 | const bool use_value_delta_encoding) | |
7c673cae | 118 | : IndexBuilder(comparator), |
11fdf7f2 TL |
119 | index_block_builder_(table_opt.index_block_restart_interval, |
120 | true /*use_delta_encoding*/, | |
121 | use_value_delta_encoding), | |
122 | index_block_builder_without_seq_(table_opt.index_block_restart_interval, | |
123 | true /*use_delta_encoding*/, | |
124 | use_value_delta_encoding), | |
7c673cae | 125 | sub_index_builder_(nullptr), |
11fdf7f2 TL |
126 | table_opt_(table_opt), |
127 | // We start by false. After each partition we revise the value based on | |
128 | // what the sub_index_builder has decided. If the feature is disabled | |
129 | // entirely, this will be set to true after switching the first | |
130 | // sub_index_builder. Otherwise, it could be set to true even one of the | |
131 | // sub_index_builders could not safely exclude seq from the keys, then it | |
132 | // wil be enforced on all sub_index_builders on ::Finish. | |
133 | seperator_is_key_plus_seq_(false), | |
134 | use_value_delta_encoding_(use_value_delta_encoding) {} | |
7c673cae FG |
135 | |
136 | PartitionedIndexBuilder::~PartitionedIndexBuilder() { | |
137 | delete sub_index_builder_; | |
138 | } | |
139 | ||
140 | void PartitionedIndexBuilder::MakeNewSubIndexBuilder() { | |
141 | assert(sub_index_builder_ == nullptr); | |
142 | sub_index_builder_ = new ShortenedIndexBuilder( | |
11fdf7f2 | 143 | comparator_, table_opt_.index_block_restart_interval, |
f67539c2 TL |
144 | table_opt_.format_version, use_value_delta_encoding_, |
145 | table_opt_.index_shortening, /* include_first_key */ false); | |
20effc67 TL |
146 | |
147 | // Set sub_index_builder_->seperator_is_key_plus_seq_ to true if | |
148 | // seperator_is_key_plus_seq_ is true (internal-key mode) (set to false by | |
149 | // default on Creation) so that flush policy can point to | |
150 | // sub_index_builder_->index_block_builder_ | |
151 | if (seperator_is_key_plus_seq_) { | |
152 | sub_index_builder_->seperator_is_key_plus_seq_ = true; | |
153 | } | |
154 | ||
7c673cae FG |
155 | flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( |
156 | table_opt_.metadata_block_size, table_opt_.block_size_deviation, | |
11fdf7f2 TL |
157 | // Note: this is sub-optimal since sub_index_builder_ could later reset |
158 | // seperator_is_key_plus_seq_ but the probability of that is low. | |
159 | sub_index_builder_->seperator_is_key_plus_seq_ | |
160 | ? sub_index_builder_->index_block_builder_ | |
161 | : sub_index_builder_->index_block_builder_without_seq_)); | |
162 | partition_cut_requested_ = false; | |
163 | } | |
164 | ||
165 | void PartitionedIndexBuilder::RequestPartitionCut() { | |
166 | partition_cut_requested_ = true; | |
7c673cae FG |
167 | } |
168 | ||
169 | void PartitionedIndexBuilder::AddIndexEntry( | |
170 | std::string* last_key_in_current_block, | |
171 | const Slice* first_key_in_next_block, const BlockHandle& block_handle) { | |
172 | // Note: to avoid two consecuitive flush in the same method call, we do not | |
173 | // check flush policy when adding the last key | |
174 | if (UNLIKELY(first_key_in_next_block == nullptr)) { // no more keys | |
175 | if (sub_index_builder_ == nullptr) { | |
176 | MakeNewSubIndexBuilder(); | |
177 | } | |
178 | sub_index_builder_->AddIndexEntry(last_key_in_current_block, | |
179 | first_key_in_next_block, block_handle); | |
20effc67 TL |
180 | if (!seperator_is_key_plus_seq_ && |
181 | sub_index_builder_->seperator_is_key_plus_seq_) { | |
182 | // then we need to apply it to all sub-index builders and reset | |
183 | // flush_policy to point to Block Builder of sub_index_builder_ that store | |
184 | // internal keys. | |
11fdf7f2 | 185 | seperator_is_key_plus_seq_ = true; |
20effc67 TL |
186 | flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( |
187 | table_opt_.metadata_block_size, table_opt_.block_size_deviation, | |
188 | sub_index_builder_->index_block_builder_)); | |
11fdf7f2 | 189 | } |
7c673cae FG |
190 | sub_index_last_key_ = std::string(*last_key_in_current_block); |
191 | entries_.push_back( | |
192 | {sub_index_last_key_, | |
193 | std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)}); | |
194 | sub_index_builder_ = nullptr; | |
195 | cut_filter_block = true; | |
196 | } else { | |
197 | // apply flush policy only to non-empty sub_index_builder_ | |
198 | if (sub_index_builder_ != nullptr) { | |
199 | std::string handle_encoding; | |
200 | block_handle.EncodeTo(&handle_encoding); | |
201 | bool do_flush = | |
11fdf7f2 | 202 | partition_cut_requested_ || |
7c673cae FG |
203 | flush_policy_->Update(*last_key_in_current_block, handle_encoding); |
204 | if (do_flush) { | |
205 | entries_.push_back( | |
206 | {sub_index_last_key_, | |
207 | std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)}); | |
208 | cut_filter_block = true; | |
209 | sub_index_builder_ = nullptr; | |
210 | } | |
211 | } | |
212 | if (sub_index_builder_ == nullptr) { | |
213 | MakeNewSubIndexBuilder(); | |
214 | } | |
215 | sub_index_builder_->AddIndexEntry(last_key_in_current_block, | |
216 | first_key_in_next_block, block_handle); | |
217 | sub_index_last_key_ = std::string(*last_key_in_current_block); | |
20effc67 TL |
218 | if (!seperator_is_key_plus_seq_ && |
219 | sub_index_builder_->seperator_is_key_plus_seq_) { | |
220 | // then we need to apply it to all sub-index builders and reset | |
221 | // flush_policy to point to Block Builder of sub_index_builder_ that store | |
222 | // internal keys. | |
11fdf7f2 | 223 | seperator_is_key_plus_seq_ = true; |
20effc67 TL |
224 | flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( |
225 | table_opt_.metadata_block_size, table_opt_.block_size_deviation, | |
226 | sub_index_builder_->index_block_builder_)); | |
11fdf7f2 | 227 | } |
7c673cae FG |
228 | } |
229 | } | |
230 | ||
231 | Status PartitionedIndexBuilder::Finish( | |
232 | IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) { | |
11fdf7f2 TL |
233 | if (partition_cnt_ == 0) { |
234 | partition_cnt_ = entries_.size(); | |
235 | } | |
7c673cae FG |
236 | // It must be set to null after last key is added |
237 | assert(sub_index_builder_ == nullptr); | |
238 | if (finishing_indexes == true) { | |
239 | Entry& last_entry = entries_.front(); | |
240 | std::string handle_encoding; | |
241 | last_partition_block_handle.EncodeTo(&handle_encoding); | |
11fdf7f2 TL |
242 | std::string handle_delta_encoding; |
243 | PutVarsignedint64( | |
244 | &handle_delta_encoding, | |
245 | last_partition_block_handle.size() - last_encoded_handle_.size()); | |
246 | last_encoded_handle_ = last_partition_block_handle; | |
247 | const Slice handle_delta_encoding_slice(handle_delta_encoding); | |
248 | index_block_builder_.Add(last_entry.key, handle_encoding, | |
249 | &handle_delta_encoding_slice); | |
250 | if (!seperator_is_key_plus_seq_) { | |
251 | index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key), | |
252 | handle_encoding, | |
253 | &handle_delta_encoding_slice); | |
254 | } | |
7c673cae FG |
255 | entries_.pop_front(); |
256 | } | |
257 | // If there is no sub_index left, then return the 2nd level index. | |
258 | if (UNLIKELY(entries_.empty())) { | |
11fdf7f2 TL |
259 | if (seperator_is_key_plus_seq_) { |
260 | index_blocks->index_block_contents = index_block_builder_.Finish(); | |
261 | } else { | |
262 | index_blocks->index_block_contents = | |
263 | index_block_builder_without_seq_.Finish(); | |
264 | } | |
265 | top_level_index_size_ = index_blocks->index_block_contents.size(); | |
266 | index_size_ += top_level_index_size_; | |
7c673cae FG |
267 | return Status::OK(); |
268 | } else { | |
269 | // Finish the next partition index in line and Incomplete() to indicate we | |
270 | // expect more calls to Finish | |
271 | Entry& entry = entries_.front(); | |
11fdf7f2 TL |
272 | // Apply the policy to all sub-indexes |
273 | entry.value->seperator_is_key_plus_seq_ = seperator_is_key_plus_seq_; | |
7c673cae | 274 | auto s = entry.value->Finish(index_blocks); |
11fdf7f2 | 275 | index_size_ += index_blocks->index_block_contents.size(); |
7c673cae FG |
276 | finishing_indexes = true; |
277 | return s.ok() ? Status::Incomplete() : s; | |
278 | } | |
279 | } | |
280 | ||
11fdf7f2 | 281 | size_t PartitionedIndexBuilder::NumPartitions() const { return partition_cnt_; } |
f67539c2 | 282 | } // namespace ROCKSDB_NAMESPACE |