1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 #include "table/block_based/block_based_table_iterator.h"
11 namespace ROCKSDB_NAMESPACE
{
12 void BlockBasedTableIterator::Seek(const Slice
& target
) { SeekImpl(&target
); }
14 void BlockBasedTableIterator::SeekToFirst() { SeekImpl(nullptr); }
16 void BlockBasedTableIterator::SeekImpl(const Slice
* target
) {
17 is_out_of_bound_
= false;
18 is_at_first_key_from_index_
= false;
19 if (target
&& !CheckPrefixMayMatch(*target
, IterDirection::kForward
)) {
24 bool need_seek_index
= true;
25 if (block_iter_points_to_real_block_
&& block_iter_
.Valid()) {
27 prev_block_offset_
= index_iter_
->value().handle
.offset();
30 // We can avoid an index seek if:
31 // 1. The new seek key is larger than the current key
32 // 2. The new seek key is within the upper bound of the block
33 // Since we don't necessarily know the internal key for either
34 // the current key or the upper bound, we check user keys and
35 // exclude the equality case. Considering internal keys can
36 // improve for the boundary cases, but it would complicate the
38 if (user_comparator_
.Compare(ExtractUserKey(*target
),
39 block_iter_
.user_key()) > 0 &&
40 user_comparator_
.Compare(ExtractUserKey(*target
),
41 index_iter_
->user_key()) < 0) {
42 need_seek_index
= false;
47 if (need_seek_index
) {
49 index_iter_
->Seek(*target
);
51 index_iter_
->SeekToFirst();
54 if (!index_iter_
->Valid()) {
60 IndexValue v
= index_iter_
->value();
61 const bool same_block
= block_iter_points_to_real_block_
&&
62 v
.handle
.offset() == prev_block_offset_
;
64 if (!v
.first_internal_key
.empty() && !same_block
&&
65 (!target
|| icomp_
.Compare(*target
, v
.first_internal_key
) <= 0) &&
66 allow_unprepared_value_
) {
67 // Index contains the first key of the block, and it's >= target.
68 // We can defer reading the block.
69 is_at_first_key_from_index_
= true;
70 // ResetDataIter() will invalidate block_iter_. Thus, there is no need to
71 // call CheckDataBlockWithinUpperBound() to check for iterate_upper_bound
72 // as that will be done later when the data block is actually read.
75 // Need to use the data block.
79 // When the user does a reseek, the iterate_upper_bound might have
80 // changed. CheckDataBlockWithinUpperBound() needs to be called
81 // explicitly if the reseek ends up in the same data block.
82 // If the reseek ends up in a different block, InitDataBlock() will do
83 // the iterator upper bound check.
84 CheckDataBlockWithinUpperBound();
88 block_iter_
.Seek(*target
);
90 block_iter_
.SeekToFirst();
98 assert(!Valid() || icomp_
.Compare(*target
, key()) <= 0);
102 void BlockBasedTableIterator::SeekForPrev(const Slice
& target
) {
103 is_out_of_bound_
= false;
104 is_at_first_key_from_index_
= false;
105 // For now totally disable prefix seek in auto prefix mode because we don't
107 if (!CheckPrefixMayMatch(target
, IterDirection::kBackward
)) {
112 SavePrevIndexValue();
114 // Call Seek() rather than SeekForPrev() in the index block, because the
115 // target data block will likely to contain the position for `target`, the
116 // same as Seek(), rather than than before.
117 // For example, if we have three data blocks, each containing two keys:
118 // [2, 4] [6, 8] [10, 12]
119 // (the keys in the index block would be [4, 8, 12])
120 // and the user calls SeekForPrev(7), we need to go to the second block,
121 // just like if they call Seek(7).
122 // The only case where the block is difference is when they seek to a position
123 // in the boundary. For example, if they SeekForPrev(5), we should go to the
124 // first block, rather than the second. However, we don't have the information
125 // to distinguish the two unless we read the second block. In this case, we'll
126 // end up with reading two blocks.
127 index_iter_
->Seek(target
);
129 if (!index_iter_
->Valid()) {
130 auto seek_status
= index_iter_
->status();
131 // Check for IO error
132 if (!seek_status
.IsNotFound() && !seek_status
.ok()) {
137 // With prefix index, Seek() returns NotFound if the prefix doesn't exist
138 if (seek_status
.IsNotFound()) {
139 // Any key less than the target is fine for prefix seek
143 index_iter_
->SeekToLast();
145 // Check for IO error
146 if (!index_iter_
->Valid()) {
154 block_iter_
.SeekForPrev(target
);
157 CheckDataBlockWithinUpperBound();
158 assert(!block_iter_
.Valid() ||
159 icomp_
.Compare(target
, block_iter_
.key()) >= 0);
162 void BlockBasedTableIterator::SeekToLast() {
163 is_out_of_bound_
= false;
164 is_at_first_key_from_index_
= false;
165 SavePrevIndexValue();
166 index_iter_
->SeekToLast();
167 if (!index_iter_
->Valid()) {
172 block_iter_
.SeekToLast();
174 CheckDataBlockWithinUpperBound();
177 void BlockBasedTableIterator::Next() {
178 if (is_at_first_key_from_index_
&& !MaterializeCurrentBlock()) {
181 assert(block_iter_points_to_real_block_
);
187 bool BlockBasedTableIterator::NextAndGetResult(IterateResult
* result
) {
189 bool is_valid
= Valid();
192 result
->bound_check_result
= UpperBoundCheckResult();
193 result
->value_prepared
= !is_at_first_key_from_index_
;
198 void BlockBasedTableIterator::Prev() {
199 if (is_at_first_key_from_index_
) {
200 is_at_first_key_from_index_
= false;
203 if (!index_iter_
->Valid()) {
208 block_iter_
.SeekToLast();
210 assert(block_iter_points_to_real_block_
);
217 void BlockBasedTableIterator::InitDataBlock() {
218 BlockHandle data_block_handle
= index_iter_
->value().handle
;
219 if (!block_iter_points_to_real_block_
||
220 data_block_handle
.offset() != prev_block_offset_
||
221 // if previous attempt of reading the block missed cache, try again
222 block_iter_
.status().IsIncomplete()) {
223 if (block_iter_points_to_real_block_
) {
226 auto* rep
= table_
->get_rep();
228 bool is_for_compaction
=
229 lookup_context_
.caller
== TableReaderCaller::kCompaction
;
230 // Prefetch additional data for range scans (iterators).
231 // Implicit auto readahead:
232 // Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0.
233 // Explicit user requested readahead:
234 // Enabled from the very first IO when ReadOptions.readahead_size is set.
235 block_prefetcher_
.PrefetchIfNeeded(rep
, data_block_handle
,
236 read_options_
.readahead_size
,
240 table_
->NewDataBlockIterator
<DataBlockIter
>(
241 read_options_
, data_block_handle
, &block_iter_
, BlockType::kData
,
242 /*get_context=*/nullptr, &lookup_context_
, s
,
243 block_prefetcher_
.prefetch_buffer(),
244 /*for_compaction=*/is_for_compaction
);
245 block_iter_points_to_real_block_
= true;
246 CheckDataBlockWithinUpperBound();
250 bool BlockBasedTableIterator::MaterializeCurrentBlock() {
251 assert(is_at_first_key_from_index_
);
252 assert(!block_iter_points_to_real_block_
);
253 assert(index_iter_
->Valid());
255 is_at_first_key_from_index_
= false;
257 assert(block_iter_points_to_real_block_
);
259 if (!block_iter_
.status().ok()) {
263 block_iter_
.SeekToFirst();
265 if (!block_iter_
.Valid() ||
266 icomp_
.Compare(block_iter_
.key(),
267 index_iter_
->value().first_internal_key
) != 0) {
268 block_iter_
.Invalidate(Status::Corruption(
269 "first key in index doesn't match first key in block"));
276 void BlockBasedTableIterator::FindKeyForward() {
277 // This method's code is kept short to make it likely to be inlined.
279 assert(!is_out_of_bound_
);
280 assert(block_iter_points_to_real_block_
);
282 if (!block_iter_
.Valid()) {
283 // This is the only call site of FindBlockForward(), but it's extracted into
284 // a separate method to keep FindKeyForward() short and likely to be
285 // inlined. When transitioning to a different block, we call
286 // FindBlockForward(), which is much longer and is probably not inlined.
289 // This is the fast path that avoids a function call.
293 void BlockBasedTableIterator::FindBlockForward() {
294 // TODO the while loop inherits from two-level-iterator. We don't know
295 // whether a block can be empty so it can be replaced by an "if".
297 if (!block_iter_
.status().ok()) {
300 // Whether next data block is out of upper bound, if there is one.
301 const bool next_block_is_out_of_bound
=
302 read_options_
.iterate_upper_bound
!= nullptr &&
303 block_iter_points_to_real_block_
&&
304 block_upper_bound_check_
== BlockUpperBound::kUpperBoundInCurBlock
;
305 assert(!next_block_is_out_of_bound
||
306 user_comparator_
.CompareWithoutTimestamp(
307 *read_options_
.iterate_upper_bound
, /*a_has_ts=*/false,
308 index_iter_
->user_key(), /*b_has_ts=*/true) <= 0);
311 if (next_block_is_out_of_bound
) {
312 // The next block is out of bound. No need to read it.
313 TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound", nullptr);
314 // We need to make sure this is not the last data block before setting
315 // is_out_of_bound_, since the index key for the last data block can be
316 // larger than smallest key of the next file on the same level.
317 if (index_iter_
->Valid()) {
318 is_out_of_bound_
= true;
323 if (!index_iter_
->Valid()) {
327 IndexValue v
= index_iter_
->value();
329 if (!v
.first_internal_key
.empty() && allow_unprepared_value_
) {
330 // Index contains the first key of the block. Defer reading the block.
331 is_at_first_key_from_index_
= true;
336 block_iter_
.SeekToFirst();
337 } while (!block_iter_
.Valid());
340 void BlockBasedTableIterator::FindKeyBackward() {
341 while (!block_iter_
.Valid()) {
342 if (!block_iter_
.status().ok()) {
349 if (index_iter_
->Valid()) {
351 block_iter_
.SeekToLast();
357 // We could have check lower bound here too, but we opt not to do it for
361 void BlockBasedTableIterator::CheckOutOfBound() {
362 if (read_options_
.iterate_upper_bound
!= nullptr &&
363 block_upper_bound_check_
!= BlockUpperBound::kUpperBoundBeyondCurBlock
&&
366 user_comparator_
.CompareWithoutTimestamp(
367 *read_options_
.iterate_upper_bound
, /*a_has_ts=*/false, user_key(),
368 /*b_has_ts=*/true) <= 0;
372 void BlockBasedTableIterator::CheckDataBlockWithinUpperBound() {
373 if (read_options_
.iterate_upper_bound
!= nullptr &&
374 block_iter_points_to_real_block_
) {
375 block_upper_bound_check_
= (user_comparator_
.CompareWithoutTimestamp(
376 *read_options_
.iterate_upper_bound
,
377 /*a_has_ts=*/false, index_iter_
->user_key(),
378 /*b_has_ts=*/true) > 0)
379 ? BlockUpperBound::kUpperBoundBeyondCurBlock
380 : BlockUpperBound::kUpperBoundInCurBlock
;
383 } // namespace ROCKSDB_NAMESPACE