1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
10 #include "table/block_based/block_based_table_reader.h"
11 #include "table/block_based/block_based_table_reader_impl.h"
12 #include "table/block_based/block_prefetcher.h"
13 #include "table/block_based/reader_common.h"
15 namespace ROCKSDB_NAMESPACE
{
16 // Iterates over the contents of BlockBasedTable.
17 class BlockBasedTableIterator
: public InternalIteratorBase
<Slice
> {
18 // compaction_readahead_size: its value will only be used if for_compaction =
20 // @param read_options Must outlive this iterator.
22 BlockBasedTableIterator(
23 const BlockBasedTable
* table
, const ReadOptions
& read_options
,
24 const InternalKeyComparator
& icomp
,
25 std::unique_ptr
<InternalIteratorBase
<IndexValue
>>&& index_iter
,
26 bool check_filter
, bool need_upper_bound_check
,
27 const SliceTransform
* prefix_extractor
, TableReaderCaller caller
,
28 size_t compaction_readahead_size
= 0, bool allow_unprepared_value
= false)
29 : index_iter_(std::move(index_iter
)),
31 read_options_(read_options
),
33 user_comparator_(icomp
.user_comparator()),
34 pinned_iters_mgr_(nullptr),
35 prefix_extractor_(prefix_extractor
),
36 lookup_context_(caller
),
38 compaction_readahead_size
,
39 table_
->get_rep()->table_options
.initial_auto_readahead_size
),
40 allow_unprepared_value_(allow_unprepared_value
),
41 block_iter_points_to_real_block_(false),
42 check_filter_(check_filter
),
43 need_upper_bound_check_(need_upper_bound_check
),
44 async_read_in_progress_(false) {}
46 ~BlockBasedTableIterator() {}
48 void Seek(const Slice
& target
) override
;
49 void SeekForPrev(const Slice
& target
) override
;
50 void SeekToFirst() override
;
51 void SeekToLast() override
;
52 void Next() final override
;
53 bool NextAndGetResult(IterateResult
* result
) override
;
55 bool Valid() const override
{
56 return !is_out_of_bound_
&&
57 (is_at_first_key_from_index_
||
58 (block_iter_points_to_real_block_
&& block_iter_
.Valid()));
60 Slice
key() const override
{
62 if (is_at_first_key_from_index_
) {
63 return index_iter_
->value().first_internal_key
;
65 return block_iter_
.key();
68 Slice
user_key() const override
{
70 if (is_at_first_key_from_index_
) {
71 return ExtractUserKey(index_iter_
->value().first_internal_key
);
73 return block_iter_
.user_key();
76 bool PrepareValue() override
{
79 if (!is_at_first_key_from_index_
) {
83 return const_cast<BlockBasedTableIterator
*>(this)
84 ->MaterializeCurrentBlock();
86 Slice
value() const override
{
87 // PrepareValue() must have been called.
88 assert(!is_at_first_key_from_index_
);
91 return block_iter_
.value();
93 Status
status() const override
{
94 // Prefix index set status to NotFound when the prefix does not exist
95 if (!index_iter_
->status().ok() && !index_iter_
->status().IsNotFound()) {
96 return index_iter_
->status();
97 } else if (block_iter_points_to_real_block_
) {
98 return block_iter_
.status();
99 } else if (async_read_in_progress_
) {
100 return Status::TryAgain();
106 inline IterBoundCheck
UpperBoundCheckResult() override
{
107 if (is_out_of_bound_
) {
108 return IterBoundCheck::kOutOfBound
;
109 } else if (block_upper_bound_check_
==
110 BlockUpperBound::kUpperBoundBeyondCurBlock
) {
111 assert(!is_out_of_bound_
);
112 return IterBoundCheck::kInbound
;
114 return IterBoundCheck::kUnknown
;
118 void SetPinnedItersMgr(PinnedIteratorsManager
* pinned_iters_mgr
) override
{
119 pinned_iters_mgr_
= pinned_iters_mgr
;
121 bool IsKeyPinned() const override
{
122 // Our key comes either from block_iter_'s current key
123 // or index_iter_'s current *value*.
124 return pinned_iters_mgr_
&& pinned_iters_mgr_
->PinningEnabled() &&
125 ((is_at_first_key_from_index_
&& index_iter_
->IsValuePinned()) ||
126 (block_iter_points_to_real_block_
&& block_iter_
.IsKeyPinned()));
128 bool IsValuePinned() const override
{
129 assert(!is_at_first_key_from_index_
);
132 // BlockIter::IsValuePinned() is always true. No need to check
133 return pinned_iters_mgr_
&& pinned_iters_mgr_
->PinningEnabled() &&
134 block_iter_points_to_real_block_
;
137 void ResetDataIter() {
138 if (block_iter_points_to_real_block_
) {
139 if (pinned_iters_mgr_
!= nullptr && pinned_iters_mgr_
->PinningEnabled()) {
140 block_iter_
.DelegateCleanupsTo(pinned_iters_mgr_
);
142 block_iter_
.Invalidate(Status::OK());
143 block_iter_points_to_real_block_
= false;
145 block_upper_bound_check_
= BlockUpperBound::kUnknown
;
148 void SavePrevIndexValue() {
149 if (block_iter_points_to_real_block_
) {
150 // Reseek. If they end up with the same data block, we shouldn't re-fetch
151 // the same data block.
152 prev_block_offset_
= index_iter_
->value().handle
.offset();
156 void GetReadaheadState(ReadaheadFileInfo
* readahead_file_info
) override
{
157 if (block_prefetcher_
.prefetch_buffer() != nullptr &&
158 read_options_
.adaptive_readahead
) {
159 block_prefetcher_
.prefetch_buffer()->GetReadaheadState(
160 &(readahead_file_info
->data_block_readahead_info
));
162 index_iter_
->GetReadaheadState(readahead_file_info
);
167 void SetReadaheadState(ReadaheadFileInfo
* readahead_file_info
) override
{
168 if (read_options_
.adaptive_readahead
) {
169 block_prefetcher_
.SetReadaheadState(
170 &(readahead_file_info
->data_block_readahead_info
));
172 index_iter_
->SetReadaheadState(readahead_file_info
);
177 std::unique_ptr
<InternalIteratorBase
<IndexValue
>> index_iter_
;
180 enum class IterDirection
{
184 // This enum indicates whether the upper bound falls into current block
187 // | cur block | <-- (1)
190 // --- <boundary key> ---
193 // | next block | <-- (4)
196 // When the block is smaller than <boundary key>, kUpperBoundInCurBlock
197 // is the value to use. The examples are (1) or (2) in the graph. It means
198 // all keys in the next block or beyond will be out of bound. Keys within
199 // the current block may or may not be out of bound.
200 // When the block is larger or equal to <boundary key>,
201 // kUpperBoundBeyondCurBlock is to be used. The examples are (3) and (4)
202 // in the graph. It means that all keys in the current block is within the
203 // upper bound and keys in the next block may or may not be within the uppder
205 // If the boundary key hasn't been checked against the upper bound,
206 // kUnknown can be used.
207 enum class BlockUpperBound
{
208 kUpperBoundInCurBlock
,
209 kUpperBoundBeyondCurBlock
,
213 const BlockBasedTable
* table_
;
214 const ReadOptions
& read_options_
;
215 const InternalKeyComparator
& icomp_
;
216 UserComparatorWrapper user_comparator_
;
217 PinnedIteratorsManager
* pinned_iters_mgr_
;
218 DataBlockIter block_iter_
;
219 const SliceTransform
* prefix_extractor_
;
220 uint64_t prev_block_offset_
= std::numeric_limits
<uint64_t>::max();
221 BlockCacheLookupContext lookup_context_
;
223 BlockPrefetcher block_prefetcher_
;
225 const bool allow_unprepared_value_
;
226 // True if block_iter_ is initialized and points to the same block
227 // as index iterator.
228 bool block_iter_points_to_real_block_
;
229 // See InternalIteratorBase::IsOutOfBound().
230 bool is_out_of_bound_
= false;
231 // How current data block's boundary key with the next block is compared with
232 // iterate upper bound.
233 BlockUpperBound block_upper_bound_check_
= BlockUpperBound::kUnknown
;
234 // True if we're standing at the first key of a block, and we haven't loaded
235 // that block yet. A call to PrepareValue() will trigger loading the block.
236 bool is_at_first_key_from_index_
= false;
238 // TODO(Zhongyi): pick a better name
239 bool need_upper_bound_check_
;
241 bool async_read_in_progress_
;
243 // If `target` is null, seek to first.
244 void SeekImpl(const Slice
* target
, bool async_prefetch
);
246 void InitDataBlock();
247 void AsyncInitDataBlock(bool is_first_pass
);
248 bool MaterializeCurrentBlock();
249 void FindKeyForward();
250 void FindBlockForward();
251 void FindKeyBackward();
252 void CheckOutOfBound();
254 // Check if data block is fully within iterate_upper_bound.
256 // Note MyRocks may update iterate bounds between seek. To workaround it,
257 // we need to check and update data_block_within_upper_bound_ accordingly.
258 void CheckDataBlockWithinUpperBound();
260 bool CheckPrefixMayMatch(const Slice
& ikey
, IterDirection direction
) {
261 if (need_upper_bound_check_
&& direction
== IterDirection::kBackward
) {
262 // Upper bound check isn't sufficient for backward direction to
263 // guarantee the same result as total order, so disable prefix
267 if (check_filter_
&& !table_
->PrefixRangeMayMatch(
268 ikey
, read_options_
, prefix_extractor_
,
269 need_upper_bound_check_
, &lookup_context_
)) {
270 // TODO remember the iterator is invalidated because of prefix
271 // match. This can avoid the upper level file iterator to falsely
272 // believe the position is the end of the SST file and move to
273 // the first key of the next file.
280 } // namespace ROCKSDB_NAMESPACE