]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/table/block_based/partitioned_index_iterator.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / table / block_based / partitioned_index_iterator.cc
CommitLineData
20effc67
TL
1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9#include "table/block_based/partitioned_index_iterator.h"
10
11namespace ROCKSDB_NAMESPACE {
12void PartitionedIndexIterator::Seek(const Slice& target) { SeekImpl(&target); }
13
14void PartitionedIndexIterator::SeekToFirst() { SeekImpl(nullptr); }
15
16void PartitionedIndexIterator::SeekImpl(const Slice* target) {
17 SavePrevIndexValue();
18
19 if (target) {
20 index_iter_->Seek(*target);
21 } else {
22 index_iter_->SeekToFirst();
23 }
24
25 if (!index_iter_->Valid()) {
26 ResetPartitionedIndexIter();
27 return;
28 }
29
30 InitPartitionedIndexBlock();
31
32 if (target) {
33 block_iter_.Seek(*target);
34 } else {
35 block_iter_.SeekToFirst();
36 }
37 FindKeyForward();
38
39 // We could check upper bound here, but that would be too complicated
40 // and checking index upper bound is less useful than for data blocks.
41
42 if (target) {
43 assert(!Valid() || (table_->get_rep()->index_key_includes_seq
44 ? (icomp_.Compare(*target, key()) <= 0)
45 : (user_comparator_.Compare(ExtractUserKey(*target),
46 key()) <= 0)));
47 }
48}
49
50void PartitionedIndexIterator::SeekToLast() {
51 SavePrevIndexValue();
52 index_iter_->SeekToLast();
53 if (!index_iter_->Valid()) {
54 ResetPartitionedIndexIter();
55 return;
56 }
57 InitPartitionedIndexBlock();
58 block_iter_.SeekToLast();
59 FindKeyBackward();
60}
61
62void PartitionedIndexIterator::Next() {
63 assert(block_iter_points_to_real_block_);
64 block_iter_.Next();
65 FindKeyForward();
66}
67
68void PartitionedIndexIterator::Prev() {
69 assert(block_iter_points_to_real_block_);
70 block_iter_.Prev();
71
72 FindKeyBackward();
73}
74
75void PartitionedIndexIterator::InitPartitionedIndexBlock() {
76 BlockHandle partitioned_index_handle = index_iter_->value().handle;
77 if (!block_iter_points_to_real_block_ ||
78 partitioned_index_handle.offset() != prev_block_offset_ ||
79 // if previous attempt of reading the block missed cache, try again
80 block_iter_.status().IsIncomplete()) {
81 if (block_iter_points_to_real_block_) {
82 ResetPartitionedIndexIter();
83 }
84 auto* rep = table_->get_rep();
85 bool is_for_compaction =
86 lookup_context_.caller == TableReaderCaller::kCompaction;
87 // Prefetch additional data for range scans (iterators).
88 // Implicit auto readahead:
89 // Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0.
90 // Explicit user requested readahead:
91 // Enabled from the very first IO when ReadOptions.readahead_size is set.
1e59de90
TL
92 block_prefetcher_.PrefetchIfNeeded(
93 rep, partitioned_index_handle, read_options_.readahead_size,
94 is_for_compaction, /*no_sequential_checking=*/false,
95 read_options_.rate_limiter_priority);
20effc67
TL
96 Status s;
97 table_->NewDataBlockIterator<IndexBlockIter>(
98 read_options_, partitioned_index_handle, &block_iter_,
99 BlockType::kIndex,
1e59de90 100 /*get_context=*/nullptr, &lookup_context_,
20effc67 101 block_prefetcher_.prefetch_buffer(),
1e59de90 102 /*for_compaction=*/is_for_compaction, /*async_read=*/false, s);
20effc67
TL
103 block_iter_points_to_real_block_ = true;
104 // We could check upper bound here but it is complicated to reason about
105 // upper bound in index iterator. On the other than, in large scans, index
106 // iterators are moved much less frequently compared to data blocks. So
107 // the upper bound check is skipped for simplicity.
108 }
109}
110
111void PartitionedIndexIterator::FindKeyForward() {
112 // This method's code is kept short to make it likely to be inlined.
113
114 assert(block_iter_points_to_real_block_);
115
116 if (!block_iter_.Valid()) {
117 // This is the only call site of FindBlockForward(), but it's extracted into
118 // a separate method to keep FindKeyForward() short and likely to be
119 // inlined. When transitioning to a different block, we call
120 // FindBlockForward(), which is much longer and is probably not inlined.
121 FindBlockForward();
122 } else {
123 // This is the fast path that avoids a function call.
124 }
125}
126
127void PartitionedIndexIterator::FindBlockForward() {
128 // TODO the while loop inherits from two-level-iterator. We don't know
129 // whether a block can be empty so it can be replaced by an "if".
130 do {
131 if (!block_iter_.status().ok()) {
132 return;
133 }
134 ResetPartitionedIndexIter();
135 index_iter_->Next();
136
137 if (!index_iter_->Valid()) {
138 return;
139 }
140
141 InitPartitionedIndexBlock();
142 block_iter_.SeekToFirst();
143 } while (!block_iter_.Valid());
144}
145
146void PartitionedIndexIterator::FindKeyBackward() {
147 while (!block_iter_.Valid()) {
148 if (!block_iter_.status().ok()) {
149 return;
150 }
151
152 ResetPartitionedIndexIter();
153 index_iter_->Prev();
154
155 if (index_iter_->Valid()) {
156 InitPartitionedIndexBlock();
157 block_iter_.SeekToLast();
158 } else {
159 return;
160 }
161 }
162}
163} // namespace ROCKSDB_NAMESPACE