//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#include "table/block_based/full_filter_block.h"
#include <array>

#include "monitoring/perf_context_imp.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h"
#include "util/coding.h"

namespace ROCKSDB_NAMESPACE {

FullFilterBlockBuilder::FullFilterBlockBuilder(
    const SliceTransform* _prefix_extractor, bool whole_key_filtering,
    FilterBitsBuilder* filter_bits_builder)
    : prefix_extractor_(_prefix_extractor),
      whole_key_filtering_(whole_key_filtering),
      last_whole_key_recorded_(false),
      last_prefix_recorded_(false),
      num_added_(0) {
  assert(filter_bits_builder != nullptr);
  filter_bits_builder_.reset(filter_bits_builder);
}

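// Adds the key to the filter under construction: the whole key when whole-key
// filtering is enabled, and/or its prefix when a prefix extractor is
// configured and the key is in its domain.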
void FullFilterBlockBuilder::Add(const Slice& key) {
  const bool add_prefix =
      prefix_extractor_ && prefix_extractor_->InDomain(key);
  if (whole_key_filtering_) {
    if (!add_prefix) {
      AddKey(key);
    } else {
      // If both whole keys and prefixes are added to the bloom filter, the
      // additions are interleaved, so the bits builder cannot detect
      // duplicates simply by comparing with the last item. Track the last
      // whole key here instead.
      Slice last_whole_key = Slice(last_whole_key_str_);
      if (!last_whole_key_recorded_ || last_whole_key.compare(key) != 0) {
        AddKey(key);
        last_whole_key_recorded_ = true;
        last_whole_key_str_.assign(key.data(), key.size());
      }
    }
  }
  if (add_prefix) {
    AddPrefix(key);
  }
}

// Add key to filter if needed
inline void FullFilterBlockBuilder::AddKey(const Slice& key) {
  filter_bits_builder_->AddKey(key);
  num_added_++;
}

// Add prefix to filter if needed
void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
  Slice prefix = prefix_extractor_->Transform(key);
  if (whole_key_filtering_) {
    // If both whole keys and prefixes are added to the bloom filter, the
    // additions are interleaved, so the bits builder cannot detect duplicates
    // simply by comparing with the last item. Track the last prefix here
    // instead.
    Slice last_prefix = Slice(last_prefix_str_);
    if (!last_prefix_recorded_ || last_prefix.compare(prefix) != 0) {
      AddKey(prefix);
      last_prefix_recorded_ = true;
      last_prefix_str_.assign(prefix.data(), prefix.size());
    }
  } else {
    AddKey(prefix);
  }
}

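// Clears the duplicate-detection state (last whole key / last prefix)
// recorded by Add().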
void FullFilterBlockBuilder::Reset() {
  last_whole_key_recorded_ = false;
  last_prefix_recorded_ = false;
}

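// Produces the filter contents built so far and resets the builder's
// duplicate-detection state. The BlockHandle argument is unused here, and
// *status is always set to OK.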
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
                                     Status* status) {
  Reset();
  // In this impl we ignore BlockHandle
  *status = Status::OK();
  if (num_added_ != 0) {
    num_added_ = 0;
    return filter_bits_builder_->Finish(&filter_data_);
  }
  return Slice();
}

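// The reader caches whether the table's prefix extractor always produces
// full-length prefixes; IsFilterCompatible() below relies on this to decide
// whether the prefix bloom filter can be used for range queries.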
FullFilterBlockReader::FullFilterBlockReader(
    const BlockBasedTable* t,
    CachableEntry<ParsedFullFilterBlock>&& filter_block)
    : FilterBlockReaderCommon(t, std::move(filter_block)) {
  const SliceTransform* const prefix_extractor = table_prefix_extractor();
  if (prefix_extractor) {
    full_length_enabled_ =
        prefix_extractor->FullLengthEnabled(&prefix_extractor_full_length_);
  }
}

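// Whole-key point lookup. Returns true ("may match") whenever the filter
// cannot rule the key out, including when whole-key filtering is disabled.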
bool FullFilterBlockReader::KeyMayMatch(
    const Slice& key, const SliceTransform* /*prefix_extractor*/,
    uint64_t block_offset, const bool no_io,
    const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
    BlockCacheLookupContext* lookup_context) {
#ifdef NDEBUG
  (void)block_offset;
#endif
  assert(block_offset == kNotValid);
  if (!whole_key_filtering()) {
    return true;
  }
  return MayMatch(key, no_io, get_context, lookup_context);
}

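// Factory for the reader. When prefetching is requested or the block cache is
// not used, the filter block is read eagerly; if it goes into the block cache
// and is not pinned, the cache entry is released again after warming the
// cache, and subsequent lookups re-fetch it on demand.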
std::unique_ptr<FilterBlockReader> FullFilterBlockReader::Create(
    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
    bool use_cache, bool prefetch, bool pin,
    BlockCacheLookupContext* lookup_context) {
  assert(table);
  assert(table->get_rep());
  assert(!pin || prefetch);

  CachableEntry<ParsedFullFilterBlock> filter_block;
  if (prefetch || !use_cache) {
    const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
                                     use_cache, nullptr /* get_context */,
                                     lookup_context, &filter_block);
    if (!s.ok()) {
      return std::unique_ptr<FilterBlockReader>();
    }

    if (use_cache && !pin) {
      filter_block.Reset();
    }
  }

  return std::unique_ptr<FilterBlockReader>(
      new FullFilterBlockReader(table, std::move(filter_block)));
}

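// Prefix point lookup; the prefix has already been extracted by the caller.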
bool FullFilterBlockReader::PrefixMayMatch(
    const Slice& prefix, const SliceTransform* /* prefix_extractor */,
    uint64_t block_offset, const bool no_io,
    const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
    BlockCacheLookupContext* lookup_context) {
#ifdef NDEBUG
  (void)block_offset;
#endif
  assert(block_offset == kNotValid);
  return MayMatch(prefix, no_io, get_context, lookup_context);
}

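// Consults the filter for a single key or prefix. Any failure to load the
// filter block, or a missing bits reader, is treated as a potential match.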
bool FullFilterBlockReader::MayMatch(
    const Slice& entry, bool no_io, GetContext* get_context,
    BlockCacheLookupContext* lookup_context) const {
  CachableEntry<ParsedFullFilterBlock> filter_block;

  const Status s =
      GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block);
  if (!s.ok()) {
    return true;
  }

  assert(filter_block.GetValue());

  FilterBitsReader* const filter_bits_reader =
      filter_block.GetValue()->filter_bits_reader();

  if (filter_bits_reader) {
    if (filter_bits_reader->MayMatch(entry)) {
      PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
      return true;
    } else {
      PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
      return false;
    }
  }
  return true;  // remain consistent with the block-based filter
}

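// Batched (MultiGet) whole-key lookup: keys ruled out by the filter are
// removed from the range, and the remaining keys are left in place.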
void FullFilterBlockReader::KeysMayMatch(
    MultiGetRange* range, const SliceTransform* /*prefix_extractor*/,
    uint64_t block_offset, const bool no_io,
    BlockCacheLookupContext* lookup_context) {
#ifdef NDEBUG
  (void)range;
  (void)block_offset;
#endif
  assert(block_offset == kNotValid);
  if (!whole_key_filtering()) {
    // Simply return. Don't skip any key - consider all keys as likely to be
    // present
    return;
  }
  MayMatch(range, no_io, nullptr, lookup_context);
}

void FullFilterBlockReader::PrefixesMayMatch(
    MultiGetRange* range, const SliceTransform* prefix_extractor,
    uint64_t block_offset, const bool no_io,
    BlockCacheLookupContext* lookup_context) {
#ifdef NDEBUG
  (void)range;
  (void)block_offset;
#endif
  assert(block_offset == kNotValid);
  MayMatch(range, no_io, prefix_extractor, lookup_context);
}

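// Batched filter probe shared by KeysMayMatch() and PrefixesMayMatch(). When a
// prefix extractor is supplied, keys are transformed to prefixes first, and
// keys outside the extractor's domain are not checked against the filter.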
void FullFilterBlockReader::MayMatch(
    MultiGetRange* range, bool no_io, const SliceTransform* prefix_extractor,
    BlockCacheLookupContext* lookup_context) const {
  CachableEntry<ParsedFullFilterBlock> filter_block;

  const Status s = GetOrReadFilterBlock(no_io, range->begin()->get_context,
                                        lookup_context, &filter_block);
  if (!s.ok()) {
    return;
  }

  assert(filter_block.GetValue());

  FilterBitsReader* const filter_bits_reader =
      filter_block.GetValue()->filter_bits_reader();

  if (!filter_bits_reader) {
    return;
  }

  // We need to use an array instead of autovector for may_match since
  // &may_match[0] doesn't work for autovector<bool> (compiler error). So
  // declare both keys and may_match as arrays, which is also slightly less
  // expensive compared to autovector
  std::array<Slice*, MultiGetContext::MAX_BATCH_SIZE> keys;
  std::array<bool, MultiGetContext::MAX_BATCH_SIZE> may_match = {{true}};
  autovector<Slice, MultiGetContext::MAX_BATCH_SIZE> prefixes;
  int num_keys = 0;
  MultiGetRange filter_range(*range, range->begin(), range->end());
  for (auto iter = filter_range.begin(); iter != filter_range.end(); ++iter) {
    if (!prefix_extractor) {
      keys[num_keys++] = &iter->ukey;
    } else if (prefix_extractor->InDomain(iter->ukey)) {
      prefixes.emplace_back(prefix_extractor->Transform(iter->ukey));
      keys[num_keys++] = &prefixes.back();
    } else {
      filter_range.SkipKey(iter);
    }
  }

  filter_bits_reader->MayMatch(num_keys, &keys[0], &may_match[0]);

  int i = 0;
  for (auto iter = filter_range.begin(); iter != filter_range.end(); ++iter) {
    if (!may_match[i]) {
      // Update original MultiGet range to skip this key. The filter_range
      // was temporarily used just to skip keys not in prefix_extractor domain
      range->SkipKey(iter);
      PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
    } else {
      // PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
      PerfContext* perf_ctx = get_perf_context();
      perf_ctx->bloom_sst_hit_count++;
    }
    ++i;
  }
}

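// Approximate memory footprint of the reader object plus its filter block.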
size_t FullFilterBlockReader::ApproximateMemoryUsage() const {
  size_t usage = ApproximateFilterBlockMemoryUsage();
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
  usage += malloc_usable_size(const_cast<FullFilterBlockReader*>(this));
#else
  usage += sizeof(*this);
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
  return usage;
}

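// Uses the prefix bloom filter to decide whether the range starting at
// user_key (bounded above by iterate_upper_bound) may contain any entries.
// *filter_checked reports whether the filter was actually consulted; when it
// was not, the function conservatively returns true.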
bool FullFilterBlockReader::RangeMayExist(
    const Slice* iterate_upper_bound, const Slice& user_key,
    const SliceTransform* prefix_extractor, const Comparator* comparator,
    const Slice* const const_ikey_ptr, bool* filter_checked,
    bool need_upper_bound_check, BlockCacheLookupContext* lookup_context) {
  if (!prefix_extractor || !prefix_extractor->InDomain(user_key)) {
    *filter_checked = false;
    return true;
  }
  Slice prefix = prefix_extractor->Transform(user_key);
  if (need_upper_bound_check &&
      !IsFilterCompatible(iterate_upper_bound, prefix, comparator)) {
    *filter_checked = false;
    return true;
  } else {
    *filter_checked = true;
    return PrefixMayMatch(prefix, prefix_extractor, kNotValid, false,
                          const_ikey_ptr, /* get_context */ nullptr,
                          lookup_context);
  }
}

bool FullFilterBlockReader::IsFilterCompatible(
    const Slice* iterate_upper_bound, const Slice& prefix,
    const Comparator* comparator) const {
  // Try to reuse the bloom filter in the SST table even if the
  // prefix_extractor in mutable_cf_options has changed. If all keys in the
  // range [user_key, upper_bound) share the same prefix, the bloom filter may
  // still be usable.
  const SliceTransform* const prefix_extractor = table_prefix_extractor();
  if (iterate_upper_bound != nullptr && prefix_extractor) {
    if (!prefix_extractor->InDomain(*iterate_upper_bound)) {
      return false;
    }
    Slice upper_bound_xform =
        prefix_extractor->Transform(*iterate_upper_bound);
    // First, check whether user_key and upper_bound share the same prefix.
    if (!comparator->Equal(prefix, upper_bound_xform)) {
      // Second, check whether user_key's prefix is the immediate predecessor
      // of upper_bound and has the same length. If so, all keys in the range
      // [user_key, upper_bound) are guaranteed to share the same prefix.
      // upper_bound must also be full length for this to be correct.
      if (!full_length_enabled_ ||
          iterate_upper_bound->size() != prefix_extractor_full_length_ ||
          !comparator->IsSameLengthImmediateSuccessor(prefix,
                                                      *iterate_upper_bound)) {
        return false;
      }
    }
    return true;
  } else {
    return false;
  }
}

}  // namespace ROCKSDB_NAMESPACE