1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
10 #include "table/block_based/block_based_filter_block.h"
13 #include "db/dbformat.h"
14 #include "monitoring/perf_context_imp.h"
15 #include "rocksdb/filter_policy.h"
16 #include "table/block_based/block_based_table_reader.h"
17 #include "util/coding.h"
18 #include "util/string_util.h"
20 namespace ROCKSDB_NAMESPACE
{
// Appends one "key: value" row to *props for the human-readable filter dump.
// The key is right-aligned in a 14-character column (offLength - tabLength);
// the value is wrapped every 64 characters, with continuation lines indented
// by 16 spaces so they line up under the value column.
// Restored from the garbled extraction: re-declares `cspace` and the cursor
// `i`, and re-adds the `i += dataLength` advances and `result.append(key)`
// that had been dropped.
void AppendItem(std::string* props, const std::string& key,
                const std::string& value) {
  char cspace = ' ';
  std::string value_str("");
  size_t i = 0;
  const size_t dataLength = 64;  // max payload characters per output line
  const size_t tabLength = 2;    // width of the ": " separator
  const size_t offLength = 16;   // key column width (incl. separator)

  // First chunk of the value (up to dataLength chars).
  value_str.append(&value[i], std::min(size_t(dataLength), value.size()));
  i += dataLength;
  // Remaining chunks, each on its own line, indented under the value column.
  while (i < value.size()) {
    value_str.append("\n");
    value_str.append(offLength, cspace);
    value_str.append(&value[i], std::min(size_t(dataLength), value.size() - i));
    i += dataLength;
  }

  // Right-pad the key so short keys align with long ones.
  std::string result("");
  if (key.size() < (offLength - tabLength))
    result.append(size_t((offLength - tabLength)) - key.size(), cspace);
  result.append(key);

  props->append(result + ": " + value_str + "\n");
}
51 void AppendItem(std::string
* props
, const TKey
& key
, const std::string
& value
) {
52 std::string key_str
= ROCKSDB_NAMESPACE::ToString(key
);
53 AppendItem(props
, key_str
, value
);
// See doc/table_format.txt for an explanation of the filter block format.

// Generate new filter every 2KB of data
// (kFilterBase == 1 << kFilterBaseLg == 2048 bytes of data-block offsets
// per filter partition; kFilterBaseLg is also stored in the block trailer
// so readers can recover the partitioning).
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;
63 BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
64 const SliceTransform
* prefix_extractor
,
65 const BlockBasedTableOptions
& table_opt
)
66 : policy_(table_opt
.filter_policy
.get()),
67 prefix_extractor_(prefix_extractor
),
68 whole_key_filtering_(table_opt
.whole_key_filtering
),
69 prev_prefix_start_(0),
75 void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset
) {
76 uint64_t filter_index
= (block_offset
/ kFilterBase
);
77 assert(filter_index
>= filter_offsets_
.size());
78 while (filter_index
> filter_offsets_
.size()) {
83 void BlockBasedFilterBlockBuilder::Add(const Slice
& key
) {
84 if (prefix_extractor_
&& prefix_extractor_
->InDomain(key
)) {
88 if (whole_key_filtering_
) {
93 // Add key to filter if needed
94 inline void BlockBasedFilterBlockBuilder::AddKey(const Slice
& key
) {
96 start_
.push_back(entries_
.size());
97 entries_
.append(key
.data(), key
.size());
100 // Add prefix to filter if needed
101 inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice
& key
) {
102 // get slice for most recently added entry
104 if (prev_prefix_size_
> 0) {
105 prev
= Slice(entries_
.data() + prev_prefix_start_
, prev_prefix_size_
);
108 Slice prefix
= prefix_extractor_
->Transform(key
);
109 // insert prefix only when it's different from the previous prefix.
110 if (prev
.size() == 0 || prefix
!= prev
) {
111 prev_prefix_start_
= entries_
.size();
112 prev_prefix_size_
= prefix
.size();
117 Slice
BlockBasedFilterBlockBuilder::Finish(const BlockHandle
& /*tmp*/,
119 // In this impl we ignore BlockHandle
120 *status
= Status::OK();
121 if (!start_
.empty()) {
125 // Append array of per-filter offsets
126 const uint32_t array_offset
= static_cast<uint32_t>(result_
.size());
127 for (size_t i
= 0; i
< filter_offsets_
.size(); i
++) {
128 PutFixed32(&result_
, filter_offsets_
[i
]);
131 PutFixed32(&result_
, array_offset
);
132 result_
.push_back(kFilterBaseLg
); // Save encoding parameter in result
133 return Slice(result_
);
136 void BlockBasedFilterBlockBuilder::GenerateFilter() {
137 const size_t num_entries
= start_
.size();
138 if (num_entries
== 0) {
139 // Fast path if there are no keys for this filter
140 filter_offsets_
.push_back(static_cast<uint32_t>(result_
.size()));
144 // Make list of keys from flattened key structure
145 start_
.push_back(entries_
.size()); // Simplify length computation
146 tmp_entries_
.resize(num_entries
);
147 for (size_t i
= 0; i
< num_entries
; i
++) {
148 const char* base
= entries_
.data() + start_
[i
];
149 size_t length
= start_
[i
+ 1] - start_
[i
];
150 tmp_entries_
[i
] = Slice(base
, length
);
153 // Generate filter for current set of keys and append to result_.
154 filter_offsets_
.push_back(static_cast<uint32_t>(result_
.size()));
155 policy_
->CreateFilter(&tmp_entries_
[0], static_cast<int>(num_entries
),
158 tmp_entries_
.clear();
161 prev_prefix_start_
= 0;
162 prev_prefix_size_
= 0;
165 BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
166 const BlockBasedTable
* t
, CachableEntry
<BlockContents
>&& filter_block
)
167 : FilterBlockReaderCommon(t
, std::move(filter_block
)) {
169 assert(table()->get_rep());
170 assert(table()->get_rep()->filter_policy
);
173 std::unique_ptr
<FilterBlockReader
> BlockBasedFilterBlockReader::Create(
174 const BlockBasedTable
* table
, const ReadOptions
& ro
,
175 FilePrefetchBuffer
* prefetch_buffer
, bool use_cache
, bool prefetch
,
176 bool pin
, BlockCacheLookupContext
* lookup_context
) {
178 assert(table
->get_rep());
179 assert(!pin
|| prefetch
);
181 CachableEntry
<BlockContents
> filter_block
;
182 if (prefetch
|| !use_cache
) {
183 const Status s
= ReadFilterBlock(table
, prefetch_buffer
, ro
, use_cache
,
184 nullptr /* get_context */, lookup_context
,
187 IGNORE_STATUS_IF_ERROR(s
);
188 return std::unique_ptr
<FilterBlockReader
>();
191 if (use_cache
&& !pin
) {
192 filter_block
.Reset();
196 return std::unique_ptr
<FilterBlockReader
>(
197 new BlockBasedFilterBlockReader(table
, std::move(filter_block
)));
200 bool BlockBasedFilterBlockReader::KeyMayMatch(
201 const Slice
& key
, const SliceTransform
* /* prefix_extractor */,
202 uint64_t block_offset
, const bool no_io
,
203 const Slice
* const /*const_ikey_ptr*/, GetContext
* get_context
,
204 BlockCacheLookupContext
* lookup_context
) {
205 assert(block_offset
!= kNotValid
);
206 if (!whole_key_filtering()) {
209 return MayMatch(key
, block_offset
, no_io
, get_context
, lookup_context
);
212 bool BlockBasedFilterBlockReader::PrefixMayMatch(
213 const Slice
& prefix
, const SliceTransform
* /* prefix_extractor */,
214 uint64_t block_offset
, const bool no_io
,
215 const Slice
* const /*const_ikey_ptr*/, GetContext
* get_context
,
216 BlockCacheLookupContext
* lookup_context
) {
217 assert(block_offset
!= kNotValid
);
218 return MayMatch(prefix
, block_offset
, no_io
, get_context
, lookup_context
);
221 bool BlockBasedFilterBlockReader::ParseFieldsFromBlock(
222 const BlockContents
& contents
, const char** data
, const char** offset
,
223 size_t* num
, size_t* base_lg
) {
229 const size_t n
= contents
.data
.size();
230 if (n
< 5) { // 1 byte for base_lg and 4 for start of offset array
234 const uint32_t last_word
= DecodeFixed32(contents
.data
.data() + n
- 5);
235 if (last_word
> n
- 5) {
239 *data
= contents
.data
.data();
240 *offset
= (*data
) + last_word
;
241 *num
= (n
- 5 - last_word
) / 4;
242 *base_lg
= contents
.data
[n
- 1];
247 bool BlockBasedFilterBlockReader::MayMatch(
248 const Slice
& entry
, uint64_t block_offset
, bool no_io
,
249 GetContext
* get_context
, BlockCacheLookupContext
* lookup_context
) const {
250 CachableEntry
<BlockContents
> filter_block
;
253 GetOrReadFilterBlock(no_io
, get_context
, lookup_context
, &filter_block
);
258 assert(filter_block
.GetValue());
260 const char* data
= nullptr;
261 const char* offset
= nullptr;
264 if (!ParseFieldsFromBlock(*filter_block
.GetValue(), &data
, &offset
, &num
,
266 return true; // Errors are treated as potential matches
269 const uint64_t index
= block_offset
>> base_lg
;
271 const uint32_t start
= DecodeFixed32(offset
+ index
* 4);
272 const uint32_t limit
= DecodeFixed32(offset
+ index
* 4 + 4);
273 if (start
<= limit
&& limit
<= (uint32_t)(offset
- data
)) {
274 const Slice filter
= Slice(data
+ start
, limit
- start
);
277 assert(table()->get_rep());
278 const FilterPolicy
* const policy
= table()->get_rep()->filter_policy
;
280 const bool may_match
= policy
->KeyMayMatch(entry
, filter
);
282 PERF_COUNTER_ADD(bloom_sst_hit_count
, 1);
285 PERF_COUNTER_ADD(bloom_sst_miss_count
, 1);
288 } else if (start
== limit
) {
289 // Empty filters do not match any entries
293 return true; // Errors are treated as potential matches
296 size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
297 size_t usage
= ApproximateFilterBlockMemoryUsage();
298 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
299 usage
+= malloc_usable_size(const_cast<BlockBasedFilterBlockReader
*>(this));
301 usage
+= sizeof(*this);
302 #endif // ROCKSDB_MALLOC_USABLE_SIZE
306 std::string
BlockBasedFilterBlockReader::ToString() const {
307 CachableEntry
<BlockContents
> filter_block
;
310 GetOrReadFilterBlock(false /* no_io */, nullptr /* get_context */,
311 nullptr /* lookup_context */, &filter_block
);
313 return std::string("Unable to retrieve filter block");
316 assert(filter_block
.GetValue());
318 const char* data
= nullptr;
319 const char* offset
= nullptr;
322 if (!ParseFieldsFromBlock(*filter_block
.GetValue(), &data
, &offset
, &num
,
324 return std::string("Error parsing filter block");
328 result
.reserve(1024);
330 std::string
s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
331 AppendItem(&result
, s_fb
, ROCKSDB_NAMESPACE::ToString(num
));
332 AppendItem(&result
, s_bo
, s_hd
);
334 for (size_t index
= 0; index
< num
; index
++) {
335 uint32_t start
= DecodeFixed32(offset
+ index
* 4);
336 uint32_t limit
= DecodeFixed32(offset
+ index
* 4 + 4);
338 if (start
!= limit
) {
339 result
.append(" filter block # " +
340 ROCKSDB_NAMESPACE::ToString(index
+ 1) + "\n");
341 Slice filter
= Slice(data
+ start
, limit
- start
);
342 AppendItem(&result
, start
, filter
.ToString(true));
348 } // namespace ROCKSDB_NAMESPACE