1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
10 #include "rocksdb/filter_policy.h"
12 #include "rocksdb/slice.h"
13 #include "table/block_based_filter_block.h"
14 #include "table/full_filter_bits_builder.h"
15 #include "table/full_filter_block.h"
16 #include "util/coding.h"
17 #include "util/hash.h"
21 class BlockBasedFilterBlockBuilder
;
22 class FullFilterBlockBuilder
;
24 FullFilterBitsBuilder::FullFilterBitsBuilder(const size_t bits_per_key
,
25 const size_t num_probes
)
26 : bits_per_key_(bits_per_key
), num_probes_(num_probes
) {
27 assert(bits_per_key_
);
30 FullFilterBitsBuilder::~FullFilterBitsBuilder() {}
32 void FullFilterBitsBuilder::AddKey(const Slice
& key
) {
33 uint32_t hash
= BloomHash(key
);
34 if (hash_entries_
.size() == 0 || hash
!= hash_entries_
.back()) {
35 hash_entries_
.push_back(hash
);
39 Slice
FullFilterBitsBuilder::Finish(std::unique_ptr
<const char[]>* buf
) {
40 uint32_t total_bits
, num_lines
;
41 char* data
= ReserveSpace(static_cast<int>(hash_entries_
.size()),
42 &total_bits
, &num_lines
);
45 if (total_bits
!= 0 && num_lines
!= 0) {
46 for (auto h
: hash_entries_
) {
47 AddHash(h
, data
, num_lines
, total_bits
);
50 data
[total_bits
/8] = static_cast<char>(num_probes_
);
51 EncodeFixed32(data
+ total_bits
/8 + 1, static_cast<uint32_t>(num_lines
));
53 const char* const_data
= data
;
54 buf
->reset(const_data
);
55 hash_entries_
.clear();
57 return Slice(data
, total_bits
/ 8 + 5);
60 uint32_t FullFilterBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits
) {
62 (total_bits
+ CACHE_LINE_SIZE
* 8 - 1) / (CACHE_LINE_SIZE
* 8);
64 // Make num_lines an odd number to make sure more bits are involved
65 // when determining which block.
66 if (num_lines
% 2 == 0) {
69 return num_lines
* (CACHE_LINE_SIZE
* 8);
72 uint32_t FullFilterBitsBuilder::CalculateSpace(const int num_entry
,
74 uint32_t* num_lines
) {
75 assert(bits_per_key_
);
77 uint32_t total_bits_tmp
= num_entry
* static_cast<uint32_t>(bits_per_key_
);
79 *total_bits
= GetTotalBitsForLocality(total_bits_tmp
);
80 *num_lines
= *total_bits
/ (CACHE_LINE_SIZE
* 8);
81 assert(*total_bits
> 0 && *total_bits
% 8 == 0);
83 // filter is empty, just leave space for metadata
88 // Reserve space for Filter
89 uint32_t sz
= *total_bits
/ 8;
90 sz
+= 5; // 4 bytes for num_lines, 1 byte for num_probes
94 char* FullFilterBitsBuilder::ReserveSpace(const int num_entry
,
96 uint32_t* num_lines
) {
97 uint32_t sz
= CalculateSpace(num_entry
, total_bits
, num_lines
);
98 char* data
= new char[sz
];
103 int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t space
) {
104 assert(bits_per_key_
);
106 uint32_t dont_care1
, dont_care2
;
107 int high
= (int) (space
* 8 / bits_per_key_
+ 1);
110 for (; n
>= low
; n
--) {
111 uint32_t sz
= CalculateSpace(n
, &dont_care1
, &dont_care2
);
116 assert(n
< high
); // High should be an overestimation
120 inline void FullFilterBitsBuilder::AddHash(uint32_t h
, char* data
,
121 uint32_t num_lines
, uint32_t total_bits
) {
125 assert(num_lines
> 0 && total_bits
> 0);
127 const uint32_t delta
= (h
>> 17) | (h
<< 15); // Rotate right 17 bits
128 uint32_t b
= (h
% num_lines
) * (CACHE_LINE_SIZE
* 8);
130 for (uint32_t i
= 0; i
< num_probes_
; ++i
) {
131 // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
132 // to a simple operation by compiler.
133 const uint32_t bitpos
= b
+ (h
% (CACHE_LINE_SIZE
* 8));
134 data
[bitpos
/ 8] |= (1 << (bitpos
% 8));
141 class FullFilterBitsReader
: public FilterBitsReader
{
143 explicit FullFilterBitsReader(const Slice
& contents
)
144 : data_(const_cast<char*>(contents
.data())),
145 data_len_(static_cast<uint32_t>(contents
.size())),
148 log2_cache_line_size_(0) {
150 GetFilterMeta(contents
, &num_probes_
, &num_lines_
);
151 // Sanitize broken parameter
152 if (num_lines_
!= 0 && (data_len_
-5) % num_lines_
!= 0) {
155 } else if (num_lines_
!= 0) {
157 uint32_t num_lines_at_curr_cache_size
=
158 (data_len_
- 5) >> log2_cache_line_size_
;
159 if (num_lines_at_curr_cache_size
== 0) {
160 // The cache line size seems not a power of two. It's not supported
161 // and indicates a corruption so disable using this filter.
167 if (num_lines_at_curr_cache_size
== num_lines_
) {
170 ++log2_cache_line_size_
;
175 ~FullFilterBitsReader() override
{}
177 bool MayMatch(const Slice
& entry
) override
{
178 if (data_len_
<= 5) { // remain same with original filter
181 // Other Error params, including a broken filter, regarded as match
182 if (num_probes_
== 0 || num_lines_
== 0) return true;
183 uint32_t hash
= BloomHash(entry
);
184 return HashMayMatch(hash
, Slice(data_
, data_len_
),
185 num_probes_
, num_lines_
);
194 uint32_t log2_cache_line_size_
;
196 // Get num_probes, and num_lines from filter
197 // If filter format broken, set both to 0.
198 void GetFilterMeta(const Slice
& filter
, size_t* num_probes
,
199 uint32_t* num_lines
);
201 // "filter" contains the data appended by a preceding call to
202 // FilterBitsBuilder::Finish. This method must return true if the key was
203 // passed to FilterBitsBuilder::AddKey. This method may return true or false
204 // if the key was not on the list, but it should aim to return false with a
207 // hash: target to be checked
208 // filter: the whole filter, including meta data bytes
209 // num_probes: number of probes, read before hand
210 // num_lines: filter metadata, read before hand
211 // Before calling this function, need to ensure the input meta data
213 bool HashMayMatch(const uint32_t& hash
, const Slice
& filter
,
214 const size_t& num_probes
, const uint32_t& num_lines
);
217 FullFilterBitsReader(const FullFilterBitsReader
&);
218 void operator=(const FullFilterBitsReader
&);
221 void FullFilterBitsReader::GetFilterMeta(const Slice
& filter
,
222 size_t* num_probes
, uint32_t* num_lines
) {
223 uint32_t len
= static_cast<uint32_t>(filter
.size());
225 // filter is empty or broken
231 *num_probes
= filter
.data()[len
- 5];
232 *num_lines
= DecodeFixed32(filter
.data() + len
- 4);
235 bool FullFilterBitsReader::HashMayMatch(const uint32_t& hash
,
236 const Slice
& filter
, const size_t& num_probes
,
237 const uint32_t& num_lines
) {
238 uint32_t len
= static_cast<uint32_t>(filter
.size());
239 if (len
<= 5) return false; // remain the same with original filter
241 // It is ensured the params are valid before calling it
242 assert(num_probes
!= 0);
243 assert(num_lines
!= 0 && (len
- 5) % num_lines
== 0);
244 const char* data
= filter
.data();
247 const uint32_t delta
= (h
>> 17) | (h
<< 15); // Rotate right 17 bits
248 // Left shift by an extra 3 to convert bytes to bits
249 uint32_t b
= (h
% num_lines
) << (log2_cache_line_size_
+ 3);
250 PREFETCH(&data
[b
/ 8], 0 /* rw */, 1 /* locality */);
251 PREFETCH(&data
[b
/ 8 + (1 << log2_cache_line_size_
) - 1], 0 /* rw */,
254 for (uint32_t i
= 0; i
< num_probes
; ++i
) {
255 // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
256 // to a simple and operation by compiler.
257 const uint32_t bitpos
= b
+ (h
& ((1 << (log2_cache_line_size_
+ 3)) - 1));
258 if (((data
[bitpos
/ 8]) & (1 << (bitpos
% 8))) == 0) {
268 // An implementation of filter policy
269 class BloomFilterPolicy
: public FilterPolicy
{
271 explicit BloomFilterPolicy(int bits_per_key
, bool use_block_based_builder
)
272 : bits_per_key_(bits_per_key
), hash_func_(BloomHash
),
273 use_block_based_builder_(use_block_based_builder
) {
277 ~BloomFilterPolicy() override
{}
279 const char* Name() const override
{ return "rocksdb.BuiltinBloomFilter"; }
281 void CreateFilter(const Slice
* keys
, int n
, std::string
* dst
) const override
{
282 // Compute bloom filter size (in both bits and bytes)
283 size_t bits
= n
* bits_per_key_
;
285 // For small n, we can see a very high false positive rate. Fix it
286 // by enforcing a minimum bloom filter length.
287 if (bits
< 64) bits
= 64;
289 size_t bytes
= (bits
+ 7) / 8;
292 const size_t init_size
= dst
->size();
293 dst
->resize(init_size
+ bytes
, 0);
294 dst
->push_back(static_cast<char>(num_probes_
)); // Remember # of probes
295 char* array
= &(*dst
)[init_size
];
296 for (size_t i
= 0; i
< (size_t)n
; i
++) {
297 // Use double-hashing to generate a sequence of hash values.
298 // See analysis in [Kirsch,Mitzenmacher 2006].
299 uint32_t h
= hash_func_(keys
[i
]);
300 const uint32_t delta
= (h
>> 17) | (h
<< 15); // Rotate right 17 bits
301 for (size_t j
= 0; j
< num_probes_
; j
++) {
302 const uint32_t bitpos
= h
% bits
;
303 array
[bitpos
/8] |= (1 << (bitpos
% 8));
309 bool KeyMayMatch(const Slice
& key
, const Slice
& bloom_filter
) const override
{
310 const size_t len
= bloom_filter
.size();
311 if (len
< 2) return false;
313 const char* array
= bloom_filter
.data();
314 const size_t bits
= (len
- 1) * 8;
316 // Use the encoded k so that we can read filters generated by
317 // bloom filters created using different parameters.
318 const size_t k
= array
[len
-1];
320 // Reserved for potentially new encodings for short bloom filters.
321 // Consider it a match.
325 uint32_t h
= hash_func_(key
);
326 const uint32_t delta
= (h
>> 17) | (h
<< 15); // Rotate right 17 bits
327 for (size_t j
= 0; j
< k
; j
++) {
328 const uint32_t bitpos
= h
% bits
;
329 if ((array
[bitpos
/8] & (1 << (bitpos
% 8))) == 0) return false;
335 FilterBitsBuilder
* GetFilterBitsBuilder() const override
{
336 if (use_block_based_builder_
) {
340 return new FullFilterBitsBuilder(bits_per_key_
, num_probes_
);
343 FilterBitsReader
* GetFilterBitsReader(const Slice
& contents
) const override
{
344 return new FullFilterBitsReader(contents
);
347 // If choose to use block based builder
348 bool UseBlockBasedBuilder() { return use_block_based_builder_
; }
351 size_t bits_per_key_
;
353 uint32_t (*hash_func_
)(const Slice
& key
);
355 const bool use_block_based_builder_
;
358 // We intentionally round down to reduce probing cost a little bit
359 num_probes_
= static_cast<size_t>(bits_per_key_
* 0.69); // 0.69 =~ ln(2)
360 if (num_probes_
< 1) num_probes_
= 1;
361 if (num_probes_
> 30) num_probes_
= 30;
367 const FilterPolicy
* NewBloomFilterPolicy(int bits_per_key
,
368 bool use_block_based_builder
) {
369 return new BloomFilterPolicy(bits_per_key
, use_block_based_builder
);
372 } // namespace rocksdb