1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
13 fprintf(stderr
, "Please install gflags to run this test... Skipping...\n");
22 #include "logging/logging.h"
23 #include "memory/arena.h"
24 #include "rocksdb/filter_policy.h"
25 #include "table/block_based/filter_policy_internal.h"
26 #include "test_util/testharness.h"
27 #include "test_util/testutil.h"
28 #include "util/gflags_compat.h"
29 #include "util/hash.h"
31 using GFLAGS_NAMESPACE::ParseCommandLineFlags
;
33 DEFINE_int32(bits_per_key
, 10, "");
35 namespace ROCKSDB_NAMESPACE
{
37 static const int kVerbose
= 1;
39 static Slice
Key(int i
, char* buffer
) {
41 PutFixed32(&s
, static_cast<uint32_t>(i
));
42 memcpy(buffer
, s
.c_str(), sizeof(i
));
43 return Slice(buffer
, sizeof(i
));
46 static int NextLength(int length
) {
49 } else if (length
< 100) {
51 } else if (length
< 1000) {
59 class BlockBasedBloomTest
: public testing::Test
{
61 std::unique_ptr
<const FilterPolicy
> policy_
;
63 std::vector
<std::string
> keys_
;
66 BlockBasedBloomTest() { ResetPolicy(); }
73 void ResetPolicy(double bits_per_key
) {
74 policy_
.reset(new BloomFilterPolicy(bits_per_key
,
75 BloomFilterPolicy::kDeprecatedBlock
));
79 void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key
); }
81 void Add(const Slice
& s
) {
82 keys_
.push_back(s
.ToString());
86 std::vector
<Slice
> key_slices
;
87 for (size_t i
= 0; i
< keys_
.size(); i
++) {
88 key_slices
.push_back(Slice(keys_
[i
]));
91 policy_
->CreateFilter(&key_slices
[0], static_cast<int>(key_slices
.size()),
94 if (kVerbose
>= 2) DumpFilter();
97 size_t FilterSize() const {
98 return filter_
.size();
101 Slice
FilterData() const { return Slice(filter_
); }
104 fprintf(stderr
, "F(");
105 for (size_t i
= 0; i
+1 < filter_
.size(); i
++) {
106 const unsigned int c
= static_cast<unsigned int>(filter_
[i
]);
107 for (int j
= 0; j
< 8; j
++) {
108 fprintf(stderr
, "%c", (c
& (1 <<j
)) ? '1' : '.');
111 fprintf(stderr
, ")\n");
114 bool Matches(const Slice
& s
) {
115 if (!keys_
.empty()) {
118 return policy_
->KeyMayMatch(s
, filter_
);
121 double FalsePositiveRate() {
122 char buffer
[sizeof(int)];
124 for (int i
= 0; i
< 10000; i
++) {
125 if (Matches(Key(i
+ 1000000000, buffer
))) {
129 return result
/ 10000.0;
133 TEST_F(BlockBasedBloomTest
, EmptyFilter
) {
134 ASSERT_TRUE(! Matches("hello"));
135 ASSERT_TRUE(! Matches("world"));
138 TEST_F(BlockBasedBloomTest
, Small
) {
141 ASSERT_TRUE(Matches("hello"));
142 ASSERT_TRUE(Matches("world"));
143 ASSERT_TRUE(! Matches("x"));
144 ASSERT_TRUE(! Matches("foo"));
147 TEST_F(BlockBasedBloomTest
, VaryingLengths
) {
148 char buffer
[sizeof(int)];
150 // Count number of filters that significantly exceed the false positive rate
151 int mediocre_filters
= 0;
152 int good_filters
= 0;
154 for (int length
= 1; length
<= 10000; length
= NextLength(length
)) {
156 for (int i
= 0; i
< length
; i
++) {
161 ASSERT_LE(FilterSize(), (size_t)((length
* 10 / 8) + 40)) << length
;
163 // All added keys must match
164 for (int i
= 0; i
< length
; i
++) {
165 ASSERT_TRUE(Matches(Key(i
, buffer
)))
166 << "Length " << length
<< "; key " << i
;
169 // Check false positive rate
170 double rate
= FalsePositiveRate();
172 fprintf(stderr
, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
173 rate
*100.0, length
, static_cast<int>(FilterSize()));
175 ASSERT_LE(rate
, 0.02); // Must not be over 2%
176 if (rate
> 0.0125) mediocre_filters
++; // Allowed, but not too often
180 fprintf(stderr
, "Filters: %d good, %d mediocre\n",
181 good_filters
, mediocre_filters
);
183 ASSERT_LE(mediocre_filters
, good_filters
/5);
186 // Ensure the implementation doesn't accidentally change in an
188 TEST_F(BlockBasedBloomTest
, Schema
) {
189 char buffer
[sizeof(int)];
191 ResetPolicy(8); // num_probes = 5
192 for (int key
= 0; key
< 87; key
++) {
193 Add(Key(key
, buffer
));
196 ASSERT_EQ(BloomHash(FilterData()), 3589896109U);
198 ResetPolicy(9); // num_probes = 6
199 for (int key
= 0; key
< 87; key
++) {
200 Add(Key(key
, buffer
));
203 ASSERT_EQ(BloomHash(FilterData()), 969445585);
205 ResetPolicy(11); // num_probes = 7
206 for (int key
= 0; key
< 87; key
++) {
207 Add(Key(key
, buffer
));
210 ASSERT_EQ(BloomHash(FilterData()), 1694458207);
212 ResetPolicy(10); // num_probes = 6
213 for (int key
= 0; key
< 87; key
++) {
214 Add(Key(key
, buffer
));
217 ASSERT_EQ(BloomHash(FilterData()), 2373646410U);
220 for (int key
= /*CHANGED*/ 1; key
< 87; key
++) {
221 Add(Key(key
, buffer
));
224 ASSERT_EQ(BloomHash(FilterData()), 1908442116);
227 for (int key
= 1; key
< /*CHANGED*/ 88; key
++) {
228 Add(Key(key
, buffer
));
231 ASSERT_EQ(BloomHash(FilterData()), 3057004015U);
233 // With new fractional bits_per_key, check that we are rounding to
234 // whole bits per key for old Bloom filters.
235 ResetPolicy(9.5); // Treated as 10
236 for (int key
= 1; key
< 88; key
++) {
237 Add(Key(key
, buffer
));
240 ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
242 ResetPolicy(10.499); // Treated as 10
243 for (int key
= 1; key
< 88; key
++) {
244 Add(Key(key
, buffer
));
247 ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
252 // Different bits-per-byte
254 class FullBloomTest
: public testing::TestWithParam
<BloomFilterPolicy::Mode
> {
256 BlockBasedTableOptions table_options_
;
257 std::shared_ptr
<const FilterPolicy
>& policy_
;
258 std::unique_ptr
<FilterBitsBuilder
> bits_builder_
;
259 std::unique_ptr
<FilterBitsReader
> bits_reader_
;
260 std::unique_ptr
<const char[]> buf_
;
264 FullBloomTest() : policy_(table_options_
.filter_policy
), filter_size_(0) {
268 BuiltinFilterBitsBuilder
* GetBuiltinFilterBitsBuilder() {
269 // Throws on bad cast
270 return &dynamic_cast<BuiltinFilterBitsBuilder
&>(*bits_builder_
);
273 const BloomFilterPolicy
* GetBloomFilterPolicy() {
274 // Throws on bad cast
275 return &dynamic_cast<const BloomFilterPolicy
&>(*policy_
);
279 bits_builder_
.reset(BloomFilterPolicy::GetBuilderFromContext(
280 FilterBuildingContext(table_options_
)));
281 bits_reader_
.reset(nullptr);
286 void ResetPolicy(double bits_per_key
) {
287 policy_
.reset(new BloomFilterPolicy(bits_per_key
, GetParam()));
291 void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key
); }
293 void Add(const Slice
& s
) {
294 bits_builder_
->AddKey(s
);
297 void OpenRaw(const Slice
& s
) {
298 bits_reader_
.reset(policy_
->GetFilterBitsReader(s
));
302 Slice filter
= bits_builder_
->Finish(&buf_
);
303 bits_reader_
.reset(policy_
->GetFilterBitsReader(filter
));
304 filter_size_
= filter
.size();
307 size_t FilterSize() const {
311 Slice
FilterData() { return Slice(buf_
.get(), filter_size_
); }
313 int GetNumProbesFromFilterData() {
314 assert(filter_size_
>= 5);
315 int8_t raw_num_probes
= static_cast<int8_t>(buf_
.get()[filter_size_
- 5]);
316 if (raw_num_probes
== -1) { // New bloom filter marker
317 return static_cast<uint8_t>(buf_
.get()[filter_size_
- 3]);
319 return raw_num_probes
;
323 bool Matches(const Slice
& s
) {
324 if (bits_reader_
== nullptr) {
327 return bits_reader_
->MayMatch(s
);
330 // Provides a kind of fingerprint on the Bloom filter's
331 // behavior, for reasonbly high FP rates.
332 uint64_t PackedMatches() {
333 char buffer
[sizeof(int)];
335 for (int i
= 0; i
< 64; i
++) {
336 if (Matches(Key(i
+ 12345, buffer
))) {
337 result
|= uint64_t{1} << i
;
343 // Provides a kind of fingerprint on the Bloom filter's
344 // behavior, for lower FP rates.
345 std::string
FirstFPs(int count
) {
346 char buffer
[sizeof(int)];
349 for (int i
= 0; i
< 1000000; i
++) {
350 // Pack four match booleans into each hexadecimal digit
351 if (Matches(Key(i
+ 1000000, buffer
))) {
353 rv
+= std::to_string(i
);
354 if (fp_count
== count
) {
363 double FalsePositiveRate() {
364 char buffer
[sizeof(int)];
366 for (int i
= 0; i
< 10000; i
++) {
367 if (Matches(Key(i
+ 1000000000, buffer
))) {
371 return result
/ 10000.0;
374 uint32_t SelectByImpl(uint32_t for_legacy_bloom
,
375 uint32_t for_fast_local_bloom
) {
376 switch (GetParam()) {
377 case BloomFilterPolicy::kLegacyBloom
:
378 return for_legacy_bloom
;
379 case BloomFilterPolicy::kFastLocalBloom
:
380 return for_fast_local_bloom
;
381 case BloomFilterPolicy::kDeprecatedBlock
:
382 case BloomFilterPolicy::kAuto
:
391 TEST_P(FullBloomTest
, FilterSize
) {
392 // In addition to checking the consistency of space computation, we are
393 // checking that denoted and computed doubles are interpreted as expected
394 // as bits_per_key values.
395 bool some_computed_less_than_denoted
= false;
396 // Note: enforced minimum is 1 bit per key (1000 millibits), and enforced
397 // maximum is 100 bits per key (100000 millibits).
399 std::vector
<std::pair
<double, int> >{{-HUGE_VAL
, 1000},
413 ResetPolicy(bpk
.first
);
414 auto bfp
= GetBloomFilterPolicy();
415 EXPECT_EQ(bpk
.second
, bfp
->GetMillibitsPerKey());
416 EXPECT_EQ((bpk
.second
+ 500) / 1000, bfp
->GetWholeBitsPerKey());
418 double computed
= bpk
.first
;
419 // This transforms e.g. 9.5 -> 9.499999999999998, which we still
420 // round to 10 for whole bits per key.
422 computed
/= 1234567.0;
423 computed
*= 1234567.0;
425 some_computed_less_than_denoted
|= (computed
< bpk
.first
);
426 ResetPolicy(computed
);
427 bfp
= GetBloomFilterPolicy();
428 EXPECT_EQ(bpk
.second
, bfp
->GetMillibitsPerKey());
429 EXPECT_EQ((bpk
.second
+ 500) / 1000, bfp
->GetWholeBitsPerKey());
431 auto bits_builder
= GetBuiltinFilterBitsBuilder();
432 for (int n
= 1; n
< 100; n
++) {
433 auto space
= bits_builder
->CalculateSpace(n
);
434 auto n2
= bits_builder
->CalculateNumEntry(space
);
436 auto space2
= bits_builder
->CalculateSpace(n2
);
437 EXPECT_EQ(space
, space2
);
440 // Check that the compiler hasn't optimized our computation into nothing
441 EXPECT_TRUE(some_computed_less_than_denoted
);
445 TEST_P(FullBloomTest
, FullEmptyFilter
) {
446 // Empty filter is not match, at this level
447 ASSERT_TRUE(!Matches("hello"));
448 ASSERT_TRUE(!Matches("world"));
451 TEST_P(FullBloomTest
, FullSmall
) {
454 ASSERT_TRUE(Matches("hello"));
455 ASSERT_TRUE(Matches("world"));
456 ASSERT_TRUE(!Matches("x"));
457 ASSERT_TRUE(!Matches("foo"));
460 TEST_P(FullBloomTest
, FullVaryingLengths
) {
461 char buffer
[sizeof(int)];
463 // Count number of filters that significantly exceed the false positive rate
464 int mediocre_filters
= 0;
465 int good_filters
= 0;
467 for (int length
= 1; length
<= 10000; length
= NextLength(length
)) {
469 for (int i
= 0; i
< length
; i
++) {
474 ASSERT_LE(FilterSize(),
475 (size_t)((length
* 10 / 8) + CACHE_LINE_SIZE
* 2 + 5));
477 // All added keys must match
478 for (int i
= 0; i
< length
; i
++) {
479 ASSERT_TRUE(Matches(Key(i
, buffer
)))
480 << "Length " << length
<< "; key " << i
;
483 // Check false positive rate
484 double rate
= FalsePositiveRate();
486 fprintf(stderr
, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
487 rate
*100.0, length
, static_cast<int>(FilterSize()));
489 ASSERT_LE(rate
, 0.02); // Must not be over 2%
491 mediocre_filters
++; // Allowed, but not too often
496 fprintf(stderr
, "Filters: %d good, %d mediocre\n",
497 good_filters
, mediocre_filters
);
499 ASSERT_LE(mediocre_filters
, good_filters
/5);
503 inline uint32_t SelectByCacheLineSize(uint32_t for64
, uint32_t for128
,
508 #if CACHE_LINE_SIZE == 64
510 #elif CACHE_LINE_SIZE == 128
512 #elif CACHE_LINE_SIZE == 256
515 #error "CACHE_LINE_SIZE unknown or unrecognized"
520 // Ensure the implementation doesn't accidentally change in an
521 // incompatible way. This test doesn't check the reading side
522 // (FirstFPs/PackedMatches) for LegacyBloom because it requires the
523 // ability to read filters generated using other cache line sizes.
525 TEST_P(FullBloomTest
, Schema
) {
526 char buffer
[sizeof(int)];
528 // Use enough keys so that changing bits / key by 1 is guaranteed to
529 // change number of allocated cache lines. So keys > max cache line bits.
531 ResetPolicy(2); // num_probes = 1
532 for (int key
= 0; key
< 2087; key
++) {
533 Add(Key(key
, buffer
));
536 EXPECT_EQ(GetNumProbesFromFilterData(), 1);
538 BloomHash(FilterData()),
539 SelectByImpl(SelectByCacheLineSize(1567096579, 1964771444, 2659542661U),
541 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
542 EXPECT_EQ("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
545 ResetPolicy(3); // num_probes = 2
546 for (int key
= 0; key
< 2087; key
++) {
547 Add(Key(key
, buffer
));
550 EXPECT_EQ(GetNumProbesFromFilterData(), 2);
552 BloomHash(FilterData()),
553 SelectByImpl(SelectByCacheLineSize(2707206547U, 2571983456U, 218344685),
555 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
556 EXPECT_EQ("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
559 ResetPolicy(5); // num_probes = 3
560 for (int key
= 0; key
< 2087; key
++) {
561 Add(Key(key
, buffer
));
564 EXPECT_EQ(GetNumProbesFromFilterData(), 3);
566 BloomHash(FilterData()),
567 SelectByImpl(SelectByCacheLineSize(515748486, 94611728, 2436112214U),
569 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
570 EXPECT_EQ("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
573 ResetPolicy(8); // num_probes = 5
574 for (int key
= 0; key
< 2087; key
++) {
575 Add(Key(key
, buffer
));
578 EXPECT_EQ(GetNumProbesFromFilterData(), 5);
580 BloomHash(FilterData()),
581 SelectByImpl(SelectByCacheLineSize(1302145999, 2811644657U, 756553699),
583 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
584 EXPECT_EQ("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
587 ResetPolicy(9); // num_probes = 6
588 for (int key
= 0; key
< 2087; key
++) {
589 Add(Key(key
, buffer
));
592 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
594 BloomHash(FilterData()),
595 SelectByImpl(SelectByCacheLineSize(2092755149, 661139132, 1182970461),
597 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
598 EXPECT_EQ("156,367,791,872,945,1015,1139,1159,1265,1435", FirstFPs(10));
601 ResetPolicy(11); // num_probes = 7
602 for (int key
= 0; key
< 2087; key
++) {
603 Add(Key(key
, buffer
));
606 EXPECT_EQ(GetNumProbesFromFilterData(), 7);
608 BloomHash(FilterData()),
609 SelectByImpl(SelectByCacheLineSize(3755609649U, 1812694762, 1449142939),
611 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
612 EXPECT_EQ("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
615 // This used to be 9 probes, but 8 is a better choice for speed,
616 // especially with SIMD groups of 8 probes, with essentially no
617 // change in FP rate.
618 // FP rate @ 9 probes, old Bloom: 0.4321%
619 // FP rate @ 9 probes, new Bloom: 0.1846%
620 // FP rate @ 8 probes, new Bloom: 0.1843%
621 ResetPolicy(14); // num_probes = 8 (new), 9 (old)
622 for (int key
= 0; key
< 2087; key
++) {
623 Add(Key(key
, buffer
));
626 EXPECT_EQ(GetNumProbesFromFilterData(), SelectByImpl(9, 8));
628 BloomHash(FilterData()),
629 SelectByImpl(SelectByCacheLineSize(178861123, 379087593, 2574136516U),
631 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
632 EXPECT_EQ("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
635 // This used to be 11 probes, but 9 is a better choice for speed
637 // FP rate @ 11 probes, old Bloom: 0.3571%
638 // FP rate @ 11 probes, new Bloom: 0.0884%
639 // FP rate @ 9 probes, new Bloom: 0.0843%
640 ResetPolicy(16); // num_probes = 9 (new), 11 (old)
641 for (int key
= 0; key
< 2087; key
++) {
642 Add(Key(key
, buffer
));
645 EXPECT_EQ(GetNumProbesFromFilterData(), SelectByImpl(11, 9));
647 BloomHash(FilterData()),
648 SelectByImpl(SelectByCacheLineSize(1129406313, 3049154394U, 1727750964),
650 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
651 EXPECT_EQ("3299,3611,3916,6620,7822,8079,8482,8942,10167", FirstFPs(9));
654 ResetPolicy(10); // num_probes = 6, but different memory ratio vs. 9
655 for (int key
= 0; key
< 2087; key
++) {
656 Add(Key(key
, buffer
));
659 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
661 BloomHash(FilterData()),
662 SelectByImpl(SelectByCacheLineSize(1478976371, 2910591341U, 1182970461),
664 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
665 EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
669 for (int key
= /*CHANGED*/ 1; key
< 2087; key
++) {
670 Add(Key(key
, buffer
));
673 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
675 BloomHash(FilterData()),
676 SelectByImpl(SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U),
678 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
679 EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
683 for (int key
= 1; key
< /*CHANGED*/ 2088; key
++) {
684 Add(Key(key
, buffer
));
687 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
689 BloomHash(FilterData()),
690 SelectByImpl(SelectByCacheLineSize(2885052954U, 769447944, 4175124908U),
692 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
693 EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
696 // With new fractional bits_per_key, check that we are rounding to
697 // whole bits per key for old Bloom filters but fractional for
700 for (int key
= 1; key
< 2088; key
++) {
701 Add(Key(key
, buffer
));
704 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
705 EXPECT_EQ(BloomHash(FilterData()),
706 SelectByImpl(/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944,
708 /*CHANGED*/ 3166884174U));
709 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
710 EXPECT_EQ(/*CHANGED*/ "126,156,367,444,458,791,813,976,1015,1035",
715 for (int key
= 1; key
< 2088; key
++) {
716 Add(Key(key
, buffer
));
719 EXPECT_EQ(GetNumProbesFromFilterData(), SelectByImpl(6, 7));
720 EXPECT_EQ(BloomHash(FilterData()),
721 SelectByImpl(/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944,
723 /*CHANGED*/ 4098502778U));
724 if (GetParam() == BloomFilterPolicy::kFastLocalBloom
) {
725 EXPECT_EQ(/*CHANGED*/ "16,236,240,472,1015,1045,1111,1409,1465,1612",
732 // A helper class for testing custom or corrupt filter bits as read by
733 // built-in FilterBitsReaders.
734 struct RawFilterTester
{
735 // Buffer, from which we always return a tail Slice, so the
736 // last five bytes are always the metadata bytes.
737 std::array
<char, 3000> data_
;
738 // Points five bytes from the end
741 RawFilterTester() : metadata_ptr_(&*(data_
.end() - 5)) {}
743 Slice
ResetNoFill(uint32_t len_without_metadata
, uint32_t num_lines
,
744 uint32_t num_probes
) {
745 metadata_ptr_
[0] = static_cast<char>(num_probes
);
746 EncodeFixed32(metadata_ptr_
+ 1, num_lines
);
747 uint32_t len
= len_without_metadata
+ /*metadata*/ 5;
748 assert(len
<= data_
.size());
749 return Slice(metadata_ptr_
- len_without_metadata
, len
);
752 Slice
Reset(uint32_t len_without_metadata
, uint32_t num_lines
,
753 uint32_t num_probes
, bool fill_ones
) {
754 data_
.fill(fill_ones
? 0xff : 0);
755 return ResetNoFill(len_without_metadata
, num_lines
, num_probes
);
758 Slice
ResetWeirdFill(uint32_t len_without_metadata
, uint32_t num_lines
,
759 uint32_t num_probes
) {
760 for (uint32_t i
= 0; i
< data_
.size(); ++i
) {
761 data_
[i
] = static_cast<char>(0x7b7b >> (i
% 7));
763 return ResetNoFill(len_without_metadata
, num_lines
, num_probes
);
767 TEST_P(FullBloomTest
, RawSchema
) {
769 // Two probes, about 3/4 bits set: ~50% "FP" rate
770 // One 256-byte cache line.
771 OpenRaw(cft
.ResetWeirdFill(256, 1, 2));
772 EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches());
774 // Two 128-byte cache lines.
775 OpenRaw(cft
.ResetWeirdFill(256, 2, 2));
776 EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches());
778 // Four 64-byte cache lines.
779 OpenRaw(cft
.ResetWeirdFill(256, 4, 2));
780 EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());
783 TEST_P(FullBloomTest
, CorruptFilters
) {
786 for (bool fill
: {false, true}) {
787 // Good filter bits - returns same as fill
788 OpenRaw(cft
.Reset(CACHE_LINE_SIZE
, 1, 6, fill
));
789 ASSERT_EQ(fill
, Matches("hello"));
790 ASSERT_EQ(fill
, Matches("world"));
792 // Good filter bits - returns same as fill
793 OpenRaw(cft
.Reset(CACHE_LINE_SIZE
* 3, 3, 6, fill
));
794 ASSERT_EQ(fill
, Matches("hello"));
795 ASSERT_EQ(fill
, Matches("world"));
797 // Good filter bits - returns same as fill
798 // 256 is unusual but legal cache line size
799 OpenRaw(cft
.Reset(256 * 3, 3, 6, fill
));
800 ASSERT_EQ(fill
, Matches("hello"));
801 ASSERT_EQ(fill
, Matches("world"));
803 // Good filter bits - returns same as fill
804 // 30 should be max num_probes
805 OpenRaw(cft
.Reset(CACHE_LINE_SIZE
, 1, 30, fill
));
806 ASSERT_EQ(fill
, Matches("hello"));
807 ASSERT_EQ(fill
, Matches("world"));
809 // Good filter bits - returns same as fill
810 // 1 should be min num_probes
811 OpenRaw(cft
.Reset(CACHE_LINE_SIZE
, 1, 1, fill
));
812 ASSERT_EQ(fill
, Matches("hello"));
813 ASSERT_EQ(fill
, Matches("world"));
815 // Type 1 trivial filter bits - returns true as if FP by zero probes
816 OpenRaw(cft
.Reset(CACHE_LINE_SIZE
, 1, 0, fill
));
817 ASSERT_TRUE(Matches("hello"));
818 ASSERT_TRUE(Matches("world"));
820 // Type 2 trivial filter bits - returns false as if built from zero keys
821 OpenRaw(cft
.Reset(0, 0, 6, fill
));
822 ASSERT_FALSE(Matches("hello"));
823 ASSERT_FALSE(Matches("world"));
825 // Type 2 trivial filter bits - returns false as if built from zero keys
826 OpenRaw(cft
.Reset(0, 37, 6, fill
));
827 ASSERT_FALSE(Matches("hello"));
828 ASSERT_FALSE(Matches("world"));
830 // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes
831 OpenRaw(cft
.Reset(0, 0, 0, fill
));
832 ASSERT_FALSE(Matches("hello"));
833 ASSERT_FALSE(Matches("world"));
835 // Bad filter bits - returns true for safety
836 // No solution to 0 * x == CACHE_LINE_SIZE
837 OpenRaw(cft
.Reset(CACHE_LINE_SIZE
, 0, 6, fill
));
838 ASSERT_TRUE(Matches("hello"));
839 ASSERT_TRUE(Matches("world"));
841 // Bad filter bits - returns true for safety
842 // Can't have 3 * x == 4 for integer x
843 OpenRaw(cft
.Reset(4, 3, 6, fill
));
844 ASSERT_TRUE(Matches("hello"));
845 ASSERT_TRUE(Matches("world"));
847 // Bad filter bits - returns true for safety
848 // 97 bytes is not a power of two, so not a legal cache line size
849 OpenRaw(cft
.Reset(97 * 3, 3, 6, fill
));
850 ASSERT_TRUE(Matches("hello"));
851 ASSERT_TRUE(Matches("world"));
853 // Bad filter bits - returns true for safety
854 // 65 bytes is not a power of two, so not a legal cache line size
855 OpenRaw(cft
.Reset(65 * 3, 3, 6, fill
));
856 ASSERT_TRUE(Matches("hello"));
857 ASSERT_TRUE(Matches("world"));
859 // Bad filter bits - returns false as if built from zero keys
860 // < 5 bytes overall means missing even metadata
861 OpenRaw(cft
.Reset(-1, 3, 6, fill
));
862 ASSERT_FALSE(Matches("hello"));
863 ASSERT_FALSE(Matches("world"));
865 OpenRaw(cft
.Reset(-5, 3, 6, fill
));
866 ASSERT_FALSE(Matches("hello"));
867 ASSERT_FALSE(Matches("world"));
869 // Dubious filter bits - returns same as fill (for now)
870 // 31 is not a useful num_probes, nor generated by RocksDB unless directly
871 // using filter bits API without BloomFilterPolicy.
872 OpenRaw(cft
.Reset(CACHE_LINE_SIZE
, 1, 31, fill
));
873 ASSERT_EQ(fill
, Matches("hello"));
874 ASSERT_EQ(fill
, Matches("world"));
876 // Dubious filter bits - returns same as fill (for now)
877 // Similar, with 127, largest positive char
878 OpenRaw(cft
.Reset(CACHE_LINE_SIZE
, 1, 127, fill
));
879 ASSERT_EQ(fill
, Matches("hello"));
880 ASSERT_EQ(fill
, Matches("world"));
882 // Dubious filter bits - returns true (for now)
883 // num_probes set to 128 / -128, lowest negative char
884 // NB: Bug in implementation interprets this as negative and has same
885 // effect as zero probes, but effectively reserves negative char values
887 OpenRaw(cft
.Reset(CACHE_LINE_SIZE
, 1, 128, fill
));
888 ASSERT_TRUE(Matches("hello"));
889 ASSERT_TRUE(Matches("world"));
891 // Dubious filter bits - returns true (for now)
892 // Similar, with 255 / -1
893 OpenRaw(cft
.Reset(CACHE_LINE_SIZE
, 1, 255, fill
));
894 ASSERT_TRUE(Matches("hello"));
895 ASSERT_TRUE(Matches("world"));
899 INSTANTIATE_TEST_CASE_P(Full
, FullBloomTest
,
900 testing::Values(BloomFilterPolicy::kLegacyBloom
,
901 BloomFilterPolicy::kFastLocalBloom
));
903 } // namespace ROCKSDB_NAMESPACE
905 int main(int argc
, char** argv
) {
906 ::testing::InitGoogleTest(&argc
, argv
);
907 ParseCommandLineFlags(&argc
, &argv
, true);
909 return RUN_ALL_TESTS();