]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/util/bloom_test.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / util / bloom_test.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #ifndef GFLAGS
11 #include <cstdio>
12 int main() {
13 fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
14 return 0;
15 }
16 #else
17
18 #include <array>
19 #include <cmath>
20 #include <vector>
21
22 #include "memory/arena.h"
23 #include "port/jemalloc_helper.h"
24 #include "rocksdb/filter_policy.h"
25 #include "table/block_based/filter_policy_internal.h"
26 #include "test_util/testharness.h"
27 #include "test_util/testutil.h"
28 #include "util/gflags_compat.h"
29 #include "util/hash.h"
30
31 using GFLAGS_NAMESPACE::ParseCommandLineFlags;
32
33 DEFINE_int32(bits_per_key, 10, "");
34
35 namespace ROCKSDB_NAMESPACE {
36
37 static const int kVerbose = 1;
38
39 static Slice Key(int i, char* buffer) {
40 std::string s;
41 PutFixed32(&s, static_cast<uint32_t>(i));
42 memcpy(buffer, s.c_str(), sizeof(i));
43 return Slice(buffer, sizeof(i));
44 }
45
46 static int NextLength(int length) {
47 if (length < 10) {
48 length += 1;
49 } else if (length < 100) {
50 length += 10;
51 } else if (length < 1000) {
52 length += 100;
53 } else {
54 length += 1000;
55 }
56 return length;
57 }
58
59 class BlockBasedBloomTest : public testing::Test {
60 private:
61 std::unique_ptr<const FilterPolicy> policy_;
62 std::string filter_;
63 std::vector<std::string> keys_;
64
65 public:
66 BlockBasedBloomTest() { ResetPolicy(); }
67
68 void Reset() {
69 keys_.clear();
70 filter_.clear();
71 }
72
73 void ResetPolicy(double bits_per_key) {
74 policy_.reset(new BloomFilterPolicy(bits_per_key,
75 BloomFilterPolicy::kDeprecatedBlock));
76 Reset();
77 }
78
79 void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
80
81 void Add(const Slice& s) {
82 keys_.push_back(s.ToString());
83 }
84
85 void Build() {
86 std::vector<Slice> key_slices;
87 for (size_t i = 0; i < keys_.size(); i++) {
88 key_slices.push_back(Slice(keys_[i]));
89 }
90 filter_.clear();
91 policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
92 &filter_);
93 keys_.clear();
94 if (kVerbose >= 2) DumpFilter();
95 }
96
97 size_t FilterSize() const {
98 return filter_.size();
99 }
100
101 Slice FilterData() const { return Slice(filter_); }
102
103 void DumpFilter() {
104 fprintf(stderr, "F(");
105 for (size_t i = 0; i+1 < filter_.size(); i++) {
106 const unsigned int c = static_cast<unsigned int>(filter_[i]);
107 for (int j = 0; j < 8; j++) {
108 fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
109 }
110 }
111 fprintf(stderr, ")\n");
112 }
113
114 bool Matches(const Slice& s) {
115 if (!keys_.empty()) {
116 Build();
117 }
118 return policy_->KeyMayMatch(s, filter_);
119 }
120
121 double FalsePositiveRate() {
122 char buffer[sizeof(int)];
123 int result = 0;
124 for (int i = 0; i < 10000; i++) {
125 if (Matches(Key(i + 1000000000, buffer))) {
126 result++;
127 }
128 }
129 return result / 10000.0;
130 }
131 };
132
133 TEST_F(BlockBasedBloomTest, EmptyFilter) {
134 ASSERT_TRUE(! Matches("hello"));
135 ASSERT_TRUE(! Matches("world"));
136 }
137
138 TEST_F(BlockBasedBloomTest, Small) {
139 Add("hello");
140 Add("world");
141 ASSERT_TRUE(Matches("hello"));
142 ASSERT_TRUE(Matches("world"));
143 ASSERT_TRUE(! Matches("x"));
144 ASSERT_TRUE(! Matches("foo"));
145 }
146
147 TEST_F(BlockBasedBloomTest, VaryingLengths) {
148 char buffer[sizeof(int)];
149
150 // Count number of filters that significantly exceed the false positive rate
151 int mediocre_filters = 0;
152 int good_filters = 0;
153
154 for (int length = 1; length <= 10000; length = NextLength(length)) {
155 Reset();
156 for (int i = 0; i < length; i++) {
157 Add(Key(i, buffer));
158 }
159 Build();
160
161 ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 40)) << length;
162
163 // All added keys must match
164 for (int i = 0; i < length; i++) {
165 ASSERT_TRUE(Matches(Key(i, buffer)))
166 << "Length " << length << "; key " << i;
167 }
168
169 // Check false positive rate
170 double rate = FalsePositiveRate();
171 if (kVerbose >= 1) {
172 fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
173 rate*100.0, length, static_cast<int>(FilterSize()));
174 }
175 ASSERT_LE(rate, 0.02); // Must not be over 2%
176 if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often
177 else good_filters++;
178 }
179 if (kVerbose >= 1) {
180 fprintf(stderr, "Filters: %d good, %d mediocre\n",
181 good_filters, mediocre_filters);
182 }
183 ASSERT_LE(mediocre_filters, good_filters/5);
184 }
185
186 // Ensure the implementation doesn't accidentally change in an
187 // incompatible way
188 TEST_F(BlockBasedBloomTest, Schema) {
189 char buffer[sizeof(int)];
190
191 ResetPolicy(8); // num_probes = 5
192 for (int key = 0; key < 87; key++) {
193 Add(Key(key, buffer));
194 }
195 Build();
196 ASSERT_EQ(BloomHash(FilterData()), 3589896109U);
197
198 ResetPolicy(9); // num_probes = 6
199 for (int key = 0; key < 87; key++) {
200 Add(Key(key, buffer));
201 }
202 Build();
203 ASSERT_EQ(BloomHash(FilterData()), 969445585U);
204
205 ResetPolicy(11); // num_probes = 7
206 for (int key = 0; key < 87; key++) {
207 Add(Key(key, buffer));
208 }
209 Build();
210 ASSERT_EQ(BloomHash(FilterData()), 1694458207U);
211
212 ResetPolicy(10); // num_probes = 6
213 for (int key = 0; key < 87; key++) {
214 Add(Key(key, buffer));
215 }
216 Build();
217 ASSERT_EQ(BloomHash(FilterData()), 2373646410U);
218
219 ResetPolicy(10);
220 for (int key = /*CHANGED*/ 1; key < 87; key++) {
221 Add(Key(key, buffer));
222 }
223 Build();
224 ASSERT_EQ(BloomHash(FilterData()), 1908442116U);
225
226 ResetPolicy(10);
227 for (int key = 1; key < /*CHANGED*/ 88; key++) {
228 Add(Key(key, buffer));
229 }
230 Build();
231 ASSERT_EQ(BloomHash(FilterData()), 3057004015U);
232
233 // With new fractional bits_per_key, check that we are rounding to
234 // whole bits per key for old Bloom filters.
235 ResetPolicy(9.5); // Treated as 10
236 for (int key = 1; key < 88; key++) {
237 Add(Key(key, buffer));
238 }
239 Build();
240 ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
241
242 ResetPolicy(10.499); // Treated as 10
243 for (int key = 1; key < 88; key++) {
244 Add(Key(key, buffer));
245 }
246 Build();
247 ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
248
249 ResetPolicy();
250 }
251
252 // Different bits-per-byte
253
254 class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
255 protected:
256 BlockBasedTableOptions table_options_;
257
258 private:
259 std::shared_ptr<const FilterPolicy>& policy_;
260 std::unique_ptr<FilterBitsBuilder> bits_builder_;
261 std::unique_ptr<FilterBitsReader> bits_reader_;
262 std::unique_ptr<const char[]> buf_;
263 size_t filter_size_;
264
265 public:
266 FullBloomTest() : policy_(table_options_.filter_policy), filter_size_(0) {
267 ResetPolicy();
268 }
269
270 BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() {
271 // Throws on bad cast
272 return &dynamic_cast<BuiltinFilterBitsBuilder&>(*bits_builder_);
273 }
274
275 const BloomFilterPolicy* GetBloomFilterPolicy() {
276 // Throws on bad cast
277 return &dynamic_cast<const BloomFilterPolicy&>(*policy_);
278 }
279
280 void Reset() {
281 bits_builder_.reset(BloomFilterPolicy::GetBuilderFromContext(
282 FilterBuildingContext(table_options_)));
283 bits_reader_.reset(nullptr);
284 buf_.reset(nullptr);
285 filter_size_ = 0;
286 }
287
288 void ResetPolicy(double bits_per_key) {
289 policy_.reset(new BloomFilterPolicy(bits_per_key, GetParam()));
290 Reset();
291 }
292
293 void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
294
295 void Add(const Slice& s) {
296 bits_builder_->AddKey(s);
297 }
298
299 void OpenRaw(const Slice& s) {
300 bits_reader_.reset(policy_->GetFilterBitsReader(s));
301 }
302
303 void Build() {
304 Slice filter = bits_builder_->Finish(&buf_);
305 bits_reader_.reset(policy_->GetFilterBitsReader(filter));
306 filter_size_ = filter.size();
307 }
308
309 size_t FilterSize() const {
310 return filter_size_;
311 }
312
313 Slice FilterData() { return Slice(buf_.get(), filter_size_); }
314
315 int GetNumProbesFromFilterData() {
316 assert(filter_size_ >= 5);
317 int8_t raw_num_probes = static_cast<int8_t>(buf_.get()[filter_size_ - 5]);
318 if (raw_num_probes == -1) { // New bloom filter marker
319 return static_cast<uint8_t>(buf_.get()[filter_size_ - 3]);
320 } else {
321 return raw_num_probes;
322 }
323 }
324
325 bool Matches(const Slice& s) {
326 if (bits_reader_ == nullptr) {
327 Build();
328 }
329 return bits_reader_->MayMatch(s);
330 }
331
332 // Provides a kind of fingerprint on the Bloom filter's
333 // behavior, for reasonbly high FP rates.
334 uint64_t PackedMatches() {
335 char buffer[sizeof(int)];
336 uint64_t result = 0;
337 for (int i = 0; i < 64; i++) {
338 if (Matches(Key(i + 12345, buffer))) {
339 result |= uint64_t{1} << i;
340 }
341 }
342 return result;
343 }
344
345 // Provides a kind of fingerprint on the Bloom filter's
346 // behavior, for lower FP rates.
347 std::string FirstFPs(int count) {
348 char buffer[sizeof(int)];
349 std::string rv;
350 int fp_count = 0;
351 for (int i = 0; i < 1000000; i++) {
352 // Pack four match booleans into each hexadecimal digit
353 if (Matches(Key(i + 1000000, buffer))) {
354 ++fp_count;
355 rv += std::to_string(i);
356 if (fp_count == count) {
357 break;
358 }
359 rv += ',';
360 }
361 }
362 return rv;
363 }
364
365 double FalsePositiveRate() {
366 char buffer[sizeof(int)];
367 int result = 0;
368 for (int i = 0; i < 10000; i++) {
369 if (Matches(Key(i + 1000000000, buffer))) {
370 result++;
371 }
372 }
373 return result / 10000.0;
374 }
375
376 uint32_t SelectByImpl(uint32_t for_legacy_bloom,
377 uint32_t for_fast_local_bloom) {
378 switch (GetParam()) {
379 case BloomFilterPolicy::kLegacyBloom:
380 return for_legacy_bloom;
381 case BloomFilterPolicy::kFastLocalBloom:
382 return for_fast_local_bloom;
383 case BloomFilterPolicy::kDeprecatedBlock:
384 case BloomFilterPolicy::kAutoBloom:
385 case BloomFilterPolicy::kStandard128Ribbon:
386 /* N/A */;
387 }
388 // otherwise
389 assert(false);
390 return 0;
391 }
392 };
393
394 TEST_P(FullBloomTest, FilterSize) {
395 // In addition to checking the consistency of space computation, we are
396 // checking that denoted and computed doubles are interpreted as expected
397 // as bits_per_key values.
398 bool some_computed_less_than_denoted = false;
399 // Note: enforced minimum is 1 bit per key (1000 millibits), and enforced
400 // maximum is 100 bits per key (100000 millibits).
401 for (auto bpk :
402 std::vector<std::pair<double, int> >{{-HUGE_VAL, 1000},
403 {-INFINITY, 1000},
404 {0.0, 1000},
405 {1.234, 1234},
406 {3.456, 3456},
407 {9.5, 9500},
408 {10.0, 10000},
409 {10.499, 10499},
410 {21.345, 21345},
411 {99.999, 99999},
412 {1234.0, 100000},
413 {HUGE_VAL, 100000},
414 {INFINITY, 100000},
415 {NAN, 100000}}) {
416 ResetPolicy(bpk.first);
417 auto bfp = GetBloomFilterPolicy();
418 EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
419 EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
420
421 double computed = bpk.first;
422 // This transforms e.g. 9.5 -> 9.499999999999998, which we still
423 // round to 10 for whole bits per key.
424 computed += 0.5;
425 computed /= 1234567.0;
426 computed *= 1234567.0;
427 computed -= 0.5;
428 some_computed_less_than_denoted |= (computed < bpk.first);
429 ResetPolicy(computed);
430 bfp = GetBloomFilterPolicy();
431 EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
432 EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
433
434 auto bits_builder = GetBuiltinFilterBitsBuilder();
435 for (int n = 1; n < 100; n++) {
436 auto space = bits_builder->CalculateSpace(n);
437 auto n2 = bits_builder->CalculateNumEntry(space);
438 EXPECT_GE(n2, n);
439 auto space2 = bits_builder->CalculateSpace(n2);
440 EXPECT_EQ(space, space2);
441 }
442 }
443 // Check that the compiler hasn't optimized our computation into nothing
444 EXPECT_TRUE(some_computed_less_than_denoted);
445 ResetPolicy();
446 }
447
448 TEST_P(FullBloomTest, FullEmptyFilter) {
449 // Empty filter is not match, at this level
450 ASSERT_TRUE(!Matches("hello"));
451 ASSERT_TRUE(!Matches("world"));
452 }
453
454 TEST_P(FullBloomTest, FullSmall) {
455 Add("hello");
456 Add("world");
457 ASSERT_TRUE(Matches("hello"));
458 ASSERT_TRUE(Matches("world"));
459 ASSERT_TRUE(!Matches("x"));
460 ASSERT_TRUE(!Matches("foo"));
461 }
462
463 TEST_P(FullBloomTest, FullVaryingLengths) {
464 char buffer[sizeof(int)];
465
466 // Count number of filters that significantly exceed the false positive rate
467 int mediocre_filters = 0;
468 int good_filters = 0;
469
470 for (int length = 1; length <= 10000; length = NextLength(length)) {
471 Reset();
472 for (int i = 0; i < length; i++) {
473 Add(Key(i, buffer));
474 }
475 Build();
476
477 EXPECT_LE(FilterSize(),
478 (size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5));
479
480 // All added keys must match
481 for (int i = 0; i < length; i++) {
482 ASSERT_TRUE(Matches(Key(i, buffer)))
483 << "Length " << length << "; key " << i;
484 }
485
486 // Check false positive rate
487 double rate = FalsePositiveRate();
488 if (kVerbose >= 1) {
489 fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
490 rate*100.0, length, static_cast<int>(FilterSize()));
491 }
492 EXPECT_LE(rate, 0.02); // Must not be over 2%
493 if (rate > 0.0125)
494 mediocre_filters++; // Allowed, but not too often
495 else
496 good_filters++;
497 }
498 if (kVerbose >= 1) {
499 fprintf(stderr, "Filters: %d good, %d mediocre\n",
500 good_filters, mediocre_filters);
501 }
502 EXPECT_LE(mediocre_filters, good_filters / 5);
503 }
504
505 TEST_P(FullBloomTest, OptimizeForMemory) {
506 if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) {
507 // TODO Not yet implemented
508 return;
509 }
510 char buffer[sizeof(int)];
511 for (bool offm : {true, false}) {
512 table_options_.optimize_filters_for_memory = offm;
513 ResetPolicy();
514 Random32 rnd(12345);
515 uint64_t total_size = 0;
516 uint64_t total_mem = 0;
517 int64_t total_keys = 0;
518 double total_fp_rate = 0;
519 constexpr int nfilters = 100;
520 for (int i = 0; i < nfilters; ++i) {
521 int nkeys = static_cast<int>(rnd.Uniformish(10000)) + 100;
522 Reset();
523 for (int j = 0; j < nkeys; ++j) {
524 Add(Key(j, buffer));
525 }
526 Build();
527 size_t size = FilterData().size();
528 total_size += size;
529 // optimize_filters_for_memory currently depends on malloc_usable_size
530 // but we run the rest of the test to ensure no bad behavior without it.
531 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
532 size = malloc_usable_size(const_cast<char*>(FilterData().data()));
533 #endif // ROCKSDB_MALLOC_USABLE_SIZE
534 total_mem += size;
535 total_keys += nkeys;
536 total_fp_rate += FalsePositiveRate();
537 }
538 EXPECT_LE(total_fp_rate / double{nfilters}, 0.011);
539 EXPECT_GE(total_fp_rate / double{nfilters}, 0.008);
540
541 int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8;
542 EXPECT_GE(static_cast<int64_t>(total_size), ex_min_total_size);
543
544 int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5);
545 if (GetParam() == BloomFilterPolicy::kLegacyBloom) {
546 // this config can add extra cache line to make odd number
547 blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE;
548 }
549
550 EXPECT_GE(total_mem, total_size);
551
552 // optimize_filters_for_memory not implemented with legacy Bloom
553 if (offm && GetParam() != BloomFilterPolicy::kLegacyBloom) {
554 // This value can include a small extra penalty for kExtraPadding
555 fprintf(stderr, "Internal fragmentation (optimized): %g%%\n",
556 (total_mem - total_size) * 100.0 / total_size);
557 // Less than 1% internal fragmentation
558 EXPECT_LE(total_mem, total_size * 101 / 100);
559 // Up to 2% storage penalty
560 EXPECT_LE(static_cast<int64_t>(total_size),
561 ex_min_total_size * 102 / 100 + blocked_bloom_overhead);
562 } else {
563 fprintf(stderr, "Internal fragmentation (not optimized): %g%%\n",
564 (total_mem - total_size) * 100.0 / total_size);
565 // TODO: add control checks for more allocators?
566 #ifdef ROCKSDB_JEMALLOC
567 fprintf(stderr, "Jemalloc detected? %d\n", HasJemalloc());
568 if (HasJemalloc()) {
569 // More than 5% internal fragmentation
570 EXPECT_GE(total_mem, total_size * 105 / 100);
571 }
572 #endif // ROCKSDB_JEMALLOC
573 // No storage penalty, just usual overhead
574 EXPECT_LE(static_cast<int64_t>(total_size),
575 ex_min_total_size + blocked_bloom_overhead);
576 }
577 }
578 }
579
580 namespace {
581 inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
582 uint32_t for256) {
583 (void)for64;
584 (void)for128;
585 (void)for256;
586 #if CACHE_LINE_SIZE == 64
587 return for64;
588 #elif CACHE_LINE_SIZE == 128
589 return for128;
590 #elif CACHE_LINE_SIZE == 256
591 return for256;
592 #else
593 #error "CACHE_LINE_SIZE unknown or unrecognized"
594 #endif
595 }
596 } // namespace
597
598 // Ensure the implementation doesn't accidentally change in an
599 // incompatible way. This test doesn't check the reading side
600 // (FirstFPs/PackedMatches) for LegacyBloom because it requires the
601 // ability to read filters generated using other cache line sizes.
602 // See RawSchema.
603 TEST_P(FullBloomTest, Schema) {
604 if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) {
605 // TODO ASAP to ensure schema stability
606 return;
607 }
608 char buffer[sizeof(int)];
609
610 // Use enough keys so that changing bits / key by 1 is guaranteed to
611 // change number of allocated cache lines. So keys > max cache line bits.
612
613 ResetPolicy(2); // num_probes = 1
614 for (int key = 0; key < 2087; key++) {
615 Add(Key(key, buffer));
616 }
617 Build();
618 EXPECT_EQ(GetNumProbesFromFilterData(), 1);
619 EXPECT_EQ(
620 BloomHash(FilterData()),
621 SelectByImpl(SelectByCacheLineSize(1567096579, 1964771444, 2659542661U),
622 3817481309U));
623 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
624 EXPECT_EQ("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
625 }
626
627 ResetPolicy(3); // num_probes = 2
628 for (int key = 0; key < 2087; key++) {
629 Add(Key(key, buffer));
630 }
631 Build();
632 EXPECT_EQ(GetNumProbesFromFilterData(), 2);
633 EXPECT_EQ(
634 BloomHash(FilterData()),
635 SelectByImpl(SelectByCacheLineSize(2707206547U, 2571983456U, 218344685),
636 2807269961U));
637 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
638 EXPECT_EQ("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
639 }
640
641 ResetPolicy(5); // num_probes = 3
642 for (int key = 0; key < 2087; key++) {
643 Add(Key(key, buffer));
644 }
645 Build();
646 EXPECT_EQ(GetNumProbesFromFilterData(), 3);
647 EXPECT_EQ(
648 BloomHash(FilterData()),
649 SelectByImpl(SelectByCacheLineSize(515748486, 94611728, 2436112214U),
650 204628445));
651 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
652 EXPECT_EQ("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
653 }
654
655 ResetPolicy(8); // num_probes = 5
656 for (int key = 0; key < 2087; key++) {
657 Add(Key(key, buffer));
658 }
659 Build();
660 EXPECT_EQ(GetNumProbesFromFilterData(), 5);
661 EXPECT_EQ(
662 BloomHash(FilterData()),
663 SelectByImpl(SelectByCacheLineSize(1302145999, 2811644657U, 756553699),
664 355564975));
665 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
666 EXPECT_EQ("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
667 }
668
669 ResetPolicy(9); // num_probes = 6
670 for (int key = 0; key < 2087; key++) {
671 Add(Key(key, buffer));
672 }
673 Build();
674 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
675 EXPECT_EQ(
676 BloomHash(FilterData()),
677 SelectByImpl(SelectByCacheLineSize(2092755149, 661139132, 1182970461),
678 2137566013U));
679 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
680 EXPECT_EQ("156,367,791,872,945,1015,1139,1159,1265,1435", FirstFPs(10));
681 }
682
683 ResetPolicy(11); // num_probes = 7
684 for (int key = 0; key < 2087; key++) {
685 Add(Key(key, buffer));
686 }
687 Build();
688 EXPECT_EQ(GetNumProbesFromFilterData(), 7);
689 EXPECT_EQ(
690 BloomHash(FilterData()),
691 SelectByImpl(SelectByCacheLineSize(3755609649U, 1812694762, 1449142939),
692 2561502687U));
693 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
694 EXPECT_EQ("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
695 }
696
697 // This used to be 9 probes, but 8 is a better choice for speed,
698 // especially with SIMD groups of 8 probes, with essentially no
699 // change in FP rate.
700 // FP rate @ 9 probes, old Bloom: 0.4321%
701 // FP rate @ 9 probes, new Bloom: 0.1846%
702 // FP rate @ 8 probes, new Bloom: 0.1843%
703 ResetPolicy(14); // num_probes = 8 (new), 9 (old)
704 for (int key = 0; key < 2087; key++) {
705 Add(Key(key, buffer));
706 }
707 Build();
708 EXPECT_EQ(static_cast<uint32_t>(GetNumProbesFromFilterData()),
709 SelectByImpl(9, 8));
710 EXPECT_EQ(
711 BloomHash(FilterData()),
712 SelectByImpl(SelectByCacheLineSize(178861123, 379087593, 2574136516U),
713 3709876890U));
714 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
715 EXPECT_EQ("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
716 }
717
718 // This used to be 11 probes, but 9 is a better choice for speed
719 // AND accuracy.
720 // FP rate @ 11 probes, old Bloom: 0.3571%
721 // FP rate @ 11 probes, new Bloom: 0.0884%
722 // FP rate @ 9 probes, new Bloom: 0.0843%
723 ResetPolicy(16); // num_probes = 9 (new), 11 (old)
724 for (int key = 0; key < 2087; key++) {
725 Add(Key(key, buffer));
726 }
727 Build();
728 EXPECT_EQ(static_cast<uint32_t>(GetNumProbesFromFilterData()),
729 SelectByImpl(11, 9));
730 EXPECT_EQ(
731 BloomHash(FilterData()),
732 SelectByImpl(SelectByCacheLineSize(1129406313, 3049154394U, 1727750964),
733 1087138490));
734 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
735 EXPECT_EQ("3299,3611,3916,6620,7822,8079,8482,8942,10167", FirstFPs(9));
736 }
737
738 ResetPolicy(10); // num_probes = 6, but different memory ratio vs. 9
739 for (int key = 0; key < 2087; key++) {
740 Add(Key(key, buffer));
741 }
742 Build();
743 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
744 EXPECT_EQ(
745 BloomHash(FilterData()),
746 SelectByImpl(SelectByCacheLineSize(1478976371, 2910591341U, 1182970461),
747 2498541272U));
748 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
749 EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
750 }
751
752 ResetPolicy(10);
753 for (int key = /*CHANGED*/ 1; key < 2087; key++) {
754 Add(Key(key, buffer));
755 }
756 Build();
757 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
758 EXPECT_EQ(
759 BloomHash(FilterData()),
760 SelectByImpl(SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U),
761 2058382345U));
762 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
763 EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
764 }
765
766 ResetPolicy(10);
767 for (int key = 1; key < /*CHANGED*/ 2088; key++) {
768 Add(Key(key, buffer));
769 }
770 Build();
771 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
772 EXPECT_EQ(
773 BloomHash(FilterData()),
774 SelectByImpl(SelectByCacheLineSize(2885052954U, 769447944, 4175124908U),
775 23699164));
776 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
777 EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
778 }
779
780 // With new fractional bits_per_key, check that we are rounding to
781 // whole bits per key for old Bloom filters but fractional for
782 // new Bloom filter.
783 ResetPolicy(9.5);
784 for (int key = 1; key < 2088; key++) {
785 Add(Key(key, buffer));
786 }
787 Build();
788 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
789 EXPECT_EQ(BloomHash(FilterData()),
790 SelectByImpl(/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944,
791 4175124908U),
792 /*CHANGED*/ 3166884174U));
793 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
794 EXPECT_EQ(/*CHANGED*/ "126,156,367,444,458,791,813,976,1015,1035",
795 FirstFPs(10));
796 }
797
798 ResetPolicy(10.499);
799 for (int key = 1; key < 2088; key++) {
800 Add(Key(key, buffer));
801 }
802 Build();
803 EXPECT_EQ(static_cast<uint32_t>(GetNumProbesFromFilterData()),
804 SelectByImpl(6, 7));
805 EXPECT_EQ(BloomHash(FilterData()),
806 SelectByImpl(/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944,
807 4175124908U),
808 /*CHANGED*/ 4098502778U));
809 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
810 EXPECT_EQ(/*CHANGED*/ "16,236,240,472,1015,1045,1111,1409,1465,1612",
811 FirstFPs(10));
812 }
813
814 ResetPolicy();
815 }
816
817 // A helper class for testing custom or corrupt filter bits as read by
818 // built-in FilterBitsReaders.
819 struct RawFilterTester {
820 // Buffer, from which we always return a tail Slice, so the
821 // last five bytes are always the metadata bytes.
822 std::array<char, 3000> data_;
823 // Points five bytes from the end
824 char* metadata_ptr_;
825
826 RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {}
827
828 Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines,
829 uint32_t num_probes) {
830 metadata_ptr_[0] = static_cast<char>(num_probes);
831 EncodeFixed32(metadata_ptr_ + 1, num_lines);
832 uint32_t len = len_without_metadata + /*metadata*/ 5;
833 assert(len <= data_.size());
834 return Slice(metadata_ptr_ - len_without_metadata, len);
835 }
836
837 Slice Reset(uint32_t len_without_metadata, uint32_t num_lines,
838 uint32_t num_probes, bool fill_ones) {
839 data_.fill(fill_ones ? 0xff : 0);
840 return ResetNoFill(len_without_metadata, num_lines, num_probes);
841 }
842
843 Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines,
844 uint32_t num_probes) {
845 for (uint32_t i = 0; i < data_.size(); ++i) {
846 data_[i] = static_cast<char>(0x7b7b >> (i % 7));
847 }
848 return ResetNoFill(len_without_metadata, num_lines, num_probes);
849 }
850 };
851
852 TEST_P(FullBloomTest, RawSchema) {
853 RawFilterTester cft;
854 // Two probes, about 3/4 bits set: ~50% "FP" rate
855 // One 256-byte cache line.
856 OpenRaw(cft.ResetWeirdFill(256, 1, 2));
857 EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches());
858
859 // Two 128-byte cache lines.
860 OpenRaw(cft.ResetWeirdFill(256, 2, 2));
861 EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches());
862
863 // Four 64-byte cache lines.
864 OpenRaw(cft.ResetWeirdFill(256, 4, 2));
865 EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());
866 }
867
868 TEST_P(FullBloomTest, CorruptFilters) {
869 RawFilterTester cft;
870
871 for (bool fill : {false, true}) {
872 // Good filter bits - returns same as fill
873 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
874 ASSERT_EQ(fill, Matches("hello"));
875 ASSERT_EQ(fill, Matches("world"));
876
877 // Good filter bits - returns same as fill
878 OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill));
879 ASSERT_EQ(fill, Matches("hello"));
880 ASSERT_EQ(fill, Matches("world"));
881
882 // Good filter bits - returns same as fill
883 // 256 is unusual but legal cache line size
884 OpenRaw(cft.Reset(256 * 3, 3, 6, fill));
885 ASSERT_EQ(fill, Matches("hello"));
886 ASSERT_EQ(fill, Matches("world"));
887
888 // Good filter bits - returns same as fill
889 // 30 should be max num_probes
890 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill));
891 ASSERT_EQ(fill, Matches("hello"));
892 ASSERT_EQ(fill, Matches("world"));
893
894 // Good filter bits - returns same as fill
895 // 1 should be min num_probes
896 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill));
897 ASSERT_EQ(fill, Matches("hello"));
898 ASSERT_EQ(fill, Matches("world"));
899
900 // Type 1 trivial filter bits - returns true as if FP by zero probes
901 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill));
902 ASSERT_TRUE(Matches("hello"));
903 ASSERT_TRUE(Matches("world"));
904
905 // Type 2 trivial filter bits - returns false as if built from zero keys
906 OpenRaw(cft.Reset(0, 0, 6, fill));
907 ASSERT_FALSE(Matches("hello"));
908 ASSERT_FALSE(Matches("world"));
909
910 // Type 2 trivial filter bits - returns false as if built from zero keys
911 OpenRaw(cft.Reset(0, 37, 6, fill));
912 ASSERT_FALSE(Matches("hello"));
913 ASSERT_FALSE(Matches("world"));
914
915 // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes
916 OpenRaw(cft.Reset(0, 0, 0, fill));
917 ASSERT_FALSE(Matches("hello"));
918 ASSERT_FALSE(Matches("world"));
919
920 // Bad filter bits - returns true for safety
921 // No solution to 0 * x == CACHE_LINE_SIZE
922 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill));
923 ASSERT_TRUE(Matches("hello"));
924 ASSERT_TRUE(Matches("world"));
925
926 // Bad filter bits - returns true for safety
927 // Can't have 3 * x == 4 for integer x
928 OpenRaw(cft.Reset(4, 3, 6, fill));
929 ASSERT_TRUE(Matches("hello"));
930 ASSERT_TRUE(Matches("world"));
931
932 // Bad filter bits - returns true for safety
933 // 97 bytes is not a power of two, so not a legal cache line size
934 OpenRaw(cft.Reset(97 * 3, 3, 6, fill));
935 ASSERT_TRUE(Matches("hello"));
936 ASSERT_TRUE(Matches("world"));
937
938 // Bad filter bits - returns true for safety
939 // 65 bytes is not a power of two, so not a legal cache line size
940 OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
941 ASSERT_TRUE(Matches("hello"));
942 ASSERT_TRUE(Matches("world"));
943
944 // Bad filter bits - returns false as if built from zero keys
945 // < 5 bytes overall means missing even metadata
946 OpenRaw(cft.Reset(static_cast<uint32_t>(-1), 3, 6, fill));
947 ASSERT_FALSE(Matches("hello"));
948 ASSERT_FALSE(Matches("world"));
949
950 OpenRaw(cft.Reset(static_cast<uint32_t>(-5), 3, 6, fill));
951 ASSERT_FALSE(Matches("hello"));
952 ASSERT_FALSE(Matches("world"));
953
954 // Dubious filter bits - returns same as fill (for now)
955 // 31 is not a useful num_probes, nor generated by RocksDB unless directly
956 // using filter bits API without BloomFilterPolicy.
957 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill));
958 ASSERT_EQ(fill, Matches("hello"));
959 ASSERT_EQ(fill, Matches("world"));
960
961 // Dubious filter bits - returns same as fill (for now)
962 // Similar, with 127, largest positive char
963 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill));
964 ASSERT_EQ(fill, Matches("hello"));
965 ASSERT_EQ(fill, Matches("world"));
966
967 // Dubious filter bits - returns true (for now)
968 // num_probes set to 128 / -128, lowest negative char
969 // NB: Bug in implementation interprets this as negative and has same
970 // effect as zero probes, but effectively reserves negative char values
971 // for future use.
972 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill));
973 ASSERT_TRUE(Matches("hello"));
974 ASSERT_TRUE(Matches("world"));
975
976 // Dubious filter bits - returns true (for now)
977 // Similar, with 255 / -1
978 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 255, fill));
979 ASSERT_TRUE(Matches("hello"));
980 ASSERT_TRUE(Matches("world"));
981 }
982 }
983
984 INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
985 testing::Values(BloomFilterPolicy::kLegacyBloom,
986 BloomFilterPolicy::kFastLocalBloom,
987 BloomFilterPolicy::kStandard128Ribbon));
988
989 } // namespace ROCKSDB_NAMESPACE
990
991 int main(int argc, char** argv) {
992 ::testing::InitGoogleTest(&argc, argv);
993 ParseCommandLineFlags(&argc, &argv, true);
994
995 return RUN_ALL_TESTS();
996 }
997
998 #endif // GFLAGS