]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/util/bloom_test.cc
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / rocksdb / util / bloom_test.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
#ifndef GFLAGS
#include <cstdio>
// This test depends on gflags for its command-line parameters
// (e.g. -bits_per_key). Without gflags, degrade to a no-op binary
// that reports the skip instead of failing the build.
int main() {
  fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
  return 0;
}
#else
17
18 #include <array>
19 #include <cmath>
20 #include <vector>
21
22 #include "cache/cache_entry_roles.h"
23 #include "cache/cache_reservation_manager.h"
24 #include "memory/arena.h"
25 #include "port/jemalloc_helper.h"
26 #include "rocksdb/filter_policy.h"
27 #include "table/block_based/filter_policy_internal.h"
28 #include "test_util/testharness.h"
29 #include "test_util/testutil.h"
30 #include "util/gflags_compat.h"
31 #include "util/hash.h"
32
using GFLAGS_NAMESPACE::ParseCommandLineFlags;

// The test is not fully designed for bits_per_key other than 10, but with
// this parameter you can easily explore the behavior of other bits_per_key.
// See also filter_bench.
DEFINE_int32(bits_per_key, 10, "");

namespace ROCKSDB_NAMESPACE {

namespace {
// Names of the built-in filter policies exercised by the parameterized
// tests below (used as the TEST_P parameter values).
const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName();
const std::string kFastLocalBloom =
    test::FastLocalBloomFilterPolicy::kClassName();
const std::string kStandard128Ribbon =
    test::Standard128RibbonFilterPolicy::kClassName();
}  // namespace

// Logging verbosity: >= 1 prints per-filter FP-rate statistics to stderr.
static const int kVerbose = 1;
51
52 static Slice Key(int i, char* buffer) {
53 std::string s;
54 PutFixed32(&s, static_cast<uint32_t>(i));
55 memcpy(buffer, s.c_str(), sizeof(i));
56 return Slice(buffer, sizeof(i));
57 }
58
// Advances a test key-count through a roughly logarithmic schedule:
// step 1 below 10, step 10 below 100, step 100 below 1000, step 1000 after.
static int NextLength(int length) {
  int step;
  if (length < 10) {
    step = 1;
  } else if (length < 100) {
    step = 10;
  } else if (length < 1000) {
    step = 100;
  } else {
    step = 1000;
  }
  return length + step;
}
71
// Parameterized fixture: the parameter is the name of a built-in
// filter policy (kLegacyBloom, kFastLocalBloom, kStandard128Ribbon).
class FullBloomTest : public testing::TestWithParam<std::string> {
 protected:
  BlockBasedTableOptions table_options_;

 private:
  // Bound to table_options_.filter_policy in the constructor, so assigning
  // through this reference updates the options in place.
  std::shared_ptr<const FilterPolicy>& policy_;
  std::unique_ptr<FilterBitsBuilder> bits_builder_;
  std::unique_ptr<FilterBitsReader> bits_reader_;
  // Owns the finished filter bytes (set by Build())
  std::unique_ptr<const char[]> buf_;
  size_t filter_size_;

 public:
  FullBloomTest() : policy_(table_options_.filter_policy), filter_size_(0) {
    ResetPolicy();
  }

  // Returns nullptr on bad cast (pointer dynamic_cast does not throw)
  BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() {
    return dynamic_cast<BuiltinFilterBitsBuilder*>(bits_builder_.get());
  }

  // Throws std::bad_cast on bad cast (reference dynamic_cast)
  const BloomLikeFilterPolicy* GetBloomLikeFilterPolicy() {
    return &dynamic_cast<const BloomLikeFilterPolicy&>(*policy_);
  }

  // Re-creates the builder from the current policy/options and discards
  // any previously built filter.
  void Reset() {
    bits_builder_.reset(BloomFilterPolicy::GetBuilderFromContext(
        FilterBuildingContext(table_options_)));
    bits_reader_.reset(nullptr);
    buf_.reset(nullptr);
    filter_size_ = 0;
  }

  // Installs the policy named by the test parameter with the given
  // bits_per_key, then Reset()s builder state.
  void ResetPolicy(double bits_per_key) {
    policy_ = BloomLikeFilterPolicy::Create(GetParam(), bits_per_key);
    Reset();
  }

  void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }

  void Add(const Slice& s) { bits_builder_->AddKey(s); }

  // Opens a reader directly on raw filter bytes, bypassing the builder
  // (used by RawSchema / CorruptFilters).
  void OpenRaw(const Slice& s) {
    bits_reader_.reset(policy_->GetFilterBitsReader(s));
  }

  // Finalizes the filter from the added keys and opens a reader on it.
  void Build() {
    Slice filter = bits_builder_->Finish(&buf_);
    bits_reader_.reset(policy_->GetFilterBitsReader(filter));
    filter_size_ = filter.size();
  }

  size_t FilterSize() const { return filter_size_; }

  Slice FilterData() { return Slice(buf_.get(), filter_size_); }

  // Decodes num_probes from the 5-byte metadata trailer: byte [size-5] is
  // either the probe count itself (legacy) or a marker byte.
  int GetNumProbesFromFilterData() {
    assert(filter_size_ >= 5);
    int8_t raw_num_probes = static_cast<int8_t>(buf_.get()[filter_size_ - 5]);
    if (raw_num_probes == -1) {  // New bloom filter marker
      return static_cast<uint8_t>(buf_.get()[filter_size_ - 3]);
    } else {
      return raw_num_probes;
    }
  }

  // Decodes the Ribbon seed from the metadata trailer; asserts the -2
  // Standard128 Ribbon marker byte is present at [size-5].
  int GetRibbonSeedFromFilterData() {
    assert(filter_size_ >= 5);
    // Check for ribbon marker
    assert(-2 == static_cast<int8_t>(buf_.get()[filter_size_ - 5]));
    return static_cast<uint8_t>(buf_.get()[filter_size_ - 4]);
  }

  // Queries the filter, lazily Build()ing on first use.
  bool Matches(const Slice& s) {
    if (bits_reader_ == nullptr) {
      Build();
    }
    return bits_reader_->MayMatch(s);
  }

  // Provides a kind of fingerprint on the Bloom filter's
  // behavior, for reasonably high FP rates: packs the match results of
  // 64 probe keys into one bit per key.
  uint64_t PackedMatches() {
    char buffer[sizeof(int)];
    uint64_t result = 0;
    for (int i = 0; i < 64; i++) {
      if (Matches(Key(i + 12345, buffer))) {
        result |= uint64_t{1} << i;
      }
    }
    return result;
  }

  // Provides a kind of fingerprint on the Bloom filter's
  // behavior, for lower FP rates: returns a comma-separated list of the
  // indices of the first `count` false positives among sequential probe
  // keys (offset by 1000000 from the keys the tests add).
  std::string FirstFPs(int count) {
    char buffer[sizeof(int)];
    std::string rv;
    int fp_count = 0;
    for (int i = 0; i < 1000000; i++) {
      if (Matches(Key(i + 1000000, buffer))) {
        ++fp_count;
        rv += std::to_string(i);
        if (fp_count == count) {
          break;
        }
        rv += ',';
      }
    }
    return rv;
  }

  // Estimates the FP rate by querying 10000 keys from a range far from the
  // keys the tests add (offset 1000000000).
  double FalsePositiveRate() {
    char buffer[sizeof(int)];
    int result = 0;
    for (int i = 0; i < 10000; i++) {
      if (Matches(Key(i + 1000000000, buffer))) {
        result++;
      }
    }
    return result / 10000.0;
  }
};
197
TEST_P(FullBloomTest, FilterSize) {
  // In addition to checking the consistency of space computation, we are
  // checking that denoted and computed doubles are interpreted as expected
  // as bits_per_key values.
  bool some_computed_less_than_denoted = false;
  // Note: to avoid unproductive configurations, bits_per_key < 0.5 is rounded
  // down to 0 (no filter), and 0.5 <= bits_per_key < 1.0 is rounded up to 1
  // bit per key (1000 millibits). Also, enforced maximum is 100 bits per key
  // (100000 millibits).
  // Each pair is {bits_per_key input, expected millibits per key}.
  for (auto bpk : std::vector<std::pair<double, int> >{{-HUGE_VAL, 0},
                                                       {-INFINITY, 0},
                                                       {0.0, 0},
                                                       {0.499, 0},
                                                       {0.5, 1000},
                                                       {1.234, 1234},
                                                       {3.456, 3456},
                                                       {9.5, 9500},
                                                       {10.0, 10000},
                                                       {10.499, 10499},
                                                       {21.345, 21345},
                                                       {99.999, 99999},
                                                       {1234.0, 100000},
                                                       {HUGE_VAL, 100000},
                                                       {INFINITY, 100000},
                                                       {NAN, 100000}}) {
    ResetPolicy(bpk.first);
    auto bfp = GetBloomLikeFilterPolicy();
    EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
    EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());

    double computed = bpk.first;
    // This transforms e.g. 9.5 -> 9.499999999999998, which we still
    // round to 10 for whole bits per key.
    computed += 0.5;
    computed /= 1234567.0;
    computed *= 1234567.0;
    computed -= 0.5;
    some_computed_less_than_denoted |= (computed < bpk.first);
    ResetPolicy(computed);
    bfp = GetBloomLikeFilterPolicy();
    EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
    EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());

    auto bits_builder = GetBuiltinFilterBitsBuilder();
    if (bpk.second == 0) {
      // Zero millibits per key means "no filter" -> no builder at all.
      ASSERT_EQ(bits_builder, nullptr);
      continue;
    }

    size_t n = 1;
    size_t space = 0;
    for (; n < 1000000; n += 1 + n / 1000) {
      // Ensure consistency between CalculateSpace and ApproximateNumEntries
      space = bits_builder->CalculateSpace(n);
      size_t n2 = bits_builder->ApproximateNumEntries(space);
      EXPECT_GE(n2, n);
      size_t space2 = bits_builder->CalculateSpace(n2);
      if (n > 12000 && GetParam() == kStandard128Ribbon) {
        // TODO(peterd): better approximation?
        EXPECT_GE(space2, space);
        EXPECT_LE(space2 * 0.998, space * 1.0);
      } else {
        EXPECT_EQ(space2, space);
      }
    }
    // Until size_t overflow
    for (; n < (n + n / 3); n += n / 3) {
      // Ensure space computation is not overflowing; capped is OK
      size_t space2 = bits_builder->CalculateSpace(n);
      EXPECT_GE(space2, space);
      space = space2;
    }
  }
  // Check that the compiler hasn't optimized our computation into nothing
  EXPECT_TRUE(some_computed_less_than_denoted);
  ResetPolicy();
}
275
276 TEST_P(FullBloomTest, FullEmptyFilter) {
277 // Empty filter is not match, at this level
278 ASSERT_TRUE(!Matches("hello"));
279 ASSERT_TRUE(!Matches("world"));
280 }
281
282 TEST_P(FullBloomTest, FullSmall) {
283 Add("hello");
284 Add("world");
285 ASSERT_TRUE(Matches("hello"));
286 ASSERT_TRUE(Matches("world"));
287 ASSERT_TRUE(!Matches("x"));
288 ASSERT_TRUE(!Matches("foo"));
289 }
290
// Builds filters for a logarithmic schedule of key counts and checks size
// bounds, zero false negatives, and the false-positive rate.
TEST_P(FullBloomTest, FullVaryingLengths) {
  char buffer[sizeof(int)];

  // Count number of filters that significantly exceed the false positive rate
  int mediocre_filters = 0;
  int good_filters = 0;

  for (int length = 1; length <= 10000; length = NextLength(length)) {
    Reset();
    for (int i = 0; i < length; i++) {
      Add(Key(i, buffer));
    }
    Build();

    // Size bound: bits_per_key payload plus cache-line rounding and the
    // 5-byte metadata trailer.
    EXPECT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) +
                                     CACHE_LINE_SIZE * 2 + 5));

    // All added keys must match
    for (int i = 0; i < length; i++) {
      ASSERT_TRUE(Matches(Key(i, buffer)))
          << "Length " << length << "; key " << i;
    }

    // Check false positive rate
    double rate = FalsePositiveRate();
    if (kVerbose >= 1) {
      fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
              rate * 100.0, length, static_cast<int>(FilterSize()));
    }
    // FP-rate thresholds are only calibrated for the default 10 bits/key.
    if (FLAGS_bits_per_key == 10) {
      EXPECT_LE(rate, 0.02);  // Must not be over 2%
      if (rate > 0.0125) {
        mediocre_filters++;  // Allowed, but not too often
      } else {
        good_filters++;
      }
    }
  }
  if (kVerbose >= 1) {
    fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters,
            mediocre_filters);
  }
  // At most 1 mediocre filter per 5 good ones.
  EXPECT_LE(mediocre_filters, good_filters / 5);
}
335
// Checks size/FP-rate/fragmentation trade-offs with
// optimize_filters_for_memory both on and off, over 100 randomly sized
// filters.
TEST_P(FullBloomTest, OptimizeForMemory) {
  char buffer[sizeof(int)];
  for (bool offm : {true, false}) {
    table_options_.optimize_filters_for_memory = offm;
    ResetPolicy();
    Random32 rnd(12345);  // fixed seed for reproducibility
    uint64_t total_size = 0;  // sum of filter byte sizes
    uint64_t total_mem = 0;   // sum of actual allocation sizes (if known)
    int64_t total_keys = 0;
    double total_fp_rate = 0;
    constexpr int nfilters = 100;
    for (int i = 0; i < nfilters; ++i) {
      int nkeys = static_cast<int>(rnd.Uniformish(10000)) + 100;
      Reset();
      for (int j = 0; j < nkeys; ++j) {
        Add(Key(j, buffer));
      }
      Build();
      size_t size = FilterData().size();
      total_size += size;
      // optimize_filters_for_memory currently depends on malloc_usable_size
      // but we run the rest of the test to ensure no bad behavior without it.
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
      size = malloc_usable_size(const_cast<char*>(FilterData().data()));
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
      total_mem += size;
      total_keys += nkeys;
      total_fp_rate += FalsePositiveRate();
    }
    // FP-rate band is only calibrated for the default 10 bits/key.
    if (FLAGS_bits_per_key == 10) {
      EXPECT_LE(total_fp_rate / double{nfilters}, 0.011);
      EXPECT_GE(total_fp_rate / double{nfilters},
                CACHE_LINE_SIZE >= 256 ? 0.007 : 0.008);
    }

    // Minimum expected total payload given bits_per_key.
    int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8;
    if (GetParam() == kStandard128Ribbon) {
      // ~ 30% savings vs. Bloom filter
      ex_min_total_size = 7 * ex_min_total_size / 10;
    }
    EXPECT_GE(static_cast<int64_t>(total_size), ex_min_total_size);

    // Per-filter overhead: one cache line of rounding + 5 metadata bytes.
    int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5);
    if (GetParam() == kLegacyBloom) {
      // this config can add extra cache line to make odd number
      blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE;
    }

    EXPECT_GE(total_mem, total_size);

    // optimize_filters_for_memory not implemented with legacy Bloom
    if (offm && GetParam() != kLegacyBloom) {
      // This value can include a small extra penalty for kExtraPadding
      fprintf(stderr, "Internal fragmentation (optimized): %g%%\n",
              (total_mem - total_size) * 100.0 / total_size);
      // Less than 1% internal fragmentation
      EXPECT_LE(total_mem, total_size * 101 / 100);
      // Up to 2% storage penalty
      EXPECT_LE(static_cast<int64_t>(total_size),
                ex_min_total_size * 102 / 100 + blocked_bloom_overhead);
    } else {
      fprintf(stderr, "Internal fragmentation (not optimized): %g%%\n",
              (total_mem - total_size) * 100.0 / total_size);
      // TODO: add control checks for more allocators?
#ifdef ROCKSDB_JEMALLOC
      fprintf(stderr, "Jemalloc detected? %d\n", HasJemalloc());
      if (HasJemalloc()) {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
        // More than 5% internal fragmentation
        EXPECT_GE(total_mem, total_size * 105 / 100);
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
      }
#endif  // ROCKSDB_JEMALLOC
      // No storage penalty, just usual overhead
      EXPECT_LE(static_cast<int64_t>(total_size),
                ex_min_total_size + blocked_bloom_overhead);
    }
  }
}
415
class ChargeFilterConstructionTest : public testing::Test {};
TEST_F(ChargeFilterConstructionTest, RibbonFilterFallBackOnLargeBanding) {
  // Cache sized to 8 dummy reservation entries, with strict capacity limit.
  // NOTE(review): num_entries_for_cache_full is presumably sized so the
  // Ribbon banding reservation exceeds the cache when charging is enabled
  // (hence the expected fallback) -- confirm against the banding math.
  constexpr std::size_t kCacheCapacity =
      8 * CacheReservationManagerImpl<
              CacheEntryRole::kFilterConstruction>::GetDummyEntrySize();
  constexpr std::size_t num_entries_for_cache_full = kCacheCapacity / 8;

  for (CacheEntryRoleOptions::Decision charge_filter_construction_mem :
       {CacheEntryRoleOptions::Decision::kEnabled,
        CacheEntryRoleOptions::Decision::kDisabled}) {
    // Fallback to Bloom is expected exactly when charging is enabled.
    bool will_fall_back = charge_filter_construction_mem ==
                          CacheEntryRoleOptions::Decision::kEnabled;

    BlockBasedTableOptions table_options;
    table_options.cache_usage_options.options_overrides.insert(
        {CacheEntryRole::kFilterConstruction,
         {/*.charged = */ charge_filter_construction_mem}});
    LRUCacheOptions lo;
    lo.capacity = kCacheCapacity;
    lo.num_shard_bits = 0;  // 2^0 shard
    lo.strict_capacity_limit = true;
    std::shared_ptr<Cache> cache(NewLRUCache(lo));
    table_options.block_cache = cache;
    table_options.filter_policy =
        BloomLikeFilterPolicy::Create(kStandard128Ribbon, FLAGS_bits_per_key);
    FilterBuildingContext ctx(table_options);
    std::unique_ptr<FilterBitsBuilder> filter_bits_builder(
        table_options.filter_policy->GetBuilderWithContext(ctx));

    char key_buffer[sizeof(int)];
    for (std::size_t i = 0; i < num_entries_for_cache_full; ++i) {
      filter_bits_builder->AddKey(Key(static_cast<int>(i), key_buffer));
    }

    std::unique_ptr<const char[]> buf;
    Slice filter = filter_bits_builder->Finish(&buf);

    // To verify Ribbon Filter fallbacks to Bloom Filter properly
    // based on cache charging result
    // See BloomFilterPolicy::GetBloomBitsReader re: metadata
    // -1 = Marker for newer Bloom implementations
    // -2 = Marker for Standard128 Ribbon
    if (will_fall_back) {
      EXPECT_EQ(filter.data()[filter.size() - 5], static_cast<char>(-1));
    } else {
      EXPECT_EQ(filter.data()[filter.size() - 5], static_cast<char>(-2));
    }

    if (charge_filter_construction_mem ==
        CacheEntryRoleOptions::Decision::kEnabled) {
      // Final pinned usage should equal the filter size rounded up to a
      // whole number of dummy entries.
      const size_t dummy_entry_num = static_cast<std::size_t>(std::ceil(
          filter.size() * 1.0 /
          CacheReservationManagerImpl<
              CacheEntryRole::kFilterConstruction>::GetDummyEntrySize()));
      EXPECT_GE(
          cache->GetPinnedUsage(),
          dummy_entry_num *
              CacheReservationManagerImpl<
                  CacheEntryRole::kFilterConstruction>::GetDummyEntrySize());
      EXPECT_LT(
          cache->GetPinnedUsage(),
          (dummy_entry_num + 1) *
              CacheReservationManagerImpl<
                  CacheEntryRole::kFilterConstruction>::GetDummyEntrySize());
    } else {
      // With charging disabled, nothing should be reserved in the cache.
      EXPECT_EQ(cache->GetPinnedUsage(), 0);
    }
  }
}
485
namespace {
// Selects the expected golden value matching the compile-time
// CACHE_LINE_SIZE; unrecognized sizes fail the build. The (void) casts
// silence unused-parameter warnings for the branches compiled out.
inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
                                      uint32_t for256) {
  (void)for64;
  (void)for128;
  (void)for256;
#if CACHE_LINE_SIZE == 64
  return for64;
#elif CACHE_LINE_SIZE == 128
  return for128;
#elif CACHE_LINE_SIZE == 256
  return for256;
#else
#error "CACHE_LINE_SIZE unknown or unrecognized"
#endif
}
}  // namespace
503
// Ensure the implementation doesn't accidentally change in an
// incompatible way. This test doesn't check the reading side
// (FirstFPs/PackedMatches) for LegacyBloom because it requires the
// ability to read filters generated using other cache line sizes.
// See RawSchema.
TEST_P(FullBloomTest, Schema) {
// Conditional expectation macros: each EXPECT_EQ only fires when the test
// parameter selects the named implementation, so golden values for all
// three policies can live side by side in one test body.
#define EXPECT_EQ_Bloom(a, b)               \
  {                                         \
    if (GetParam() != kStandard128Ribbon) { \
      EXPECT_EQ(a, b);                      \
    }                                       \
  }
#define EXPECT_EQ_Ribbon(a, b)              \
  {                                         \
    if (GetParam() == kStandard128Ribbon) { \
      EXPECT_EQ(a, b);                      \
    }                                       \
  }
#define EXPECT_EQ_FastBloom(a, b)        \
  {                                      \
    if (GetParam() == kFastLocalBloom) { \
      EXPECT_EQ(a, b);                   \
    }                                    \
  }
#define EXPECT_EQ_LegacyBloom(a, b)   \
  {                                   \
    if (GetParam() == kLegacyBloom) { \
      EXPECT_EQ(a, b);                \
    }                                 \
  }
#define EXPECT_EQ_NotLegacy(a, b)     \
  {                                   \
    if (GetParam() != kLegacyBloom) { \
      EXPECT_EQ(a, b);                \
    }                                 \
  }

  char buffer[sizeof(int)];

  // First do a small number of keys, where Ribbon config will fall back on
  // fast Bloom filter and generate the same data
  ResetPolicy(5);  // num_probes = 3
  for (int key = 0; key < 87; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ(GetNumProbesFromFilterData(), 3);

  EXPECT_EQ_NotLegacy(BloomHash(FilterData()), 4130687756U);

  EXPECT_EQ_NotLegacy("31,38,40,43,61,83,86,112,125,131", FirstFPs(10));

  // Now use enough keys so that changing bits / key by 1 is guaranteed to
  // change number of allocated cache lines. So keys > max cache line bits.

  // Note that the first attempted Ribbon seed is determined by the hash
  // of the first key added (for pseudorandomness in practice, determinism in
  // testing)

  ResetPolicy(2);  // num_probes = 1
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 1);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1567096579, 1964771444, 2659542661U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3817481309U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1705851228U);

  EXPECT_EQ_FastBloom("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
  EXPECT_EQ_Ribbon("3,8,10,17,19,20,23,28,31,32", FirstFPs(10));

  ResetPolicy(3);  // num_probes = 2
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 2);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(2707206547U, 2571983456U, 218344685));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2807269961U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1095342358U);

  EXPECT_EQ_FastBloom("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
  EXPECT_EQ_Ribbon("3,17,20,28,32,33,36,43,49,54", FirstFPs(10));

  ResetPolicy(5);  // num_probes = 3
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 3);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(515748486, 94611728, 2436112214U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 204628445U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3971337699U);

  EXPECT_EQ_FastBloom("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
  EXPECT_EQ_Ribbon("3,33,36,43,67,70,76,78,84,102", FirstFPs(10));

  ResetPolicy(8);  // num_probes = 5
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 5);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1302145999, 2811644657U, 756553699));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 355564975U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3651449053U);

  EXPECT_EQ_FastBloom("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
  EXPECT_EQ_Ribbon("33,187,203,296,300,322,411,419,547,582", FirstFPs(10));

  ResetPolicy(9);  // num_probes = 6
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(2092755149, 661139132, 1182970461));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2137566013U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1005676675U);

  EXPECT_EQ_FastBloom("156,367,791,872,945,1015,1139,1159,1265", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,187,203,296,411,419,604,612,615,619", FirstFPs(10));

  ResetPolicy(11);  // num_probes = 7
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 7);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(3755609649U, 1812694762, 1449142939));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2561502687U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3129900846U);

  EXPECT_EQ_FastBloom("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
  EXPECT_EQ_Ribbon("411,419,623,665,727,794,955,1052,1323,1330", FirstFPs(10));

  // This used to be 9 probes, but 8 is a better choice for speed,
  // especially with SIMD groups of 8 probes, with essentially no
  // change in FP rate.
  // FP rate @ 9 probes, old Bloom: 0.4321%
  // FP rate @ 9 probes, new Bloom: 0.1846%
  // FP rate @ 8 probes, new Bloom: 0.1843%
  ResetPolicy(14);  // num_probes = 8 (new), 9 (old)
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 9);
  EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 8);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(178861123, 379087593, 2574136516U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3709876890U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1855638875U);

  EXPECT_EQ_FastBloom("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
  EXPECT_EQ_Ribbon("665,727,1323,1755,3866,4232,4442,4492,4736", FirstFPs(9));

  // This used to be 11 probes, but 9 is a better choice for speed
  // AND accuracy.
  // FP rate @ 11 probes, old Bloom: 0.3571%
  // FP rate @ 11 probes, new Bloom: 0.0884%
  // FP rate @ 9 probes, new Bloom: 0.0843%
  ResetPolicy(16);  // num_probes = 9 (new), 11 (old)
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 11);
  EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 9);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1129406313, 3049154394U, 1727750964));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 1087138490U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 459379967U);

  EXPECT_EQ_FastBloom("3299,3611,3916,6620,7822,8079,8482,8942", FirstFPs(8));
  EXPECT_EQ_Ribbon("727,1323,1755,4442,4736,5386,6974,7154,8222", FirstFPs(9));

  ResetPolicy(10);  // num_probes = 6, but different memory ratio vs. 9
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1478976371, 2910591341U, 1182970461));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2498541272U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1273231667U);

  EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
  EXPECT_EQ_Ribbon("296,411,419,612,619,623,630,665,686,727", FirstFPs(10));

  // Changing the first key changes the Ribbon seed (see note above).
  ResetPolicy(10);
  for (int key = /*CHANGED*/ 1; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), /*CHANGED*/ 184);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2058382345U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3007790572U);

  EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,152,383,497,589,633,737,781,911,990", FirstFPs(10));

  ResetPolicy(10);
  for (int key = 1; key < /*CHANGED*/ 2088; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 23699164U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1942323379U);

  EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,95,360,589,737,911,990,1048,1081,1414", FirstFPs(10));

  // With new fractional bits_per_key, check that we are rounding to
  // whole bits per key for old Bloom filters but fractional for
  // new Bloom filter.
  ResetPolicy(9.5);
  for (int key = 1; key < 2088; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3166884174U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1148258663U);

  EXPECT_EQ_FastBloom("126,156,367,444,458,791,813,976,1015", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,54,95,360,589,693,737,911,990,1048", FirstFPs(10));

  ResetPolicy(10.499);
  for (int key = 1; key < 2088; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 7);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 4098502778U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 792138188U);

  EXPECT_EQ_FastBloom("16,236,240,472,1015,1045,1111,1409,1465", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,95,360,589,737,990,1048,1081,1414,1643", FirstFPs(10));

  ResetPolicy();
}
803
// A helper class for testing custom or corrupt filter bits as read by
// built-in FilterBitsReaders.
struct RawFilterTester {
  // Buffer, from which we always return a tail Slice, so the
  // last five bytes are always the metadata bytes.
  std::array<char, 3000> data_;
  // Points five bytes from the end
  char* metadata_ptr_;

  RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {}

  // Writes the metadata trailer (1 num_probes byte + 4-byte fixed32
  // num_lines) without touching the payload bytes; returns the tail Slice
  // covering payload + 5 metadata bytes.
  Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines,
                    uint32_t num_probes) {
    metadata_ptr_[0] = static_cast<char>(num_probes);
    EncodeFixed32(metadata_ptr_ + 1, num_lines);
    uint32_t len = len_without_metadata + /*metadata*/ 5;
    assert(len <= data_.size());
    return Slice(metadata_ptr_ - len_without_metadata, len);
  }

  // Like ResetNoFill, but first fills the buffer with all-ones or all-zeros
  // payload bits.
  Slice Reset(uint32_t len_without_metadata, uint32_t num_lines,
              uint32_t num_probes, bool fill_ones) {
    data_.fill(fill_ones ? 0xff : 0);
    return ResetNoFill(len_without_metadata, num_lines, num_probes);
  }

  // Like ResetNoFill, but fills the buffer with a fixed quasi-random bit
  // pattern, for deterministic schema fingerprinting.
  Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines,
                       uint32_t num_probes) {
    for (uint32_t i = 0; i < data_.size(); ++i) {
      data_[i] = static_cast<char>(0x7b7b >> (i % 7));
    }
    return ResetNoFill(len_without_metadata, num_lines, num_probes);
  }
};
838
// Pins the reading-side schema: hand-constructed raw filter bytes must
// produce exact match fingerprints for each configuration.
TEST_P(FullBloomTest, RawSchema) {
  RawFilterTester cft;
  // Legacy Bloom configurations
  // Two probes, about 3/4 bits set: ~50% "FP" rate
  // One 256-byte cache line.
  OpenRaw(cft.ResetWeirdFill(256, 1, 2));
  EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches());

  // Two 128-byte cache lines.
  OpenRaw(cft.ResetWeirdFill(256, 2, 2));
  EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches());

  // Four 64-byte cache lines.
  OpenRaw(cft.ResetWeirdFill(256, 4, 2));
  EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());

  // Fast local Bloom configurations (marker 255 -> -1)
  // Two probes, about 3/4 bits set: ~50% "FP" rate
  // Four 64-byte cache lines.
  OpenRaw(cft.ResetWeirdFill(256, 2U << 8, 255));
  EXPECT_EQ(uint64_t{9957045189927952471U}, PackedMatches());

  // Ribbon configurations (marker 254 -> -2)

  // Even though the builder never builds configurations this
  // small (preferring Bloom), we can test that the configuration
  // can be read, for possible future-proofing.

  // 256 slots, one result column = 32 bytes (2 blocks, seed 0)
  // ~50% FP rate:
  // 0b0101010111110101010000110000011011011111100100001110010011101010
  OpenRaw(cft.ResetWeirdFill(32, 2U << 8, 254));
  EXPECT_EQ(uint64_t{6193930559317665002U}, PackedMatches());

  // 256 slots, three-to-four result columns = 112 bytes
  // ~ 1 in 10 FP rate:
  // 0b0000000000100000000000000000000001000001000000010000101000000000
  OpenRaw(cft.ResetWeirdFill(112, 2U << 8, 254));
  EXPECT_EQ(uint64_t{9007200345328128U}, PackedMatches());
}
879
TEST_P(FullBloomTest, CorruptFilters) {
  // Feeds hand-constructed raw filter payloads (built byte-by-byte,
  // bypassing the filter builders) to the reader, checking that corrupt or
  // unrecognized configurations degrade safely: either always-match
  // (false positives only, never a false negative) or always-no-match
  // (as if built from zero keys).
  RawFilterTester cft;

  // `fill` sets every data byte of the payload. For well-formed filter
  // metadata, query results should track it: all-zero bits match nothing,
  // all-one bits match everything.
  for (bool fill : {false, true}) {
    // Legacy Bloom configurations
    // Good filter bits - returns same as fill
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    // 256 is unusual but legal cache line size
    OpenRaw(cft.Reset(256 * 3, 3, 6, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    // 30 should be max num_probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    // 1 should be min num_probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Type 1 trivial filter bits - returns true as if FP by zero probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Type 2 trivial filter bits - returns false as if built from zero keys
    OpenRaw(cft.Reset(0, 0, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Type 2 trivial filter bits - returns false as if built from zero keys
    OpenRaw(cft.Reset(0, 37, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes
    OpenRaw(cft.Reset(0, 0, 0, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Bad filter bits - returns true for safety
    // No solution to 0 * x == CACHE_LINE_SIZE
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns true for safety
    // Can't have 3 * x == 4 for integer x
    OpenRaw(cft.Reset(4, 3, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns true for safety
    // 97 bytes is not a power of two, so not a legal cache line size
    OpenRaw(cft.Reset(97 * 3, 3, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns true for safety
    // 65 bytes is not a power of two, so not a legal cache line size
    OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns false as if built from zero keys
    // < 5 bytes overall means missing even metadata
    OpenRaw(cft.Reset(static_cast<uint32_t>(-1), 3, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    OpenRaw(cft.Reset(static_cast<uint32_t>(-5), 3, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Dubious filter bits - returns same as fill (for now)
    // 31 is not a useful num_probes, nor generated by RocksDB unless directly
    // using filter bits API without BloomFilterPolicy.
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Dubious filter bits - returns same as fill (for now)
    // Similar, with 127, largest positive char
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Dubious filter bits - returns true (for now)
    // num_probes set to 128 / -128, lowest negative char
    // NB: Bug in implementation interprets this as negative and has same
    // effect as zero probes, but effectively reserves negative char values
    // for future use.
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Dubious filter bits - returns true (for now)
    // Similar, with 253 / -3
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 253, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // #########################################################
    // Fast local Bloom configurations (marker 255 -> -1)
    // Good config with six probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 6U << 8, 255, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Becomes bad/reserved config (always true) if any other byte set
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | 1U, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 16), 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 24), 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Good config, max 30 probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 30U << 8, 255, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Bad/reserved config (always true) if more than 30
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 31U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 33U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 66U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 130U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));
  }

  // #########################################################
  // Ribbon configurations (marker 254 -> -2)
  // ("fill" doesn't work to detect good configurations, we just
  // have to rely on TN probability)

  // Good: 2 blocks * 16 bytes / segment * 4 columns = 128 bytes
  // seed = 123
  OpenRaw(cft.Reset(128, (2U << 8) + 123U, 254, false));
  ASSERT_FALSE(Matches("hello"));
  ASSERT_FALSE(Matches("world"));

  // Good: 2 blocks * 16 bytes / segment * 8 columns = 256 bytes
  OpenRaw(cft.Reset(256, (2U << 8) + 123U, 254, false));
  ASSERT_FALSE(Matches("hello"));
  ASSERT_FALSE(Matches("world"));

  // Surprisingly OK: 5000 blocks (640,000 slots) in only 1024 bits
  // -> average close to 0 columns
  OpenRaw(cft.Reset(128, (5000U << 8) + 123U, 254, false));
  // *Almost* all FPs
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));
  // Need many queries to find a "true negative"; fail if none in 1000 tries.
  for (int i = 0; Matches(std::to_string(i)); ++i) {
    ASSERT_LT(i, 1000);
  }

  // Bad: 1 block not allowed (for implementation detail reasons)
  OpenRaw(cft.Reset(128, (1U << 8) + 123U, 254, false));
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));

  // Bad: 0 blocks not allowed
  OpenRaw(cft.Reset(128, (0U << 8) + 123U, 254, false));
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));
}
1075
// Run every FullBloomTest case against all three filter implementations:
// legacy Bloom, fast local Bloom, and Standard128 Ribbon.
INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
                        testing::Values(kLegacyBloom, kFastLocalBloom,
                                        kStandard128Ribbon));
1079
1080 static double GetEffectiveBitsPerKey(FilterBitsBuilder* builder) {
1081 union {
1082 uint64_t key_value = 0;
1083 char key_bytes[8];
1084 };
1085
1086 const unsigned kNumKeys = 1000;
1087
1088 Slice key_slice{key_bytes, 8};
1089 for (key_value = 0; key_value < kNumKeys; ++key_value) {
1090 builder->AddKey(key_slice);
1091 }
1092
1093 std::unique_ptr<const char[]> buf;
1094 auto filter = builder->Finish(&buf);
1095 return filter.size() * /*bits per byte*/ 8 / (1.0 * kNumKeys);
1096 }
1097
1098 static void SetTestingLevel(int levelish, FilterBuildingContext* ctx) {
1099 if (levelish == -1) {
1100 // Flush is treated as level -1 for this option but actually level 0
1101 ctx->level_at_creation = 0;
1102 ctx->reason = TableFileCreationReason::kFlush;
1103 } else {
1104 ctx->level_at_creation = levelish;
1105 ctx->reason = TableFileCreationReason::kCompaction;
1106 }
1107 }
1108
TEST(RibbonTest, RibbonTestLevelThreshold) {
  // Verifies NewRibbonFilterPolicy's bloom_before_level option: levels
  // before the threshold get a Bloom filter, the threshold level and later
  // (and non-LSM contexts) get a Ribbon filter. Bloom vs. Ribbon is
  // detected by effective bits per key (~10 vs. < 8).
  BlockBasedTableOptions opts;
  FilterBuildingContext ctx(opts);
  // Cover each compaction style, since level is only meaningful for some.
  for (CompactionStyle cs : {kCompactionStyleLevel, kCompactionStyleUniversal,
                             kCompactionStyleFIFO, kCompactionStyleNone}) {
    ctx.compaction_style = cs;
    for (int bloom_before_level : {-1, 0, 1, 10}) {
      std::vector<std::unique_ptr<const FilterPolicy> > policies;
      policies.emplace_back(NewRibbonFilterPolicy(10, bloom_before_level));

      if (bloom_before_level == 0) {
        // Also test new API default
        policies.emplace_back(NewRibbonFilterPolicy(10));
      }

      for (std::unique_ptr<const FilterPolicy>& policy : policies) {
        // Claim to be generating filter for this level
        SetTestingLevel(bloom_before_level, &ctx);

        std::unique_ptr<FilterBitsBuilder> builder{
            policy->GetBuilderWithContext(ctx)};

        // Must be Ribbon (more space efficient than 10 bits per key)
        ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);

        if (bloom_before_level >= 0) {
          // Claim to be generating filter for previous level
          SetTestingLevel(bloom_before_level - 1, &ctx);

          builder.reset(policy->GetBuilderWithContext(ctx));

          if (cs == kCompactionStyleLevel || cs == kCompactionStyleUniversal) {
            // Level is considered.
            // Must be Bloom (~ 10 bits per key)
            ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9);
          } else {
            // Level is ignored under non-traditional compaction styles.
            // Must be Ribbon (more space efficient than 10 bits per key)
            ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
          }
        }

        // Like SST file writer: no level, miscellaneous creation reason.
        ctx.level_at_creation = -1;
        ctx.reason = TableFileCreationReason::kMisc;

        builder.reset(policy->GetBuilderWithContext(ctx));

        // Must be Ribbon (more space efficient than 10 bits per key)
        ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
      }
    }
  }
}
1164
1165 } // namespace ROCKSDB_NAMESPACE
1166
int main(int argc, char** argv) {
  // Print a stack trace on fatal signals so test crashes are debuggable.
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  // InitGoogleTest must run first: it consumes gtest's own flags, leaving
  // the remainder (e.g. --bits_per_key) for gflags to parse.
  ::testing::InitGoogleTest(&argc, argv);
  ParseCommandLineFlags(&argc, &argv, true);

  return RUN_ALL_TESTS();
}
1174
1175 #endif // GFLAGS