1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. | |
2 | // This source code is licensed under both the GPLv2 (found in the | |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
5 | // | |
6 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
#ifndef GFLAGS
#include <cstdio>
// Fallback build without gflags: this test depends on command-line flag
// parsing (FLAGS_bits_per_key), so skip gracefully (exit 0) instead of
// failing the test suite.
int main() {
  fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
  return 0;
}
#else
17 | ||
18 | #include <array> | |
19 | #include <cmath> | |
20 | #include <vector> | |
21 | ||
22 | #include "cache/cache_entry_roles.h" | |
23 | #include "cache/cache_reservation_manager.h" | |
24 | #include "memory/arena.h" | |
25 | #include "port/jemalloc_helper.h" | |
26 | #include "rocksdb/filter_policy.h" | |
27 | #include "table/block_based/filter_policy_internal.h" | |
28 | #include "test_util/testharness.h" | |
29 | #include "test_util/testutil.h" | |
30 | #include "util/gflags_compat.h" | |
31 | #include "util/hash.h" | |
32 | ||
33 | using GFLAGS_NAMESPACE::ParseCommandLineFlags; | |
34 | ||
35 | // The test is not fully designed for bits_per_key other than 10, but with | |
36 | // this parameter you can easily explore the behavior of other bits_per_key. | |
37 | // See also filter_bench. | |
38 | DEFINE_int32(bits_per_key, 10, ""); | |
39 | ||
40 | namespace ROCKSDB_NAMESPACE { | |
41 | ||
namespace {
// Canonical class names of the built-in filter-policy implementations under
// test; compared against GetParam() to select per-implementation behavior.
const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName();
const std::string kFastLocalBloom =
    test::FastLocalBloomFilterPolicy::kClassName();
const std::string kStandard128Ribbon =
    test::Standard128RibbonFilterPolicy::kClassName();
}  // namespace

// Verbosity level: >= 1 prints per-filter FP-rate and fragmentation stats.
static const int kVerbose = 1;
51 | ||
52 | static Slice Key(int i, char* buffer) { | |
53 | std::string s; | |
54 | PutFixed32(&s, static_cast<uint32_t>(i)); | |
55 | memcpy(buffer, s.c_str(), sizeof(i)); | |
56 | return Slice(buffer, sizeof(i)); | |
57 | } | |
58 | ||
// Advances a test length on a roughly logarithmic schedule: step +1 below
// 10, +10 below 100, +100 below 1000, and +1000 thereafter.
static int NextLength(int length) {
  int step;
  if (length < 10) {
    step = 1;
  } else if (length < 100) {
    step = 10;
  } else if (length < 1000) {
    step = 100;
  } else {
    step = 1000;
  }
  return length + step;
}
71 | ||
// Parameterized over a filter-policy class name (kLegacyBloom,
// kFastLocalBloom, kStandard128Ribbon). Provides helpers to build a filter
// from integer keys, query it, and inspect the raw filter bytes.
class FullBloomTest : public testing::TestWithParam<std::string> {
 protected:
  BlockBasedTableOptions table_options_;

 private:
  // Reference bound to table_options_.filter_policy in the ctor, so
  // assigning policy_ updates the table options in place. Declaration order
  // matters: table_options_ must be initialized first.
  std::shared_ptr<const FilterPolicy>& policy_;
  std::unique_ptr<FilterBitsBuilder> bits_builder_;
  std::unique_ptr<FilterBitsReader> bits_reader_;
  // Owns the finished filter bytes handed back by Finish().
  std::unique_ptr<const char[]> buf_;
  size_t filter_size_;

 public:
  FullBloomTest() : policy_(table_options_.filter_policy), filter_size_(0) {
    ResetPolicy();
  }

  // Returns nullptr if the current builder is not a BuiltinFilterBitsBuilder
  // (a pointer dynamic_cast does not throw; the FilterSize test relies on
  // nullptr for the bits_per_key == 0 "no filter" configuration).
  BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() {
    return dynamic_cast<BuiltinFilterBitsBuilder*>(bits_builder_.get());
  }

  const BloomLikeFilterPolicy* GetBloomLikeFilterPolicy() {
    // Throws (std::bad_cast) on bad cast, since this is a reference cast
    return &dynamic_cast<const BloomLikeFilterPolicy&>(*policy_);
  }

  // Discards any built filter and creates a fresh builder from the current
  // policy / table options.
  void Reset() {
    bits_builder_.reset(BloomFilterPolicy::GetBuilderFromContext(
        FilterBuildingContext(table_options_)));
    bits_reader_.reset(nullptr);
    buf_.reset(nullptr);
    filter_size_ = 0;
  }

  // Replaces the filter policy (class chosen by the test parameter) with the
  // given bits_per_key, then Reset()s builder state.
  void ResetPolicy(double bits_per_key) {
    policy_ = BloomLikeFilterPolicy::Create(GetParam(), bits_per_key);
    Reset();
  }

  void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }

  void Add(const Slice& s) { bits_builder_->AddKey(s); }

  // Opens a reader directly on caller-provided filter bytes; used to test
  // custom or corrupt filter data (see RawSchema).
  void OpenRaw(const Slice& s) {
    bits_reader_.reset(policy_->GetFilterBitsReader(s));
  }

  // Finalizes the filter from the added keys and opens a reader on it.
  void Build() {
    Slice filter = bits_builder_->Finish(&buf_);
    bits_reader_.reset(policy_->GetFilterBitsReader(filter));
    filter_size_ = filter.size();
  }

  size_t FilterSize() const { return filter_size_; }

  Slice FilterData() { return Slice(buf_.get(), filter_size_); }

  // Extracts num_probes from the 5-byte metadata trailer: -1 in the first
  // trailer byte marks newer Bloom implementations (probe count stored two
  // bytes later); otherwise that byte itself is the legacy probe count.
  int GetNumProbesFromFilterData() {
    assert(filter_size_ >= 5);
    int8_t raw_num_probes = static_cast<int8_t>(buf_.get()[filter_size_ - 5]);
    if (raw_num_probes == -1) {  // New bloom filter marker
      return static_cast<uint8_t>(buf_.get()[filter_size_ - 3]);
    } else {
      return raw_num_probes;
    }
  }

  // Extracts the Ribbon seed from the trailer; -2 marks Standard128 Ribbon.
  int GetRibbonSeedFromFilterData() {
    assert(filter_size_ >= 5);
    // Check for ribbon marker
    assert(-2 == static_cast<int8_t>(buf_.get()[filter_size_ - 5]));
    return static_cast<uint8_t>(buf_.get()[filter_size_ - 4]);
  }

  // Lazily Build()s the filter on first query.
  bool Matches(const Slice& s) {
    if (bits_reader_ == nullptr) {
      Build();
    }
    return bits_reader_->MayMatch(s);
  }

  // Provides a kind of fingerprint on the Bloom filter's
  // behavior, for reasonably high FP rates: one bit per probed key.
  uint64_t PackedMatches() {
    char buffer[sizeof(int)];
    uint64_t result = 0;
    for (int i = 0; i < 64; i++) {
      if (Matches(Key(i + 12345, buffer))) {
        result |= uint64_t{1} << i;
      }
    }
    return result;
  }

  // Provides a kind of fingerprint on the Bloom filter's
  // behavior, for lower FP rates: a comma-separated list of the first
  // `count` false-positive indices among keys that were not added.
  std::string FirstFPs(int count) {
    char buffer[sizeof(int)];
    std::string rv;
    int fp_count = 0;
    for (int i = 0; i < 1000000; i++) {
      if (Matches(Key(i + 1000000, buffer))) {
        ++fp_count;
        rv += std::to_string(i);
        if (fp_count == count) {
          break;
        }
        rv += ',';
      }
    }
    return rv;
  }

  // Estimates the FP rate by probing 10000 keys disjoint from those added.
  double FalsePositiveRate() {
    char buffer[sizeof(int)];
    int result = 0;
    for (int i = 0; i < 10000; i++) {
      if (Matches(Key(i + 1000000000, buffer))) {
        result++;
      }
    }
    return result / 10000.0;
  }
};
197 | ||
TEST_P(FullBloomTest, FilterSize) {
  // In addition to checking the consistency of space computation, we are
  // checking that denoted and computed doubles are interpreted as expected
  // as bits_per_key values.
  bool some_computed_less_than_denoted = false;
  // Note: to avoid unproductive configurations, bits_per_key < 0.5 is rounded
  // down to 0 (no filter), and 0.5 <= bits_per_key < 1.0 is rounded up to 1
  // bit per key (1000 millibits). Also, enforced maximum is 100 bits per key
  // (100000 millibits).
  // Each pair is {denoted bits_per_key, expected millibits_per_key}.
  for (auto bpk : std::vector<std::pair<double, int> >{{-HUGE_VAL, 0},
                                                       {-INFINITY, 0},
                                                       {0.0, 0},
                                                       {0.499, 0},
                                                       {0.5, 1000},
                                                       {1.234, 1234},
                                                       {3.456, 3456},
                                                       {9.5, 9500},
                                                       {10.0, 10000},
                                                       {10.499, 10499},
                                                       {21.345, 21345},
                                                       {99.999, 99999},
                                                       {1234.0, 100000},
                                                       {HUGE_VAL, 100000},
                                                       {INFINITY, 100000},
                                                       {NAN, 100000}}) {
    ResetPolicy(bpk.first);
    auto bfp = GetBloomLikeFilterPolicy();
    EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
    EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());

    double computed = bpk.first;
    // This transforms e.g. 9.5 -> 9.499999999999998, which we still
    // round to 10 for whole bits per key.
    computed += 0.5;
    computed /= 1234567.0;
    computed *= 1234567.0;
    computed -= 0.5;
    some_computed_less_than_denoted |= (computed < bpk.first);
    ResetPolicy(computed);
    bfp = GetBloomLikeFilterPolicy();
    EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
    EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());

    auto bits_builder = GetBuiltinFilterBitsBuilder();
    if (bpk.second == 0) {
      // 0 millibits/key means no filter is configured; the pointer
      // dynamic_cast in GetBuiltinFilterBitsBuilder yields nullptr.
      ASSERT_EQ(bits_builder, nullptr);
      continue;
    }

    size_t n = 1;
    size_t space = 0;
    for (; n < 1000000; n += 1 + n / 1000) {
      // Ensure consistency between CalculateSpace and ApproximateNumEntries
      space = bits_builder->CalculateSpace(n);
      size_t n2 = bits_builder->ApproximateNumEntries(space);
      EXPECT_GE(n2, n);
      size_t space2 = bits_builder->CalculateSpace(n2);
      if (n > 12000 && GetParam() == kStandard128Ribbon) {
        // TODO(peterd): better approximation?
        // Ribbon round-trip is allowed up to ~0.2% slack here.
        EXPECT_GE(space2, space);
        EXPECT_LE(space2 * 0.998, space * 1.0);
      } else {
        EXPECT_EQ(space2, space);
      }
    }
    // Until size_t overflow
    for (; n < (n + n / 3); n += n / 3) {
      // Ensure space computation is not overflowing; capped is OK
      size_t space2 = bits_builder->CalculateSpace(n);
      EXPECT_GE(space2, space);
      space = space2;
    }
  }
  // Check that the compiler hasn't optimized our computation into nothing
  EXPECT_TRUE(some_computed_less_than_denoted);
  ResetPolicy();
}
275 | ||
276 | TEST_P(FullBloomTest, FullEmptyFilter) { | |
277 | // Empty filter is not match, at this level | |
278 | ASSERT_TRUE(!Matches("hello")); | |
279 | ASSERT_TRUE(!Matches("world")); | |
280 | } | |
281 | ||
282 | TEST_P(FullBloomTest, FullSmall) { | |
283 | Add("hello"); | |
284 | Add("world"); | |
285 | ASSERT_TRUE(Matches("hello")); | |
286 | ASSERT_TRUE(Matches("world")); | |
287 | ASSERT_TRUE(!Matches("x")); | |
288 | ASSERT_TRUE(!Matches("foo")); | |
289 | } | |
290 | ||
TEST_P(FullBloomTest, FullVaryingLengths) {
  char buffer[sizeof(int)];

  // Count number of filters that significantly exceed the false positive rate
  int mediocre_filters = 0;
  int good_filters = 0;

  for (int length = 1; length <= 10000; length = NextLength(length)) {
    Reset();
    for (int i = 0; i < length; i++) {
      Add(Key(i, buffer));
    }
    Build();

    // Size bound: data bits plus up to two cache lines of rounding plus the
    // 5 metadata bytes.
    EXPECT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) +
                                     CACHE_LINE_SIZE * 2 + 5));

    // All added keys must match
    for (int i = 0; i < length; i++) {
      ASSERT_TRUE(Matches(Key(i, buffer)))
          << "Length " << length << "; key " << i;
    }

    // Check false positive rate
    double rate = FalsePositiveRate();
    if (kVerbose >= 1) {
      fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
              rate * 100.0, length, static_cast<int>(FilterSize()));
    }
    // FP-rate thresholds below are calibrated for the default 10 bits/key.
    if (FLAGS_bits_per_key == 10) {
      EXPECT_LE(rate, 0.02);  // Must not be over 2%
      if (rate > 0.0125) {
        mediocre_filters++;  // Allowed, but not too often
      } else {
        good_filters++;
      }
    }
  }
  if (kVerbose >= 1) {
    fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters,
            mediocre_filters);
  }
  // At most ~1 in 6 of the filters may be "mediocre".
  EXPECT_LE(mediocre_filters, good_filters / 5);
}
335 | ||
TEST_P(FullBloomTest, OptimizeForMemory) {
  char buffer[sizeof(int)];
  // Run the same workload with the optimization on and off and compare
  // aggregate size / fragmentation / FP-rate characteristics.
  for (bool offm : {true, false}) {
    table_options_.optimize_filters_for_memory = offm;
    ResetPolicy();
    Random32 rnd(12345);
    uint64_t total_size = 0;  // sum of filter byte sizes
    uint64_t total_mem = 0;   // sum of allocated (usable) sizes
    int64_t total_keys = 0;
    double total_fp_rate = 0;
    constexpr int nfilters = 100;
    for (int i = 0; i < nfilters; ++i) {
      // 100..~10099 keys per filter, pseudo-randomly distributed
      int nkeys = static_cast<int>(rnd.Uniformish(10000)) + 100;
      Reset();
      for (int j = 0; j < nkeys; ++j) {
        Add(Key(j, buffer));
      }
      Build();
      size_t size = FilterData().size();
      total_size += size;
      // optimize_filters_for_memory currently depends on malloc_usable_size
      // but we run the rest of the test to ensure no bad behavior without it.
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
      size = malloc_usable_size(const_cast<char*>(FilterData().data()));
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
      total_mem += size;
      total_keys += nkeys;
      total_fp_rate += FalsePositiveRate();
    }
    // Average FP-rate band, calibrated for the default 10 bits/key
    if (FLAGS_bits_per_key == 10) {
      EXPECT_LE(total_fp_rate / double{nfilters}, 0.011);
      EXPECT_GE(total_fp_rate / double{nfilters},
                CACHE_LINE_SIZE >= 256 ? 0.007 : 0.008);
    }

    // Lower bound on total size based on bits per key
    int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8;
    if (GetParam() == kStandard128Ribbon) {
      // ~ 30% savings vs. Bloom filter
      ex_min_total_size = 7 * ex_min_total_size / 10;
    }
    EXPECT_GE(static_cast<int64_t>(total_size), ex_min_total_size);

    // Per-filter overhead: cache-line rounding plus 5 metadata bytes
    int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5);
    if (GetParam() == kLegacyBloom) {
      // this config can add extra cache line to make odd number
      blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE;
    }

    EXPECT_GE(total_mem, total_size);

    // optimize_filters_for_memory not implemented with legacy Bloom
    if (offm && GetParam() != kLegacyBloom) {
      // This value can include a small extra penalty for kExtraPadding
      fprintf(stderr, "Internal fragmentation (optimized): %g%%\n",
              (total_mem - total_size) * 100.0 / total_size);
      // Less than 1% internal fragmentation
      EXPECT_LE(total_mem, total_size * 101 / 100);
      // Up to 2% storage penalty
      EXPECT_LE(static_cast<int64_t>(total_size),
                ex_min_total_size * 102 / 100 + blocked_bloom_overhead);
    } else {
      fprintf(stderr, "Internal fragmentation (not optimized): %g%%\n",
              (total_mem - total_size) * 100.0 / total_size);
      // TODO: add control checks for more allocators?
#ifdef ROCKSDB_JEMALLOC
      fprintf(stderr, "Jemalloc detected? %d\n", HasJemalloc());
      if (HasJemalloc()) {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
        // More than 5% internal fragmentation
        EXPECT_GE(total_mem, total_size * 105 / 100);
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
      }
#endif  // ROCKSDB_JEMALLOC
      // No storage penalty, just usual overhead
      EXPECT_LE(static_cast<int64_t>(total_size),
                ex_min_total_size + blocked_bloom_overhead);
    }
  }
}
415 | ||
class ChargeFilterConstructionTest : public testing::Test {};
// Verifies that when filter-construction memory is charged to the block
// cache and the cache fills up, Ribbon construction falls back to Bloom,
// and that cache reservations are (only) made when charging is enabled.
TEST_F(ChargeFilterConstructionTest, RibbonFilterFallBackOnLargeBanding) {
  constexpr std::size_t kCacheCapacity =
      8 * CacheReservationManagerImpl<
              CacheEntryRole::kFilterConstruction>::GetDummyEntrySize();
  // Sized so that Ribbon banding cannot be reserved within the cache,
  // forcing the fallback when charging is enabled.
  constexpr std::size_t num_entries_for_cache_full = kCacheCapacity / 8;

  for (CacheEntryRoleOptions::Decision charge_filter_construction_mem :
       {CacheEntryRoleOptions::Decision::kEnabled,
        CacheEntryRoleOptions::Decision::kDisabled}) {
    bool will_fall_back = charge_filter_construction_mem ==
                          CacheEntryRoleOptions::Decision::kEnabled;

    BlockBasedTableOptions table_options;
    table_options.cache_usage_options.options_overrides.insert(
        {CacheEntryRole::kFilterConstruction,
         {/*.charged = */ charge_filter_construction_mem}});
    LRUCacheOptions lo;
    lo.capacity = kCacheCapacity;
    lo.num_shard_bits = 0;  // 2^0 shard
    // Strict limit so over-reservation fails rather than evicting
    lo.strict_capacity_limit = true;
    std::shared_ptr<Cache> cache(NewLRUCache(lo));
    table_options.block_cache = cache;
    table_options.filter_policy =
        BloomLikeFilterPolicy::Create(kStandard128Ribbon, FLAGS_bits_per_key);
    FilterBuildingContext ctx(table_options);
    std::unique_ptr<FilterBitsBuilder> filter_bits_builder(
        table_options.filter_policy->GetBuilderWithContext(ctx));

    char key_buffer[sizeof(int)];
    for (std::size_t i = 0; i < num_entries_for_cache_full; ++i) {
      filter_bits_builder->AddKey(Key(static_cast<int>(i), key_buffer));
    }

    std::unique_ptr<const char[]> buf;
    Slice filter = filter_bits_builder->Finish(&buf);

    // To verify Ribbon Filter fallbacks to Bloom Filter properly
    // based on cache charging result
    // See BloomFilterPolicy::GetBloomBitsReader re: metadata
    // -1 = Marker for newer Bloom implementations
    // -2 = Marker for Standard128 Ribbon
    if (will_fall_back) {
      EXPECT_EQ(filter.data()[filter.size() - 5], static_cast<char>(-1));
    } else {
      EXPECT_EQ(filter.data()[filter.size() - 5], static_cast<char>(-2));
    }

    if (charge_filter_construction_mem ==
        CacheEntryRoleOptions::Decision::kEnabled) {
      // The final filter's reservation should be pinned in the cache,
      // rounded up to whole dummy entries (within one entry of exact).
      const size_t dummy_entry_num = static_cast<std::size_t>(std::ceil(
          filter.size() * 1.0 /
          CacheReservationManagerImpl<
              CacheEntryRole::kFilterConstruction>::GetDummyEntrySize()));
      EXPECT_GE(
          cache->GetPinnedUsage(),
          dummy_entry_num *
              CacheReservationManagerImpl<
                  CacheEntryRole::kFilterConstruction>::GetDummyEntrySize());
      EXPECT_LT(
          cache->GetPinnedUsage(),
          (dummy_entry_num + 1) *
              CacheReservationManagerImpl<
                  CacheEntryRole::kFilterConstruction>::GetDummyEntrySize());
    } else {
      // Charging disabled: nothing should be reserved in the cache.
      EXPECT_EQ(cache->GetPinnedUsage(), 0);
    }
  }
}
485 | ||
namespace {
// Selects the golden test value matching the compile-time CACHE_LINE_SIZE.
// The (void) casts silence unused-parameter warnings for the two arguments
// whose branches are compiled out by the preprocessor.
inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
                                      uint32_t for256) {
  (void)for64;
  (void)for128;
  (void)for256;
#if CACHE_LINE_SIZE == 64
  return for64;
#elif CACHE_LINE_SIZE == 128
  return for128;
#elif CACHE_LINE_SIZE == 256
  return for256;
#else
#error "CACHE_LINE_SIZE unknown or unrecognized"
#endif
}
}  // namespace
503 | ||
504 | // Ensure the implementation doesn't accidentally change in an | |
505 | // incompatible way. This test doesn't check the reading side | |
506 | // (FirstFPs/PackedMatches) for LegacyBloom because it requires the | |
507 | // ability to read filters generated using other cache line sizes. | |
508 | // See RawSchema. | |
TEST_P(FullBloomTest, Schema) {
// Conditional assertion macros: each applies only when the test parameter
// selects the relevant implementation(s), so one test body can carry
// golden fingerprints for all filter implementations.
#define EXPECT_EQ_Bloom(a, b)               \
  {                                         \
    if (GetParam() != kStandard128Ribbon) { \
      EXPECT_EQ(a, b);                      \
    }                                       \
  }
#define EXPECT_EQ_Ribbon(a, b)              \
  {                                         \
    if (GetParam() == kStandard128Ribbon) { \
      EXPECT_EQ(a, b);                      \
    }                                       \
  }
#define EXPECT_EQ_FastBloom(a, b)        \
  {                                      \
    if (GetParam() == kFastLocalBloom) { \
      EXPECT_EQ(a, b);                   \
    }                                    \
  }
#define EXPECT_EQ_LegacyBloom(a, b)   \
  {                                   \
    if (GetParam() == kLegacyBloom) { \
      EXPECT_EQ(a, b);                \
    }                                 \
  }
#define EXPECT_EQ_NotLegacy(a, b)     \
  {                                   \
    if (GetParam() != kLegacyBloom) { \
      EXPECT_EQ(a, b);                \
    }                                 \
  }

  char buffer[sizeof(int)];

  // First do a small number of keys, where Ribbon config will fall back on
  // fast Bloom filter and generate the same data
  ResetPolicy(5);  // num_probes = 3
  for (int key = 0; key < 87; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ(GetNumProbesFromFilterData(), 3);

  EXPECT_EQ_NotLegacy(BloomHash(FilterData()), 4130687756U);

  EXPECT_EQ_NotLegacy("31,38,40,43,61,83,86,112,125,131", FirstFPs(10));

  // Now use enough keys so that changing bits / key by 1 is guaranteed to
  // change number of allocated cache lines. So keys > max cache line bits.

  // Note that the first attempted Ribbon seed is determined by the hash
  // of the first key added (for pseudorandomness in practice, determinism in
  // testing)

  ResetPolicy(2);  // num_probes = 1
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 1);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1567096579, 1964771444, 2659542661U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3817481309U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1705851228U);

  EXPECT_EQ_FastBloom("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
  EXPECT_EQ_Ribbon("3,8,10,17,19,20,23,28,31,32", FirstFPs(10));

  ResetPolicy(3);  // num_probes = 2
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 2);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(2707206547U, 2571983456U, 218344685));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2807269961U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1095342358U);

  EXPECT_EQ_FastBloom("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
  EXPECT_EQ_Ribbon("3,17,20,28,32,33,36,43,49,54", FirstFPs(10));

  ResetPolicy(5);  // num_probes = 3
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 3);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(515748486, 94611728, 2436112214U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 204628445U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3971337699U);

  EXPECT_EQ_FastBloom("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
  EXPECT_EQ_Ribbon("3,33,36,43,67,70,76,78,84,102", FirstFPs(10));

  ResetPolicy(8);  // num_probes = 5
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 5);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1302145999, 2811644657U, 756553699));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 355564975U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3651449053U);

  EXPECT_EQ_FastBloom("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
  EXPECT_EQ_Ribbon("33,187,203,296,300,322,411,419,547,582", FirstFPs(10));

  ResetPolicy(9);  // num_probes = 6
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(2092755149, 661139132, 1182970461));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2137566013U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1005676675U);

  EXPECT_EQ_FastBloom("156,367,791,872,945,1015,1139,1159,1265", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,187,203,296,411,419,604,612,615,619", FirstFPs(10));

  ResetPolicy(11);  // num_probes = 7
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 7);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(3755609649U, 1812694762, 1449142939));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2561502687U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3129900846U);

  EXPECT_EQ_FastBloom("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
  EXPECT_EQ_Ribbon("411,419,623,665,727,794,955,1052,1323,1330", FirstFPs(10));

  // This used to be 9 probes, but 8 is a better choice for speed,
  // especially with SIMD groups of 8 probes, with essentially no
  // change in FP rate.
  // FP rate @ 9 probes, old Bloom: 0.4321%
  // FP rate @ 9 probes, new Bloom: 0.1846%
  // FP rate @ 8 probes, new Bloom: 0.1843%
  ResetPolicy(14);  // num_probes = 8 (new), 9 (old)
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 9);
  EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 8);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(178861123, 379087593, 2574136516U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3709876890U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1855638875U);

  EXPECT_EQ_FastBloom("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
  EXPECT_EQ_Ribbon("665,727,1323,1755,3866,4232,4442,4492,4736", FirstFPs(9));

  // This used to be 11 probes, but 9 is a better choice for speed
  // AND accuracy.
  // FP rate @ 11 probes, old Bloom: 0.3571%
  // FP rate @ 11 probes, new Bloom: 0.0884%
  // FP rate @ 9 probes, new Bloom: 0.0843%
  ResetPolicy(16);  // num_probes = 9 (new), 11 (old)
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 11);
  EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 9);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1129406313, 3049154394U, 1727750964));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 1087138490U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 459379967U);

  EXPECT_EQ_FastBloom("3299,3611,3916,6620,7822,8079,8482,8942", FirstFPs(8));
  EXPECT_EQ_Ribbon("727,1323,1755,4442,4736,5386,6974,7154,8222", FirstFPs(9));

  ResetPolicy(10);  // num_probes = 6, but different memory ratio vs. 9
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1478976371, 2910591341U, 1182970461));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2498541272U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1273231667U);

  EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
  EXPECT_EQ_Ribbon("296,411,419,612,619,623,630,665,686,727", FirstFPs(10));

  // Changing the first key added changes the first attempted Ribbon seed.
  ResetPolicy(10);
  for (int key = /*CHANGED*/ 1; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), /*CHANGED*/ 184);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2058382345U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3007790572U);

  EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,152,383,497,589,633,737,781,911,990", FirstFPs(10));

  ResetPolicy(10);
  for (int key = 1; key < /*CHANGED*/ 2088; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 23699164U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1942323379U);

  EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,95,360,589,737,911,990,1048,1081,1414", FirstFPs(10));

  // With new fractional bits_per_key, check that we are rounding to
  // whole bits per key for old Bloom filters but fractional for
  // new Bloom filter.
  ResetPolicy(9.5);
  for (int key = 1; key < 2088; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3166884174U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1148258663U);

  EXPECT_EQ_FastBloom("126,156,367,444,458,791,813,976,1015", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,54,95,360,589,693,737,911,990,1048", FirstFPs(10));

  ResetPolicy(10.499);
  for (int key = 1; key < 2088; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 7);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);

  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 4098502778U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 792138188U);

  EXPECT_EQ_FastBloom("16,236,240,472,1015,1045,1111,1409,1465", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,95,360,589,737,990,1048,1081,1414,1643", FirstFPs(10));

  ResetPolicy();
}
803 | ||
804 | // A helper class for testing custom or corrupt filter bits as read by | |
805 | // built-in FilterBitsReaders. | |
806 | struct RawFilterTester { | |
807 | // Buffer, from which we always return a tail Slice, so the | |
808 | // last five bytes are always the metadata bytes. | |
809 | std::array<char, 3000> data_; | |
810 | // Points five bytes from the end | |
811 | char* metadata_ptr_; | |
812 | ||
813 | RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {} | |
814 | ||
815 | Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines, | |
816 | uint32_t num_probes) { | |
817 | metadata_ptr_[0] = static_cast<char>(num_probes); | |
818 | EncodeFixed32(metadata_ptr_ + 1, num_lines); | |
819 | uint32_t len = len_without_metadata + /*metadata*/ 5; | |
820 | assert(len <= data_.size()); | |
821 | return Slice(metadata_ptr_ - len_without_metadata, len); | |
822 | } | |
823 | ||
824 | Slice Reset(uint32_t len_without_metadata, uint32_t num_lines, | |
825 | uint32_t num_probes, bool fill_ones) { | |
826 | data_.fill(fill_ones ? 0xff : 0); | |
827 | return ResetNoFill(len_without_metadata, num_lines, num_probes); | |
828 | } | |
829 | ||
830 | Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines, | |
831 | uint32_t num_probes) { | |
832 | for (uint32_t i = 0; i < data_.size(); ++i) { | |
833 | data_[i] = static_cast<char>(0x7b7b >> (i % 7)); | |
834 | } | |
835 | return ResetNoFill(len_without_metadata, num_lines, num_probes); | |
836 | } | |
837 | }; | |
838 | ||
TEST_P(FullBloomTest, RawSchema) {
  // Pins the exact on-disk bit layout ("schema") of each filter
  // implementation: with a fixed deterministic fill, the set of matching
  // probe keys must hash to these golden values. A change here indicates
  // a filter format compatibility break.
  RawFilterTester cft;
  // Legacy Bloom configurations
  // Two probes, about 3/4 bits set: ~50% "FP" rate
  // One 256-byte cache line.
  OpenRaw(cft.ResetWeirdFill(256, 1, 2));
  EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches());

  // Two 128-byte cache lines.
  OpenRaw(cft.ResetWeirdFill(256, 2, 2));
  EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches());

  // Four 64-byte cache lines.
  OpenRaw(cft.ResetWeirdFill(256, 4, 2));
  EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());

  // Fast local Bloom configurations (marker 255 -> -1)
  // Two probes, about 3/4 bits set: ~50% "FP" rate
  // Four 64-byte cache lines.
  // (Here the "num_lines" field encodes num_probes in its second byte.)
  OpenRaw(cft.ResetWeirdFill(256, 2U << 8, 255));
  EXPECT_EQ(uint64_t{9957045189927952471U}, PackedMatches());

  // Ribbon configurations (marker 254 -> -2)

  // Even though the builder never builds configurations this
  // small (preferring Bloom), we can test that the configuration
  // can be read, for possible future-proofing.

  // 256 slots, one result column = 32 bytes (2 blocks, seed 0)
  // ~50% FP rate:
  // 0b0101010111110101010000110000011011011111100100001110010011101010
  OpenRaw(cft.ResetWeirdFill(32, 2U << 8, 254));
  EXPECT_EQ(uint64_t{6193930559317665002U}, PackedMatches());

  // 256 slots, three-to-four result columns = 112 bytes
  // ~ 1 in 10 FP rate:
  // 0b0000000000100000000000000000000001000001000000010000101000000000
  OpenRaw(cft.ResetWeirdFill(112, 2U << 8, 254));
  EXPECT_EQ(uint64_t{9007200345328128U}, PackedMatches());
}
879 | ||
TEST_P(FullBloomTest, CorruptFilters) {
  // Verifies fail-safe behavior of built-in FilterBitsReaders on trivial,
  // corrupt, or reserved filter configurations:
  //  - good configurations reflect the fill value,
  //  - bad/unrecognized configurations conservatively return true
  //    (treated as potential FP, never losing a real key),
  //  - empty/truncated filters return false (as if built from zero keys).
  RawFilterTester cft;

  for (bool fill : {false, true}) {
    // Legacy Bloom configurations
    // Good filter bits - returns same as fill
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    // 256 is unusual but legal cache line size
    OpenRaw(cft.Reset(256 * 3, 3, 6, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    // 30 should be max num_probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    // 1 should be min num_probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Type 1 trivial filter bits - returns true as if FP by zero probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Type 2 trivial filter bits - returns false as if built from zero keys
    OpenRaw(cft.Reset(0, 0, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Type 2 trivial filter bits - returns false as if built from zero keys
    OpenRaw(cft.Reset(0, 37, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes
    OpenRaw(cft.Reset(0, 0, 0, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Bad filter bits - returns true for safety
    // No solution to 0 * x == CACHE_LINE_SIZE
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns true for safety
    // Can't have 3 * x == 4 for integer x
    OpenRaw(cft.Reset(4, 3, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns true for safety
    // 97 bytes is not a power of two, so not a legal cache line size
    OpenRaw(cft.Reset(97 * 3, 3, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns true for safety
    // 65 bytes is not a power of two, so not a legal cache line size
    OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns false as if built from zero keys
    // < 5 bytes overall means missing even metadata
    // (len_without_metadata + 5 wraps around to a tiny value here)
    OpenRaw(cft.Reset(static_cast<uint32_t>(-1), 3, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    OpenRaw(cft.Reset(static_cast<uint32_t>(-5), 3, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Dubious filter bits - returns same as fill (for now)
    // 31 is not a useful num_probes, nor generated by RocksDB unless directly
    // using filter bits API without BloomFilterPolicy.
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Dubious filter bits - returns same as fill (for now)
    // Similar, with 127, largest positive char
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Dubious filter bits - returns true (for now)
    // num_probes set to 128 / -128, lowest negative char
    // NB: Bug in implementation interprets this as negative and has same
    // effect as zero probes, but effectively reserves negative char values
    // for future use.
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Dubious filter bits - returns true (for now)
    // Similar, with 253 / -3
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 253, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // #########################################################
    // Fast local Bloom configurations (marker 255 -> -1)
    // Good config with six probes
    // (num_probes is carried in the second byte of the num_lines field)
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 6U << 8, 255, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Becomes bad/reserved config (always true) if any other byte set
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | 1U, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 16), 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 24), 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Good config, max 30 probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 30U << 8, 255, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Bad/reserved config (always true) if more than 30
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 31U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 33U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 66U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 130U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));
  }

  // #########################################################
  // Ribbon configurations (marker 254 -> -2)
  // ("fill" doesn't work to detect good configurations, we just
  // have to rely on TN probability)

  // Good: 2 blocks * 16 bytes / segment * 4 columns = 128 bytes
  // seed = 123
  OpenRaw(cft.Reset(128, (2U << 8) + 123U, 254, false));
  ASSERT_FALSE(Matches("hello"));
  ASSERT_FALSE(Matches("world"));

  // Good: 2 blocks * 16 bytes / segment * 8 columns = 256 bytes
  OpenRaw(cft.Reset(256, (2U << 8) + 123U, 254, false));
  ASSERT_FALSE(Matches("hello"));
  ASSERT_FALSE(Matches("world"));

  // Surprisingly OK: 5000 blocks (640,000 slots) in only 1024 bits
  // -> average close to 0 columns
  OpenRaw(cft.Reset(128, (5000U << 8) + 123U, 254, false));
  // *Almost* all FPs
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));
  // Need many queries to find a "true negative"
  for (int i = 0; Matches(std::to_string(i)); ++i) {
    ASSERT_LT(i, 1000);
  }

  // Bad: 1 block not allowed (for implementation detail reasons)
  OpenRaw(cft.Reset(128, (1U << 8) + 123U, 254, false));
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));

  // Bad: 0 blocks not allowed
  OpenRaw(cft.Reset(128, (0U << 8) + 123U, 254, false));
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));
}
1075 | ||
// Run every FullBloomTest case against each built-in filter implementation.
// (INSTANTIATE_TEST_CASE_P is the older gtest spelling of
// INSTANTIATE_TEST_SUITE_P; presumably kept for the bundled gtest version --
// confirm before modernizing.)
INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
                        testing::Values(kLegacyBloom, kFastLocalBloom,
                                        kStandard128Ribbon));
1079 | ||
1080 | static double GetEffectiveBitsPerKey(FilterBitsBuilder* builder) { | |
1081 | union { | |
1082 | uint64_t key_value = 0; | |
1083 | char key_bytes[8]; | |
1084 | }; | |
1085 | ||
1086 | const unsigned kNumKeys = 1000; | |
1087 | ||
1088 | Slice key_slice{key_bytes, 8}; | |
1089 | for (key_value = 0; key_value < kNumKeys; ++key_value) { | |
1090 | builder->AddKey(key_slice); | |
1091 | } | |
1092 | ||
1093 | std::unique_ptr<const char[]> buf; | |
1094 | auto filter = builder->Finish(&buf); | |
1095 | return filter.size() * /*bits per byte*/ 8 / (1.0 * kNumKeys); | |
1096 | } | |
1097 | ||
1098 | static void SetTestingLevel(int levelish, FilterBuildingContext* ctx) { | |
1099 | if (levelish == -1) { | |
1100 | // Flush is treated as level -1 for this option but actually level 0 | |
1101 | ctx->level_at_creation = 0; | |
1102 | ctx->reason = TableFileCreationReason::kFlush; | |
1103 | } else { | |
1104 | ctx->level_at_creation = levelish; | |
1105 | ctx->reason = TableFileCreationReason::kCompaction; | |
1106 | } | |
1107 | } | |
1108 | ||
TEST(RibbonTest, RibbonTestLevelThreshold) {
  // Verifies NewRibbonFilterPolicy's bloom_before_level option: levels
  // before the threshold get Bloom filters, the threshold level and beyond
  // get Ribbon. The filter kind is detected by measured space usage:
  // < 8 effective bits/key => Ribbon; > 9 => Bloom (policy built with 10).
  BlockBasedTableOptions opts;
  FilterBuildingContext ctx(opts);
  // A few settings
  for (CompactionStyle cs : {kCompactionStyleLevel, kCompactionStyleUniversal,
                             kCompactionStyleFIFO, kCompactionStyleNone}) {
    ctx.compaction_style = cs;
    for (int bloom_before_level : {-1, 0, 1, 10}) {
      std::vector<std::unique_ptr<const FilterPolicy> > policies;
      policies.emplace_back(NewRibbonFilterPolicy(10, bloom_before_level));

      if (bloom_before_level == 0) {
        // Also test new API default
        policies.emplace_back(NewRibbonFilterPolicy(10));
      }

      for (std::unique_ptr<const FilterPolicy>& policy : policies) {
        // Claim to be generating filter for this level
        SetTestingLevel(bloom_before_level, &ctx);

        std::unique_ptr<FilterBitsBuilder> builder{
            policy->GetBuilderWithContext(ctx)};

        // Must be Ribbon (more space efficient than 10 bits per key)
        ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);

        if (bloom_before_level >= 0) {
          // Claim to be generating filter for previous level
          SetTestingLevel(bloom_before_level - 1, &ctx);

          builder.reset(policy->GetBuilderWithContext(ctx));

          if (cs == kCompactionStyleLevel || cs == kCompactionStyleUniversal) {
            // Level is considered.
            // Must be Bloom (~ 10 bits per key)
            ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9);
          } else {
            // Level is ignored under non-traditional compaction styles.
            // Must be Ribbon (more space efficient than 10 bits per key)
            ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
          }
        }

        // Like SST file writer
        // (level_at_creation = -1 with kMisc simulates writing outside
        // the LSM tree, e.g. SstFileWriter)
        ctx.level_at_creation = -1;
        ctx.reason = TableFileCreationReason::kMisc;

        builder.reset(policy->GetBuilderWithContext(ctx));

        // Must be Ribbon (more space efficient than 10 bits per key)
        ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
      }
    }
  }
}
1164 | ||
1165 | } // namespace ROCKSDB_NAMESPACE | |
1166 | ||
int main(int argc, char** argv) {
  // Print a stack trace on crashes/assertion failures to ease debugging.
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  // gtest consumes its own flags first; remaining flags (e.g.
  // --bits_per_key) are then parsed by gflags.
  ::testing::InitGoogleTest(&argc, argv);
  ParseCommandLineFlags(&argc, &argv, true);

  return RUN_ALL_TESTS();
}
1174 | ||
1175 | #endif // GFLAGS |