1 // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
8 #include "test_util/testharness.h"
9 #include "util/bloom_impl.h"
10 #include "util/coding.h"
11 #include "util/hash.h"
12 #include "util/ribbon_impl.h"
13 #include "util/stop_watch.h"
// Fallback flag variables with the same defaults as the DEFINE_* flags
// below; presumably the non-gflags build path (the surrounding #ifdef is
// not visible in this chunk) -- TODO confirm.
uint32_t FLAGS_thoroughness = 5;        // iterations per test configuration
bool FLAGS_find_occ = false;            // run the FindOccupancyForSuccessRate tool
double FLAGS_find_next_factor = 1.414;  // num_slots growth factor between searches
double FLAGS_find_success = 0.95;       // target encoding success rate
double FLAGS_find_delta_start = 0.01;   // initial step for the factor search
double FLAGS_find_delta_end = 0.0001;   // stop when step shrinks below this
double FLAGS_find_delta_shrink = 0.99;  // per-iteration step shrink multiplier
uint32_t FLAGS_find_min_slots = 128;    // smallest num_slots to probe
uint32_t FLAGS_find_max_slots = 12800000;  // largest num_slots to probe
26 #include "util/gflags_compat.h"
27 using GFLAGS_NAMESPACE::ParseCommandLineFlags
;
28 // Using 500 is a good test when you have time to be thorough.
29 // Default is for general RocksDB regression test runs.
30 DEFINE_uint32(thoroughness
, 5, "iterations per configuration");
32 // Options for FindOccupancyForSuccessRate, which is more of a tool
34 DEFINE_bool(find_occ
, false,
35 "whether to run the FindOccupancyForSuccessRate tool");
36 DEFINE_double(find_next_factor
, 1.414,
37 "target success rate for FindOccupancyForSuccessRate");
38 DEFINE_double(find_success
, 0.95,
39 "target success rate for FindOccupancyForSuccessRate");
40 DEFINE_double(find_delta_start
, 0.01, " for FindOccupancyForSuccessRate");
41 DEFINE_double(find_delta_end
, 0.0001, " for FindOccupancyForSuccessRate");
42 DEFINE_double(find_delta_shrink
, 0.99, " for FindOccupancyForSuccessRate");
43 DEFINE_uint32(find_min_slots
, 128,
44 "number of slots for FindOccupancyForSuccessRate");
45 DEFINE_uint32(find_max_slots
, 12800000,
46 "number of slots for FindOccupancyForSuccessRate");
// Empty typed-test fixture: TypesAndSettings is one of the configuration
// structs registered below via TYPED_TEST_CASE(RibbonTypeParamTest, ...).
template <typename TypesAndSettings>
class RibbonTypeParamTest : public ::testing::Test {};
// Empty fixture for non-typed tests that import their own settings
// (e.g. TEST(RibbonTest, AllowZeroStarts) below).
class RibbonTest : public ::testing::Test {};
56 // Different ways of generating keys for testing
58 // Generate semi-sequential keys
59 struct StandardKeyGen
{
60 StandardKeyGen(const std::string
& prefix
, uint64_t id
)
61 : id_(id
), str_(prefix
) {
62 ROCKSDB_NAMESPACE::PutFixed64(&str_
, /*placeholder*/ 0);
65 // Prefix (only one required)
66 StandardKeyGen
& operator++() {
71 StandardKeyGen
& operator+=(uint64_t i
) {
76 const std::string
& operator*() {
77 // Use multiplication to mix things up a little in the key
78 ROCKSDB_NAMESPACE::EncodeFixed64(&str_
[str_
.size() - 8],
79 id_
* uint64_t{0x1500000001});
83 bool operator==(const StandardKeyGen
& other
) {
84 // Same prefix is assumed
85 return id_
== other
.id_
;
87 bool operator!=(const StandardKeyGen
& other
) {
88 // Same prefix is assumed
89 return id_
!= other
.id_
;
96 // Generate small sequential keys, that can misbehave with sequential seeds
97 // as in https://github.com/Cyan4973/xxHash/issues/469.
98 // These keys are only heuristically unique, but that's OK with 64 bits,
99 // for testing purposes.
101 SmallKeyGen(const std::string
& prefix
, uint64_t id
) : id_(id
) {
102 // Hash the prefix for a heuristically unique offset
103 id_
+= ROCKSDB_NAMESPACE::GetSliceHash64(prefix
);
104 ROCKSDB_NAMESPACE::PutFixed64(&str_
, id_
);
107 // Prefix (only one required)
108 SmallKeyGen
& operator++() {
113 SmallKeyGen
& operator+=(uint64_t i
) {
118 const std::string
& operator*() {
119 ROCKSDB_NAMESPACE::EncodeFixed64(&str_
[str_
.size() - 8], id_
);
123 bool operator==(const SmallKeyGen
& other
) { return id_
== other
.id_
; }
124 bool operator!=(const SmallKeyGen
& other
) { return id_
!= other
.id_
; }
130 template <typename KeyGen
>
131 struct Hash32KeyGenWrapper
: public KeyGen
{
132 Hash32KeyGenWrapper(const std::string
& prefix
, uint64_t id
)
133 : KeyGen(prefix
, id
) {}
134 uint32_t operator*() {
135 auto& key
= *static_cast<KeyGen
&>(*this);
137 return ROCKSDB_NAMESPACE::GetSliceHash(key
);
141 template <typename KeyGen
>
142 struct Hash64KeyGenWrapper
: public KeyGen
{
143 Hash64KeyGenWrapper(const std::string
& prefix
, uint64_t id
)
144 : KeyGen(prefix
, id
) {}
145 uint64_t operator*() {
146 auto& key
= *static_cast<KeyGen
&>(*this);
148 return ROCKSDB_NAMESPACE::GetSliceHash64(key
);
154 using ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate
;
155 using ROCKSDB_NAMESPACE::ribbon::StandardHasher
;
156 using ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter
;
158 struct DefaultTypesAndSettings
{
159 using CoeffRow
= ROCKSDB_NAMESPACE::Unsigned128
;
160 using ResultRow
= uint8_t;
161 using Index
= uint32_t;
162 using Hash
= uint64_t;
163 using Seed
= uint32_t;
164 using Key
= ROCKSDB_NAMESPACE::Slice
;
165 static constexpr bool kIsFilter
= true;
166 static constexpr bool kFirstCoeffAlwaysOne
= true;
167 static constexpr bool kUseSmash
= false;
168 static constexpr bool kAllowZeroStarts
= false;
169 static Hash
HashFn(const Key
& key
, uint64_t raw_seed
) {
170 // This version 0.7.2 preview of XXH3 (a.k.a. XXH3p) function does
171 // not pass SmallKeyGen tests below without some seed premixing from
172 // StandardHasher. See https://github.com/Cyan4973/xxHash/issues/469
173 return ROCKSDB_NAMESPACE::Hash64(key
.data(), key
.size(), raw_seed
);
176 using KeyGen
= StandardKeyGen
;
179 using TypesAndSettings_Coeff128
= DefaultTypesAndSettings
;
180 struct TypesAndSettings_Coeff128Smash
: public DefaultTypesAndSettings
{
181 static constexpr bool kUseSmash
= true;
183 struct TypesAndSettings_Coeff64
: public DefaultTypesAndSettings
{
184 using CoeffRow
= uint64_t;
186 struct TypesAndSettings_Coeff64Smash1
: public DefaultTypesAndSettings
{
187 using CoeffRow
= uint64_t;
188 static constexpr bool kUseSmash
= true;
190 struct TypesAndSettings_Coeff64Smash0
: public TypesAndSettings_Coeff64Smash1
{
191 static constexpr bool kFirstCoeffAlwaysOne
= false;
193 struct TypesAndSettings_Result16
: public DefaultTypesAndSettings
{
194 using ResultRow
= uint16_t;
196 struct TypesAndSettings_Result32
: public DefaultTypesAndSettings
{
197 using ResultRow
= uint32_t;
199 struct TypesAndSettings_IndexSizeT
: public DefaultTypesAndSettings
{
200 using Index
= size_t;
202 struct TypesAndSettings_Hash32
: public DefaultTypesAndSettings
{
203 using Hash
= uint32_t;
204 static Hash
HashFn(const Key
& key
, Hash raw_seed
) {
205 // This MurmurHash1 function does not pass tests below without the
206 // seed premixing from StandardHasher. In fact, it needs more than
207 // just a multiplication mixer on the ordinal seed.
208 return ROCKSDB_NAMESPACE::Hash(key
.data(), key
.size(), raw_seed
);
211 struct TypesAndSettings_Hash32_Result16
: public TypesAndSettings_Hash32
{
212 using ResultRow
= uint16_t;
214 struct TypesAndSettings_KeyString
: public DefaultTypesAndSettings
{
215 using Key
= std::string
;
217 struct TypesAndSettings_Seed8
: public DefaultTypesAndSettings
{
218 // This is not a generally recommended configuration. With the configured
219 // hash function, it would fail with SmallKeyGen due to insufficient
220 // independence among the seeds.
221 using Seed
= uint8_t;
223 struct TypesAndSettings_NoAlwaysOne
: public DefaultTypesAndSettings
{
224 static constexpr bool kFirstCoeffAlwaysOne
= false;
226 struct TypesAndSettings_AllowZeroStarts
: public DefaultTypesAndSettings
{
227 static constexpr bool kAllowZeroStarts
= true;
229 struct TypesAndSettings_Seed64
: public DefaultTypesAndSettings
{
230 using Seed
= uint64_t;
232 struct TypesAndSettings_Rehasher
233 : public StandardRehasherAdapter
<DefaultTypesAndSettings
> {
234 using KeyGen
= Hash64KeyGenWrapper
<StandardKeyGen
>;
236 struct TypesAndSettings_Rehasher_Result16
: public TypesAndSettings_Rehasher
{
237 using ResultRow
= uint16_t;
239 struct TypesAndSettings_Rehasher_Result32
: public TypesAndSettings_Rehasher
{
240 using ResultRow
= uint32_t;
242 struct TypesAndSettings_Rehasher_Seed64
243 : public StandardRehasherAdapter
<TypesAndSettings_Seed64
> {
244 using KeyGen
= Hash64KeyGenWrapper
<StandardKeyGen
>;
245 // Note: 64-bit seed with Rehasher gives slightly better average reseeds
247 struct TypesAndSettings_Rehasher32
248 : public StandardRehasherAdapter
<TypesAndSettings_Hash32
> {
249 using KeyGen
= Hash32KeyGenWrapper
<StandardKeyGen
>;
251 struct TypesAndSettings_Rehasher32_Coeff64
252 : public TypesAndSettings_Rehasher32
{
253 using CoeffRow
= uint64_t;
255 struct TypesAndSettings_SmallKeyGen
: public DefaultTypesAndSettings
{
256 // SmallKeyGen stresses the independence of different hash seeds
257 using KeyGen
= SmallKeyGen
;
259 struct TypesAndSettings_Hash32_SmallKeyGen
: public TypesAndSettings_Hash32
{
260 // SmallKeyGen stresses the independence of different hash seeds
261 using KeyGen
= SmallKeyGen
;
264 using TestTypesAndSettings
= ::testing::Types
<
265 TypesAndSettings_Coeff128
, TypesAndSettings_Coeff128Smash
,
266 TypesAndSettings_Coeff64
, TypesAndSettings_Coeff64Smash0
,
267 TypesAndSettings_Coeff64Smash1
, TypesAndSettings_Result16
,
268 TypesAndSettings_Result32
, TypesAndSettings_IndexSizeT
,
269 TypesAndSettings_Hash32
, TypesAndSettings_Hash32_Result16
,
270 TypesAndSettings_KeyString
, TypesAndSettings_Seed8
,
271 TypesAndSettings_NoAlwaysOne
, TypesAndSettings_AllowZeroStarts
,
272 TypesAndSettings_Seed64
, TypesAndSettings_Rehasher
,
273 TypesAndSettings_Rehasher_Result16
, TypesAndSettings_Rehasher_Result32
,
274 TypesAndSettings_Rehasher_Seed64
, TypesAndSettings_Rehasher32
,
275 TypesAndSettings_Rehasher32_Coeff64
, TypesAndSettings_SmallKeyGen
,
276 TypesAndSettings_Hash32_SmallKeyGen
>;
277 TYPED_TEST_CASE(RibbonTypeParamTest
, TestTypesAndSettings
);
// For testing Poisson-distributed (or similar) statistics: value at
// `stddevs_allowed` standard deviations above the expected mean.
// (Poisson approximates Binomial only if probability of a trial being
// in the count is low.) Poisson stddev is sqrt(mean); the +1.0 makes the
// truncating cast act as a round-up.
uint64_t PoissonUpperBound(double expected_count, double stddevs_allowed) {
  const double slack = stddevs_allowed * std::sqrt(expected_count);
  return static_cast<uint64_t>(expected_count + slack + 1.0);
}
// Counterpart to PoissonUpperBound: value at `stddevs_allowed` standard
// deviations below the expected mean, clamped to be non-negative.
uint64_t PoissonLowerBound(double expected_count, double stddevs_allowed) {
  const double slack = stddevs_allowed * std::sqrt(expected_count);
  return static_cast<uint64_t>(std::max(0.0, expected_count - slack));
}
296 uint64_t FrequentPoissonUpperBound(double expected_count
) {
297 // Allow up to 5.0 standard deviations for frequently checked statistics
298 return PoissonUpperBound(expected_count
, 5.0);
301 uint64_t FrequentPoissonLowerBound(double expected_count
) {
302 return PoissonLowerBound(expected_count
, 5.0);
305 uint64_t InfrequentPoissonUpperBound(double expected_count
) {
306 // Allow up to 3 standard deviations for infrequently checked statistics
307 return PoissonUpperBound(expected_count
, 3.0);
310 uint64_t InfrequentPoissonLowerBound(double expected_count
) {
311 return PoissonLowerBound(expected_count
, 3.0);
316 TYPED_TEST(RibbonTypeParamTest
, CompactnessAndBacktrackAndFpRate
) {
317 IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam
);
318 IMPORT_RIBBON_IMPL_TYPES(TypeParam
);
319 using KeyGen
= typename
TypeParam::KeyGen
;
321 // For testing FP rate etc.
322 constexpr Index kNumToCheck
= 100000;
324 const auto log2_thoroughness
=
325 static_cast<Hash
>(ROCKSDB_NAMESPACE::FloorLog2(FLAGS_thoroughness
));
327 // With overhead of just 2%, expect ~50% encoding success per
328 // seed with ~5k keys on 64-bit ribbon, or ~150k keys on 128-bit ribbon.
329 const double kFactor
= 1.02;
331 uint64_t total_reseeds
= 0;
332 uint64_t total_single_failures
= 0;
333 uint64_t total_batch_successes
= 0;
334 uint64_t total_fp_count
= 0;
335 uint64_t total_added
= 0;
337 uint64_t soln_query_nanos
= 0;
338 uint64_t soln_query_count
= 0;
339 uint64_t bloom_query_nanos
= 0;
340 uint64_t isoln_query_nanos
= 0;
341 uint64_t isoln_query_count
= 0;
343 // Take different samples if you change thoroughness
344 ROCKSDB_NAMESPACE::Random32
rnd(FLAGS_thoroughness
);
346 for (uint32_t i
= 0; i
< FLAGS_thoroughness
; ++i
) {
347 uint32_t num_to_add
=
348 sizeof(CoeffRow
) == 16 ? 130000 : TypeParam::kUseSmash
? 5500 : 2500;
350 // Use different values between that number and 50% of that number
351 num_to_add
-= rnd
.Uniformish(num_to_add
/ 2);
353 total_added
+= num_to_add
;
355 // Most of the time, test the Interleaved solution storage, but when
356 // we do we have to make num_slots a multiple of kCoeffBits. So
357 // sometimes we want to test without that limitation.
358 bool test_interleaved
= (i
% 7) != 6;
360 Index num_slots
= static_cast<Index
>(num_to_add
* kFactor
);
361 if (test_interleaved
) {
362 // Round to supported number of slots
363 num_slots
= InterleavedSoln::RoundUpNumSlots(num_slots
);
364 // Re-adjust num_to_add to get as close as possible to kFactor
365 num_to_add
= static_cast<uint32_t>(num_slots
/ kFactor
);
369 ROCKSDB_NAMESPACE::PutFixed32(&prefix
, rnd
.Next());
371 // Batch that must be added
372 std::string added_str
= prefix
+ "added";
373 KeyGen
keys_begin(added_str
, 0);
374 KeyGen
keys_end(added_str
, num_to_add
);
376 // A couple more that will probably be added
377 KeyGen
one_more(prefix
+ "more", 1);
378 KeyGen
two_more(prefix
+ "more", 2);
380 // Batch that may or may not be added
381 const Index kBatchSize
=
382 sizeof(CoeffRow
) == 16 ? 300 : TypeParam::kUseSmash
? 20 : 10;
383 std::string batch_str
= prefix
+ "batch";
384 KeyGen
batch_begin(batch_str
, 0);
385 KeyGen
batch_end(batch_str
, kBatchSize
);
387 // Batch never (successfully) added, but used for querying FP rate
388 std::string not_str
= prefix
+ "not";
389 KeyGen
other_keys_begin(not_str
, 0);
390 KeyGen
other_keys_end(not_str
, kNumToCheck
);
392 // Vary bytes for InterleavedSoln to use number of solution columns
393 // from 0 to max allowed by ResultRow type (and used by SimpleSoln).
394 // Specifically include 0 and max, and otherwise skew toward max.
395 uint32_t max_ibytes
= static_cast<uint32_t>(sizeof(ResultRow
) * num_slots
);
403 ibytes
= std::max(rnd
.Uniformish(max_ibytes
), rnd
.Uniformish(max_ibytes
));
405 std::unique_ptr
<char[]> idata(new char[ibytes
]);
406 InterleavedSoln
isoln(idata
.get(), ibytes
);
415 // Traditional solve for a fixed set.
417 banding
.ResetAndFindSeedToSolve(num_slots
, keys_begin
, keys_end
));
419 // Now to test backtracking, starting with guaranteed fail. By using
420 // the keys that will be used to test FP rate, we are then doing an
421 // extra check that after backtracking there are no remnants (e.g. in
422 // result side of banding) of these entries.
423 Index occupied_count
= banding
.GetOccupiedCount();
424 banding
.EnsureBacktrackSize(kNumToCheck
);
426 banding
.AddRangeOrRollBack(other_keys_begin
, other_keys_end
));
427 EXPECT_EQ(occupied_count
, banding
.GetOccupiedCount());
429 // Check that we still have a good chance of adding a couple more
431 first_single
= banding
.Add(*one_more
);
432 second_single
= banding
.Add(*two_more
);
433 Index more_added
= (first_single
? 1 : 0) + (second_single
? 1 : 0);
434 total_single_failures
+= 2U - more_added
;
437 batch_success
= banding
.AddRangeOrRollBack(batch_begin
, batch_end
);
439 more_added
+= kBatchSize
;
440 ++total_batch_successes
;
442 EXPECT_LE(banding
.GetOccupiedCount(), occupied_count
+ more_added
);
444 // Also verify that redundant adds are OK (no effect)
446 banding
.AddRange(keys_begin
, KeyGen(added_str
, num_to_add
/ 8)));
447 EXPECT_LE(banding
.GetOccupiedCount(), occupied_count
+ more_added
);
449 // Now back-substitution
450 soln
.BackSubstFrom(banding
);
451 if (test_interleaved
) {
452 isoln
.BackSubstFrom(banding
);
455 Seed reseeds
= banding
.GetOrdinalSeed();
456 total_reseeds
+= reseeds
;
458 EXPECT_LE(reseeds
, 8 + log2_thoroughness
);
459 if (reseeds
> log2_thoroughness
+ 1) {
461 stderr
, "%s high reseeds at %u, %u/%u: %u\n",
462 reseeds
> log2_thoroughness
+ 8 ? "ERROR Extremely" : "Somewhat",
463 static_cast<unsigned>(i
), static_cast<unsigned>(num_to_add
),
464 static_cast<unsigned>(num_slots
), static_cast<unsigned>(reseeds
));
466 hasher
.SetOrdinalSeed(reseeds
);
468 // soln and hasher now independent of Banding object
471 KeyGen cur
= keys_begin
;
472 while (cur
!= keys_end
) {
473 ASSERT_TRUE(soln
.FilterQuery(*cur
, hasher
));
474 ASSERT_TRUE(!test_interleaved
|| isoln
.FilterQuery(*cur
, hasher
));
477 // We (maybe) snuck these in!
479 ASSERT_TRUE(soln
.FilterQuery(*one_more
, hasher
));
480 ASSERT_TRUE(!test_interleaved
|| isoln
.FilterQuery(*one_more
, hasher
));
483 ASSERT_TRUE(soln
.FilterQuery(*two_more
, hasher
));
484 ASSERT_TRUE(!test_interleaved
|| isoln
.FilterQuery(*two_more
, hasher
));
488 while (cur
!= batch_end
) {
489 ASSERT_TRUE(soln
.FilterQuery(*cur
, hasher
));
490 ASSERT_TRUE(!test_interleaved
|| isoln
.FilterQuery(*cur
, hasher
));
495 // Check FP rate (depends only on number of result bits == solution columns)
497 cur
= other_keys_begin
;
499 ROCKSDB_NAMESPACE::StopWatchNano
timer(ROCKSDB_NAMESPACE::Env::Default(),
501 while (cur
!= other_keys_end
) {
502 bool fp
= soln
.FilterQuery(*cur
, hasher
);
503 fp_count
+= fp
? 1 : 0;
506 soln_query_nanos
+= timer
.ElapsedNanos();
507 soln_query_count
+= kNumToCheck
;
510 double expected_fp_count
= soln
.ExpectedFpRate() * kNumToCheck
;
511 // For expected FP rate, also include false positives due to collisions
512 // in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
514 kNumToCheck
* ExpectedCollisionFpRate(hasher
, num_to_add
);
516 FrequentPoissonUpperBound(expected_fp_count
+ correction
));
518 FrequentPoissonLowerBound(expected_fp_count
+ correction
));
520 total_fp_count
+= fp_count
;
522 // And also check FP rate for isoln
523 if (test_interleaved
) {
525 cur
= other_keys_begin
;
526 ROCKSDB_NAMESPACE::StopWatchNano
timer(ROCKSDB_NAMESPACE::Env::Default(),
528 while (cur
!= other_keys_end
) {
529 ifp_count
+= isoln
.FilterQuery(*cur
, hasher
) ? 1 : 0;
532 isoln_query_nanos
+= timer
.ElapsedNanos();
533 isoln_query_count
+= kNumToCheck
;
535 double expected_fp_count
= isoln
.ExpectedFpRate() * kNumToCheck
;
536 // For expected FP rate, also include false positives due to collisions
537 // in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
539 kNumToCheck
* ExpectedCollisionFpRate(hasher
, num_to_add
);
541 FrequentPoissonUpperBound(expected_fp_count
+ correction
));
543 FrequentPoissonLowerBound(expected_fp_count
+ correction
));
545 // Since the bits used in isoln are a subset of the bits used in soln,
546 // it cannot have fewer FPs
547 EXPECT_GE(ifp_count
, fp_count
);
550 // And compare to Bloom time, for fun
551 if (ibytes
>= /* minimum Bloom impl bytes*/ 64) {
553 cur
= other_keys_begin
;
554 ROCKSDB_NAMESPACE::StopWatchNano
timer(ROCKSDB_NAMESPACE::Env::Default(),
556 while (cur
!= other_keys_end
) {
557 uint64_t h
= hasher
.GetHash(*cur
);
558 uint32_t h1
= ROCKSDB_NAMESPACE::Lower32of64(h
);
559 uint32_t h2
= sizeof(Hash
) >= 8 ? ROCKSDB_NAMESPACE::Upper32of64(h
)
561 bfp_count
+= ROCKSDB_NAMESPACE::FastLocalBloomImpl::HashMayMatch(
562 h1
, h2
, static_cast<uint32_t>(ibytes
), 6, idata
.get())
567 bloom_query_nanos
+= timer
.ElapsedNanos();
568 // ensure bfp_count is used
569 ASSERT_LT(bfp_count
, kNumToCheck
);
573 // "outside" == key not in original set so either negative or false positive
574 fprintf(stderr
, "Simple outside query, hot, incl hashing, ns/key: %g\n",
575 1.0 * soln_query_nanos
/ soln_query_count
);
576 fprintf(stderr
, "Interleaved outside query, hot, incl hashing, ns/key: %g\n",
577 1.0 * isoln_query_nanos
/ isoln_query_count
);
578 fprintf(stderr
, "Bloom outside query, hot, incl hashing, ns/key: %g\n",
579 1.0 * bloom_query_nanos
/ soln_query_count
);
582 double average_reseeds
= 1.0 * total_reseeds
/ FLAGS_thoroughness
;
583 fprintf(stderr
, "Average re-seeds: %g\n", average_reseeds
);
584 // Values above were chosen to target around 50% chance of encoding success
585 // rate (average of 1.0 re-seeds) or slightly better. But 1.15 is also close
587 EXPECT_LE(total_reseeds
,
588 InfrequentPoissonUpperBound(1.15 * FLAGS_thoroughness
));
589 // Would use 0.85 here instead of 0.75, but
590 // TypesAndSettings_Hash32_SmallKeyGen can "beat the odds" because of
591 // sequential keys with a small, cheap hash function. We accept that
592 // there are surely inputs that are somewhat bad for this setup, but
593 // these somewhat good inputs are probably more likely.
594 EXPECT_GE(total_reseeds
,
595 InfrequentPoissonLowerBound(0.75 * FLAGS_thoroughness
));
599 uint64_t total_singles
= 2 * FLAGS_thoroughness
;
600 double single_failure_rate
= 1.0 * total_single_failures
/ total_singles
;
601 fprintf(stderr
, "Add'l single, failure rate: %g\n", single_failure_rate
);
602 // A rough bound (one sided) based on nothing in particular
603 double expected_single_failures
=
604 1.0 * total_singles
/
605 (sizeof(CoeffRow
) == 16 ? 128 : TypeParam::kUseSmash
? 64 : 32);
606 EXPECT_LE(total_single_failures
,
607 InfrequentPoissonUpperBound(expected_single_failures
));
611 // Counting successes here for Poisson to approximate the Binomial
613 // A rough bound (one sided) based on nothing in particular.
614 double expected_batch_successes
= 1.0 * FLAGS_thoroughness
/ 2;
615 uint64_t lower_bound
=
616 InfrequentPoissonLowerBound(expected_batch_successes
);
617 fprintf(stderr
, "Add'l batch, success rate: %g (>= %g)\n",
618 1.0 * total_batch_successes
/ FLAGS_thoroughness
,
619 1.0 * lower_bound
/ FLAGS_thoroughness
);
620 EXPECT_GE(total_batch_successes
, lower_bound
);
624 uint64_t total_checked
= uint64_t{kNumToCheck
} * FLAGS_thoroughness
;
625 double expected_total_fp_count
=
626 total_checked
* std::pow(0.5, 8U * sizeof(ResultRow
));
627 // For expected FP rate, also include false positives due to collisions
628 // in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
629 double average_added
= 1.0 * total_added
/ FLAGS_thoroughness
;
630 expected_total_fp_count
+=
631 total_checked
* ExpectedCollisionFpRate(Hasher(), average_added
);
633 uint64_t upper_bound
= InfrequentPoissonUpperBound(expected_total_fp_count
);
634 uint64_t lower_bound
= InfrequentPoissonLowerBound(expected_total_fp_count
);
635 fprintf(stderr
, "Average FP rate: %g (~= %g, <= %g, >= %g)\n",
636 1.0 * total_fp_count
/ total_checked
,
637 expected_total_fp_count
/ total_checked
,
638 1.0 * upper_bound
/ total_checked
,
639 1.0 * lower_bound
/ total_checked
);
640 EXPECT_LE(total_fp_count
, upper_bound
);
641 EXPECT_GE(total_fp_count
, lower_bound
);
645 TYPED_TEST(RibbonTypeParamTest
, Extremes
) {
646 IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam
);
647 IMPORT_RIBBON_IMPL_TYPES(TypeParam
);
648 using KeyGen
= typename
TypeParam::KeyGen
;
650 size_t bytes
= 128 * 1024;
651 std::unique_ptr
<char[]> buf(new char[bytes
]);
652 InterleavedSoln
isoln(buf
.get(), bytes
);
657 // ########################################
658 // Add zero keys to minimal number of slots
659 KeyGen
begin_and_end("foo", 123);
660 ASSERT_TRUE(banding
.ResetAndFindSeedToSolve(
661 /*slots*/ kCoeffBits
, begin_and_end
, begin_and_end
, /*first seed*/ 0,
664 soln
.BackSubstFrom(banding
);
665 isoln
.BackSubstFrom(banding
);
667 // Because there's plenty of memory, we expect the interleaved solution to
668 // use maximum supported columns (same as simple solution)
669 ASSERT_EQ(isoln
.GetUpperNumColumns(), 8U * sizeof(ResultRow
));
670 ASSERT_EQ(isoln
.GetUpperStartBlock(), 0U);
672 // Somewhat oddly, we expect same FP rate as if we had essentially filled
674 constexpr Index kNumToCheck
= 100000;
675 KeyGen
other_keys_begin("not", 0);
676 KeyGen
other_keys_end("not", kNumToCheck
);
679 KeyGen cur
= other_keys_begin
;
680 while (cur
!= other_keys_end
) {
681 bool isoln_query_result
= isoln
.FilterQuery(*cur
, hasher
);
682 bool soln_query_result
= soln
.FilterQuery(*cur
, hasher
);
683 // Solutions are equivalent
684 ASSERT_EQ(isoln_query_result
, soln_query_result
);
685 // And in fact we only expect an FP when ResultRow is 0
686 // CHANGE: no longer true because of filling some unused slots
687 // with pseudorandom values.
688 // ASSERT_EQ(soln_query_result, hasher.GetResultRowFromHash(
689 // hasher.GetHash(*cur)) == ResultRow{0});
690 fp_count
+= soln_query_result
? 1 : 0;
694 ASSERT_EQ(isoln
.ExpectedFpRate(), soln
.ExpectedFpRate());
695 double expected_fp_count
= isoln
.ExpectedFpRate() * kNumToCheck
;
696 EXPECT_LE(fp_count
, InfrequentPoissonUpperBound(expected_fp_count
));
697 EXPECT_GE(fp_count
, InfrequentPoissonLowerBound(expected_fp_count
));
700 // ######################################################
701 // Use zero bytes for interleaved solution (key(s) added)
704 KeyGen
key_begin("added", 0);
705 KeyGen
key_end("added", 1);
706 ASSERT_TRUE(banding
.ResetAndFindSeedToSolve(
707 /*slots*/ kCoeffBits
, key_begin
, key_end
, /*first seed*/ 0,
710 InterleavedSoln
isoln2(nullptr, /*bytes*/ 0);
712 isoln2
.BackSubstFrom(banding
);
714 ASSERT_EQ(isoln2
.GetUpperNumColumns(), 0U);
715 ASSERT_EQ(isoln2
.GetUpperStartBlock(), 0U);
717 // All queries return true
718 ASSERT_TRUE(isoln2
.FilterQuery(*other_keys_begin
, hasher
));
719 ASSERT_EQ(isoln2
.ExpectedFpRate(), 1.0);
722 TEST(RibbonTest
, AllowZeroStarts
) {
723 IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings_AllowZeroStarts
);
724 IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings_AllowZeroStarts
);
725 using KeyGen
= StandardKeyGen
;
727 InterleavedSoln
isoln(nullptr, /*bytes*/ 0);
732 KeyGen
begin("foo", 0);
733 KeyGen
end("foo", 1);
735 ASSERT_FALSE(banding
.ResetAndFindSeedToSolve(/*slots*/ 0, begin
, end
));
737 KeyGen
begin_and_end("foo", 123);
739 ASSERT_TRUE(banding
.ResetAndFindSeedToSolve(/*slots*/ 0, begin_and_end
,
742 Seed reseeds
= banding
.GetOrdinalSeed();
743 ASSERT_EQ(reseeds
, 0U);
744 hasher
.SetOrdinalSeed(reseeds
);
746 // Can construct 0-slot solutions
747 isoln
.BackSubstFrom(banding
);
748 soln
.BackSubstFrom(banding
);
750 // Should always return false
751 ASSERT_FALSE(isoln
.FilterQuery(*begin
, hasher
));
752 ASSERT_FALSE(soln
.FilterQuery(*begin
, hasher
));
754 // And report that in FP rate
755 ASSERT_EQ(isoln
.ExpectedFpRate(), 0.0);
756 ASSERT_EQ(soln
.ExpectedFpRate(), 0.0);
759 TEST(RibbonTest
, RawAndOrdinalSeeds
) {
760 StandardHasher
<TypesAndSettings_Seed64
> hasher64
;
761 StandardHasher
<DefaultTypesAndSettings
> hasher64_32
;
762 StandardHasher
<TypesAndSettings_Hash32
> hasher32
;
763 StandardHasher
<TypesAndSettings_Seed8
> hasher8
;
765 for (uint32_t limit
: {0xffU
, 0xffffU
}) {
766 std::vector
<bool> seen(limit
+ 1);
767 for (uint32_t i
= 0; i
< limit
; ++i
) {
768 hasher64
.SetOrdinalSeed(i
);
769 auto raw64
= hasher64
.GetRawSeed();
770 hasher32
.SetOrdinalSeed(i
);
771 auto raw32
= hasher32
.GetRawSeed();
772 hasher8
.SetOrdinalSeed(static_cast<uint8_t>(i
));
773 auto raw8
= hasher8
.GetRawSeed();
775 hasher64_32
.SetOrdinalSeed(i
);
776 auto raw64_32
= hasher64_32
.GetRawSeed();
777 ASSERT_EQ(raw64_32
, raw32
); // Same size seed
780 // Documented that ordinal seed 0 == raw seed 0
781 ASSERT_EQ(raw64
, 0U);
782 ASSERT_EQ(raw32
, 0U);
785 // Extremely likely that upper bits are set
786 ASSERT_GT(raw64
, raw32
);
787 ASSERT_GT(raw32
, raw8
);
789 // Hashers agree on lower bits
790 ASSERT_EQ(static_cast<uint32_t>(raw64
), raw32
);
791 ASSERT_EQ(static_cast<uint8_t>(raw32
), raw8
);
793 // The translation is one-to-one for this size prefix
794 uint32_t v
= static_cast<uint32_t>(raw32
& limit
);
795 ASSERT_EQ(raw64
& limit
, v
);
796 ASSERT_FALSE(seen
[v
]);
804 struct PhsfInputGen
{
805 PhsfInputGen(const std::string
& prefix
, uint64_t id
) : id_(id
) {
807 ROCKSDB_NAMESPACE::PutFixed64(&val_
.first
, /*placeholder*/ 0);
810 // Prefix (only one required)
811 PhsfInputGen
& operator++() {
816 const std::pair
<std::string
, uint8_t>& operator*() {
817 // Use multiplication to mix things up a little in the key
818 ROCKSDB_NAMESPACE::EncodeFixed64(&val_
.first
[val_
.first
.size() - 8],
819 id_
* uint64_t{0x1500000001});
820 // Occasionally repeat values etc.
821 val_
.second
= static_cast<uint8_t>(id_
* 7 / 8);
825 const std::pair
<std::string
, uint8_t>* operator->() { return &**this; }
827 bool operator==(const PhsfInputGen
& other
) {
828 // Same prefix is assumed
829 return id_
== other
.id_
;
831 bool operator!=(const PhsfInputGen
& other
) {
832 // Same prefix is assumed
833 return id_
!= other
.id_
;
837 std::pair
<std::string
, uint8_t> val_
;
840 struct PhsfTypesAndSettings
: public DefaultTypesAndSettings
{
841 static constexpr bool kIsFilter
= false;
845 TEST(RibbonTest
, PhsfBasic
) {
846 IMPORT_RIBBON_TYPES_AND_SETTINGS(PhsfTypesAndSettings
);
847 IMPORT_RIBBON_IMPL_TYPES(PhsfTypesAndSettings
);
849 Index num_slots
= 12800;
850 Index num_to_add
= static_cast<Index
>(num_slots
/ 1.02);
852 PhsfInputGen
begin("in", 0);
853 PhsfInputGen
end("in", num_to_add
);
855 std::unique_ptr
<char[]> idata(new char[/*bytes*/ num_slots
]);
856 InterleavedSoln
isoln(idata
.get(), /*bytes*/ num_slots
);
862 ASSERT_TRUE(banding
.ResetAndFindSeedToSolve(num_slots
, begin
, end
));
864 soln
.BackSubstFrom(banding
);
865 isoln
.BackSubstFrom(banding
);
867 hasher
.SetOrdinalSeed(banding
.GetOrdinalSeed());
870 for (PhsfInputGen cur
= begin
; cur
!= end
; ++cur
) {
871 ASSERT_EQ(cur
->second
, soln
.PhsfQuery(cur
->first
, hasher
));
872 ASSERT_EQ(cur
->second
, isoln
.PhsfQuery(cur
->first
, hasher
));
876 // Not a real test, but a tool used to build GetNumSlotsFor95PctSuccess
877 TYPED_TEST(RibbonTypeParamTest
, FindOccupancyForSuccessRate
) {
878 IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam
);
879 IMPORT_RIBBON_IMPL_TYPES(TypeParam
);
880 using KeyGen
= typename
TypeParam::KeyGen
;
882 if (!FLAGS_find_occ
) {
883 fprintf(stderr
, "Tool disabled during unit test runs\n");
887 KeyGen
cur("blah", 0);
890 Index num_slots
= InterleavedSoln::RoundUpNumSlots(FLAGS_find_min_slots
);
891 while (num_slots
< FLAGS_find_max_slots
) {
892 double factor
= 0.95;
893 double delta
= FLAGS_find_delta_start
;
894 while (delta
> FLAGS_find_delta_end
) {
895 Index num_to_add
= static_cast<Index
>(factor
* num_slots
);
898 bool success
= banding
.ResetAndFindSeedToSolve(num_slots
, cur
, end
, 0, 0);
899 cur
= end
; // fresh keys
901 factor
+= delta
* (1.0 - FLAGS_find_success
);
902 factor
= std::min(factor
, 1.0);
904 factor
-= delta
* FLAGS_find_success
;
905 factor
= std::max(factor
, 0.0);
907 delta
*= FLAGS_find_delta_shrink
;
909 "slots: %u log2_slots: %g target_success: %g ->overhead: %g\r",
910 static_cast<unsigned>(num_slots
),
911 std::log(num_slots
* 1.0) / std::log(2.0), FLAGS_find_success
,
914 fprintf(stderr
, "\n");
916 num_slots
= std::max(
917 num_slots
+ 1, static_cast<Index
>(num_slots
* FLAGS_find_next_factor
));
918 num_slots
= InterleavedSoln::RoundUpNumSlots(num_slots
);
922 // TODO: unit tests for configuration APIs
923 // TODO: unit tests for small filter FP rates
925 int main(int argc
, char** argv
) {
926 ::testing::InitGoogleTest(&argc
, argv
);
928 ParseCommandLineFlags(&argc
, &argv
, true);
930 return RUN_ALL_TESTS();