// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include <cmath>

#include "test_util/testharness.h"
#include "util/bloom_impl.h"
#include "util/coding.h"
#include "util/hash.h"
#include "util/ribbon_impl.h"
#include "util/stop_watch.h"

#ifndef GFLAGS
uint32_t FLAGS_thoroughness = 5;
bool FLAGS_find_occ = false;
double FLAGS_find_next_factor = 1.414;
double FLAGS_find_success = 0.95;
double FLAGS_find_delta_start = 0.01;
double FLAGS_find_delta_end = 0.0001;
double FLAGS_find_delta_shrink = 0.99;
uint32_t FLAGS_find_min_slots = 128;
uint32_t FLAGS_find_max_slots = 12800000;
#else
#include "util/gflags_compat.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
// Using 500 is a good test when you have time to be thorough.
// Default is for general RocksDB regression test runs.
DEFINE_uint32(thoroughness, 5, "iterations per configuration");

// Options for FindOccupancyForSuccessRate, which is more of a tool
// than a test.
DEFINE_bool(find_occ, false,
            "whether to run the FindOccupancyForSuccessRate tool");
DEFINE_double(find_next_factor, 1.414,
              "factor to next num_slots for FindOccupancyForSuccessRate");
DEFINE_double(find_success, 0.95,
              "target success rate for FindOccupancyForSuccessRate");
DEFINE_double(find_delta_start, 0.01, " for FindOccupancyForSuccessRate");
DEFINE_double(find_delta_end, 0.0001, " for FindOccupancyForSuccessRate");
DEFINE_double(find_delta_shrink, 0.99, " for FindOccupancyForSuccessRate");
DEFINE_uint32(find_min_slots, 128,
              "minimum number of slots for FindOccupancyForSuccessRate");
DEFINE_uint32(find_max_slots, 12800000,
              "maximum number of slots for FindOccupancyForSuccessRate");
#endif  // GFLAGS

template <typename TypesAndSettings>
class RibbonTypeParamTest : public ::testing::Test {};

class RibbonTest : public ::testing::Test {};

namespace {

// Different ways of generating keys for testing

// Generate semi-sequential keys
struct StandardKeyGen {
  StandardKeyGen(const std::string& prefix, uint64_t id)
      : id_(id), str_(prefix) {
    ROCKSDB_NAMESPACE::PutFixed64(&str_, /*placeholder*/ 0);
  }

  // Prefix increment (only the prefix form is required)
  StandardKeyGen& operator++() {
    ++id_;
    return *this;
  }

  StandardKeyGen& operator+=(uint64_t i) {
    id_ += i;
    return *this;
  }

  const std::string& operator*() {
    // Use multiplication to mix things up a little in the key
    ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8],
                                     id_ * uint64_t{0x1500000001});
    return str_;
  }

  bool operator==(const StandardKeyGen& other) {
    // Same prefix is assumed
    return id_ == other.id_;
  }
  bool operator!=(const StandardKeyGen& other) {
    // Same prefix is assumed
    return id_ != other.id_;
  }

  uint64_t id_;
  std::string str_;
};

// Generate small sequential keys that can misbehave with sequential seeds
// as in https://github.com/Cyan4973/xxHash/issues/469.
// These keys are only heuristically unique, but that's OK with 64 bits,
// for testing purposes.
struct SmallKeyGen {
  SmallKeyGen(const std::string& prefix, uint64_t id) : id_(id) {
    // Hash the prefix for a heuristically unique offset
    id_ += ROCKSDB_NAMESPACE::GetSliceHash64(prefix);
    ROCKSDB_NAMESPACE::PutFixed64(&str_, id_);
  }

  // Prefix increment (only the prefix form is required)
  SmallKeyGen& operator++() {
    ++id_;
    return *this;
  }

  SmallKeyGen& operator+=(uint64_t i) {
    id_ += i;
    return *this;
  }

  const std::string& operator*() {
    ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8], id_);
    return str_;
  }

  bool operator==(const SmallKeyGen& other) { return id_ == other.id_; }
  bool operator!=(const SmallKeyGen& other) { return id_ != other.id_; }

  uint64_t id_;
  std::string str_;
};

template <typename KeyGen>
struct Hash32KeyGenWrapper : public KeyGen {
  Hash32KeyGenWrapper(const std::string& prefix, uint64_t id)
      : KeyGen(prefix, id) {}
  uint32_t operator*() {
    auto& key = *static_cast<KeyGen&>(*this);
    // unseeded
    return ROCKSDB_NAMESPACE::GetSliceHash(key);
  }
};

template <typename KeyGen>
struct Hash64KeyGenWrapper : public KeyGen {
  Hash64KeyGenWrapper(const std::string& prefix, uint64_t id)
      : KeyGen(prefix, id) {}
  uint64_t operator*() {
    auto& key = *static_cast<KeyGen&>(*this);
    // unseeded
    return ROCKSDB_NAMESPACE::GetSliceHash64(key);
  }
};

}  // namespace

using ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate;
using ROCKSDB_NAMESPACE::ribbon::StandardHasher;
using ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter;

struct DefaultTypesAndSettings {
  using CoeffRow = ROCKSDB_NAMESPACE::Unsigned128;
  using ResultRow = uint8_t;
  using Index = uint32_t;
  using Hash = uint64_t;
  using Seed = uint32_t;
  using Key = ROCKSDB_NAMESPACE::Slice;
  static constexpr bool kIsFilter = true;
  static constexpr bool kFirstCoeffAlwaysOne = true;
  static constexpr bool kUseSmash = false;
  static constexpr bool kAllowZeroStarts = false;
  static Hash HashFn(const Key& key, uint64_t raw_seed) {
    // This preview version (0.7.2) of XXH3 (a.k.a. XXH3p) does not pass
    // the SmallKeyGen tests below without some seed premixing from
    // StandardHasher. See https://github.com/Cyan4973/xxHash/issues/469
    return ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), raw_seed);
  }
  // For testing
  using KeyGen = StandardKeyGen;
};

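// Each of the following configurations overrides one or a few members of
// DefaultTypesAndSettings, so the typed tests below cover a matrix of
// coefficient widths, result widths, hash/seed sizes, and key generators.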
using TypesAndSettings_Coeff128 = DefaultTypesAndSettings;
struct TypesAndSettings_Coeff128Smash : public DefaultTypesAndSettings {
  static constexpr bool kUseSmash = true;
};
struct TypesAndSettings_Coeff64 : public DefaultTypesAndSettings {
  using CoeffRow = uint64_t;
};
struct TypesAndSettings_Coeff64Smash1 : public DefaultTypesAndSettings {
  using CoeffRow = uint64_t;
  static constexpr bool kUseSmash = true;
};
struct TypesAndSettings_Coeff64Smash0 : public TypesAndSettings_Coeff64Smash1 {
  static constexpr bool kFirstCoeffAlwaysOne = false;
};
struct TypesAndSettings_Result16 : public DefaultTypesAndSettings {
  using ResultRow = uint16_t;
};
struct TypesAndSettings_Result32 : public DefaultTypesAndSettings {
  using ResultRow = uint32_t;
};
struct TypesAndSettings_IndexSizeT : public DefaultTypesAndSettings {
  using Index = size_t;
};
struct TypesAndSettings_Hash32 : public DefaultTypesAndSettings {
  using Hash = uint32_t;
  static Hash HashFn(const Key& key, Hash raw_seed) {
    // This MurmurHash1 function does not pass tests below without the
    // seed premixing from StandardHasher. In fact, it needs more than
    // just a multiplication mixer on the ordinal seed.
    return ROCKSDB_NAMESPACE::Hash(key.data(), key.size(), raw_seed);
  }
};
struct TypesAndSettings_Hash32_Result16 : public TypesAndSettings_Hash32 {
  using ResultRow = uint16_t;
};
struct TypesAndSettings_KeyString : public DefaultTypesAndSettings {
  using Key = std::string;
};
struct TypesAndSettings_Seed8 : public DefaultTypesAndSettings {
  // This is not a generally recommended configuration. With the configured
  // hash function, it would fail with SmallKeyGen due to insufficient
  // independence among the seeds.
  using Seed = uint8_t;
};
struct TypesAndSettings_NoAlwaysOne : public DefaultTypesAndSettings {
  static constexpr bool kFirstCoeffAlwaysOne = false;
};
struct TypesAndSettings_AllowZeroStarts : public DefaultTypesAndSettings {
  static constexpr bool kAllowZeroStarts = true;
};
struct TypesAndSettings_Seed64 : public DefaultTypesAndSettings {
  using Seed = uint64_t;
};
struct TypesAndSettings_Rehasher
    : public StandardRehasherAdapter<DefaultTypesAndSettings> {
  using KeyGen = Hash64KeyGenWrapper<StandardKeyGen>;
};
struct TypesAndSettings_Rehasher_Result16 : public TypesAndSettings_Rehasher {
  using ResultRow = uint16_t;
};
struct TypesAndSettings_Rehasher_Result32 : public TypesAndSettings_Rehasher {
  using ResultRow = uint32_t;
};
struct TypesAndSettings_Rehasher_Seed64
    : public StandardRehasherAdapter<TypesAndSettings_Seed64> {
  using KeyGen = Hash64KeyGenWrapper<StandardKeyGen>;
  // Note: 64-bit seed with Rehasher gives slightly better average reseeds
};
struct TypesAndSettings_Rehasher32
    : public StandardRehasherAdapter<TypesAndSettings_Hash32> {
  using KeyGen = Hash32KeyGenWrapper<StandardKeyGen>;
};
struct TypesAndSettings_Rehasher32_Coeff64
    : public TypesAndSettings_Rehasher32 {
  using CoeffRow = uint64_t;
};
struct TypesAndSettings_SmallKeyGen : public DefaultTypesAndSettings {
  // SmallKeyGen stresses the independence of different hash seeds
  using KeyGen = SmallKeyGen;
};
struct TypesAndSettings_Hash32_SmallKeyGen : public TypesAndSettings_Hash32 {
  // SmallKeyGen stresses the independence of different hash seeds
  using KeyGen = SmallKeyGen;
};

using TestTypesAndSettings = ::testing::Types<
    TypesAndSettings_Coeff128, TypesAndSettings_Coeff128Smash,
    TypesAndSettings_Coeff64, TypesAndSettings_Coeff64Smash0,
    TypesAndSettings_Coeff64Smash1, TypesAndSettings_Result16,
    TypesAndSettings_Result32, TypesAndSettings_IndexSizeT,
    TypesAndSettings_Hash32, TypesAndSettings_Hash32_Result16,
    TypesAndSettings_KeyString, TypesAndSettings_Seed8,
    TypesAndSettings_NoAlwaysOne, TypesAndSettings_AllowZeroStarts,
    TypesAndSettings_Seed64, TypesAndSettings_Rehasher,
    TypesAndSettings_Rehasher_Result16, TypesAndSettings_Rehasher_Result32,
    TypesAndSettings_Rehasher_Seed64, TypesAndSettings_Rehasher32,
    TypesAndSettings_Rehasher32_Coeff64, TypesAndSettings_SmallKeyGen,
    TypesAndSettings_Hash32_SmallKeyGen>;
TYPED_TEST_CASE(RibbonTypeParamTest, TestTypesAndSettings);

namespace {

// For testing Poisson-distributed (or similar) statistics, get value for
// `stddevs_allowed` standard deviations above expected mean
// `expected_count`.
// (Poisson approximates Binomial only if probability of a trial being
// in the count is low.)
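// For example, with expected_count = 100 and stddevs_allowed = 5.0, the
// upper bound below evaluates to 100 + 5.0 * sqrt(100) + 1 = 151.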
uint64_t PoissonUpperBound(double expected_count, double stddevs_allowed) {
  return static_cast<uint64_t>(
      expected_count + stddevs_allowed * std::sqrt(expected_count) + 1.0);
}

uint64_t PoissonLowerBound(double expected_count, double stddevs_allowed) {
  return static_cast<uint64_t>(std::max(
      0.0, expected_count - stddevs_allowed * std::sqrt(expected_count)));
}

uint64_t FrequentPoissonUpperBound(double expected_count) {
  // Allow up to 5.0 standard deviations for frequently checked statistics
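  // (wider than the "infrequent" bounds below so that statistics checked
  // many times per test run are unlikely to cause flaky failures)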
  return PoissonUpperBound(expected_count, 5.0);
}

uint64_t FrequentPoissonLowerBound(double expected_count) {
  return PoissonLowerBound(expected_count, 5.0);
}

uint64_t InfrequentPoissonUpperBound(double expected_count) {
  // Allow up to 3 standard deviations for infrequently checked statistics
  return PoissonUpperBound(expected_count, 3.0);
}

uint64_t InfrequentPoissonLowerBound(double expected_count) {
  return PoissonLowerBound(expected_count, 3.0);
}

}  // namespace

TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam);
  IMPORT_RIBBON_IMPL_TYPES(TypeParam);
  using KeyGen = typename TypeParam::KeyGen;

  // For testing FP rate etc.
  constexpr Index kNumToCheck = 100000;

  const auto log2_thoroughness =
      static_cast<Hash>(ROCKSDB_NAMESPACE::FloorLog2(FLAGS_thoroughness));

  // With overhead of just 2%, expect ~50% encoding success per
  // seed with ~5k keys on 64-bit ribbon, or ~150k keys on 128-bit ribbon.
  const double kFactor = 1.02;
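  // E.g. num_to_add = 2500 keys -> num_slots = 2500 * 1.02 = 2550.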

  uint64_t total_reseeds = 0;
  uint64_t total_single_failures = 0;
  uint64_t total_batch_successes = 0;
  uint64_t total_fp_count = 0;
  uint64_t total_added = 0;

  uint64_t soln_query_nanos = 0;
  uint64_t soln_query_count = 0;
  uint64_t bloom_query_nanos = 0;
  uint64_t isoln_query_nanos = 0;
  uint64_t isoln_query_count = 0;

  // Take different samples if you change thoroughness
  ROCKSDB_NAMESPACE::Random32 rnd(FLAGS_thoroughness);

  for (uint32_t i = 0; i < FLAGS_thoroughness; ++i) {
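    // Baseline key counts target the ~50% solve success noted above for
    // kFactor: 130000 keys for 128-bit CoeffRow; for 64-bit, 5500 with
    // smash, else 2500.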
    uint32_t num_to_add =
        sizeof(CoeffRow) == 16 ? 130000 : TypeParam::kUseSmash ? 5500 : 2500;

    // Use different values between that number and 50% of that number
    num_to_add -= rnd.Uniformish(num_to_add / 2);

    total_added += num_to_add;

    // Most of the time, test the Interleaved solution storage, but when
    // we do, we have to make num_slots a multiple of kCoeffBits. So
    // sometimes we want to test without that limitation.
    bool test_interleaved = (i % 7) != 6;

    Index num_slots = static_cast<Index>(num_to_add * kFactor);
    if (test_interleaved) {
      // Round to supported number of slots
      num_slots = InterleavedSoln::RoundUpNumSlots(num_slots);
      // Re-adjust num_to_add to get as close as possible to kFactor
      num_to_add = static_cast<uint32_t>(num_slots / kFactor);
    }

    std::string prefix;
    ROCKSDB_NAMESPACE::PutFixed32(&prefix, rnd.Next());

    // Batch that must be added
    std::string added_str = prefix + "added";
    KeyGen keys_begin(added_str, 0);
    KeyGen keys_end(added_str, num_to_add);

    // A couple more that will probably be added
    KeyGen one_more(prefix + "more", 1);
    KeyGen two_more(prefix + "more", 2);

    // Batch that may or may not be added
    const Index kBatchSize =
        sizeof(CoeffRow) == 16 ? 300 : TypeParam::kUseSmash ? 20 : 10;
    std::string batch_str = prefix + "batch";
    KeyGen batch_begin(batch_str, 0);
    KeyGen batch_end(batch_str, kBatchSize);

    // Batch never (successfully) added, but used for querying FP rate
    std::string not_str = prefix + "not";
    KeyGen other_keys_begin(not_str, 0);
    KeyGen other_keys_end(not_str, kNumToCheck);

    // Vary bytes for InterleavedSoln to use number of solution columns
    // from 0 to max allowed by ResultRow type (and used by SimpleSoln).
    // Specifically include 0 and max, and otherwise skew toward max.
    uint32_t max_ibytes = static_cast<uint32_t>(sizeof(ResultRow) * num_slots);
    size_t ibytes;
    if (i == 0) {
      ibytes = 0;
    } else if (i == 1) {
      ibytes = max_ibytes;
    } else {
      // Skewed
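      // (the max of two uniform draws is skewed toward large values: its
      // CDF is the square of the uniform CDF)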
      ibytes =
          std::max(rnd.Uniformish(max_ibytes), rnd.Uniformish(max_ibytes));
    }
    std::unique_ptr<char[]> idata(new char[ibytes]);
    InterleavedSoln isoln(idata.get(), ibytes);

    SimpleSoln soln;
    Hasher hasher;
    bool first_single;
    bool second_single;
    bool batch_success;
    {
      Banding banding;
      // Traditional solve for a fixed set.
      ASSERT_TRUE(
          banding.ResetAndFindSeedToSolve(num_slots, keys_begin, keys_end));

      // Now to test backtracking, starting with guaranteed fail. By using
      // the keys that will be used to test FP rate, we are then doing an
      // extra check that after backtracking there are no remnants (e.g. in
      // result side of banding) of these entries.
      Index occupied_count = banding.GetOccupiedCount();
      banding.EnsureBacktrackSize(kNumToCheck);
      EXPECT_FALSE(
          banding.AddRangeOrRollBack(other_keys_begin, other_keys_end));
      EXPECT_EQ(occupied_count, banding.GetOccupiedCount());

      // Check that we still have a good chance of adding a couple more
      // individually
      first_single = banding.Add(*one_more);
      second_single = banding.Add(*two_more);
      Index more_added = (first_single ? 1 : 0) + (second_single ? 1 : 0);
      total_single_failures += 2U - more_added;

      // Or as a batch
      batch_success = banding.AddRangeOrRollBack(batch_begin, batch_end);
      if (batch_success) {
        more_added += kBatchSize;
        ++total_batch_successes;
      }
      EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added);

      // Also verify that redundant adds are OK (no effect)
      ASSERT_TRUE(
          banding.AddRange(keys_begin, KeyGen(added_str, num_to_add / 8)));
      EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added);

      // Now back-substitution
      soln.BackSubstFrom(banding);
      if (test_interleaved) {
        isoln.BackSubstFrom(banding);
      }

      Seed reseeds = banding.GetOrdinalSeed();
      total_reseeds += reseeds;

      EXPECT_LE(reseeds, 8 + log2_thoroughness);
      if (reseeds > log2_thoroughness + 1) {
        fprintf(
            stderr, "%s high reseeds at %u, %u/%u: %u\n",
            reseeds > log2_thoroughness + 8 ? "ERROR Extremely" : "Somewhat",
            static_cast<unsigned>(i), static_cast<unsigned>(num_to_add),
            static_cast<unsigned>(num_slots), static_cast<unsigned>(reseeds));
      }
      hasher.SetOrdinalSeed(reseeds);
    }
    // soln and hasher now independent of Banding object

    // Verify keys added
    KeyGen cur = keys_begin;
    while (cur != keys_end) {
      ASSERT_TRUE(soln.FilterQuery(*cur, hasher));
      ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher));
      ++cur;
    }
    // We (maybe) snuck these in!
    if (first_single) {
      ASSERT_TRUE(soln.FilterQuery(*one_more, hasher));
      ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*one_more, hasher));
    }
    if (second_single) {
      ASSERT_TRUE(soln.FilterQuery(*two_more, hasher));
      ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*two_more, hasher));
    }
    if (batch_success) {
      cur = batch_begin;
      while (cur != batch_end) {
        ASSERT_TRUE(soln.FilterQuery(*cur, hasher));
        ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher));
        ++cur;
      }
    }

    // Check FP rate (depends only on number of result bits == solution
    // columns)
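    // For SimpleSoln this is pow(0.5, 8 * sizeof(ResultRow)), e.g. with
    // ResultRow = uint8_t a base rate of 2^-8 ~= 0.39%, as also used in the
    // final whole-test check below.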
    Index fp_count = 0;
    cur = other_keys_begin;
    {
      ROCKSDB_NAMESPACE::StopWatchNano timer(ROCKSDB_NAMESPACE::Env::Default(),
                                             true);
      while (cur != other_keys_end) {
        bool fp = soln.FilterQuery(*cur, hasher);
        fp_count += fp ? 1 : 0;
        ++cur;
      }
      soln_query_nanos += timer.ElapsedNanos();
      soln_query_count += kNumToCheck;
    }
    {
      double expected_fp_count = soln.ExpectedFpRate() * kNumToCheck;
      // For expected FP rate, also include false positives due to collisions
      // in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
      double correction =
          kNumToCheck * ExpectedCollisionFpRate(hasher, num_to_add);
      EXPECT_LE(fp_count,
                FrequentPoissonUpperBound(expected_fp_count + correction));
      EXPECT_GE(fp_count,
                FrequentPoissonLowerBound(expected_fp_count + correction));
    }
    total_fp_count += fp_count;

    // And also check FP rate for isoln
    if (test_interleaved) {
      Index ifp_count = 0;
      cur = other_keys_begin;
      ROCKSDB_NAMESPACE::StopWatchNano timer(ROCKSDB_NAMESPACE::Env::Default(),
                                             true);
      while (cur != other_keys_end) {
        ifp_count += isoln.FilterQuery(*cur, hasher) ? 1 : 0;
        ++cur;
      }
      isoln_query_nanos += timer.ElapsedNanos();
      isoln_query_count += kNumToCheck;
      {
        double expected_fp_count = isoln.ExpectedFpRate() * kNumToCheck;
        // For expected FP rate, also include false positives due to collisions
        // in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
        double correction =
            kNumToCheck * ExpectedCollisionFpRate(hasher, num_to_add);
        EXPECT_LE(ifp_count,
                  FrequentPoissonUpperBound(expected_fp_count + correction));
        EXPECT_GE(ifp_count,
                  FrequentPoissonLowerBound(expected_fp_count + correction));
      }
      // Since the bits used in isoln are a subset of the bits used in soln,
      // it cannot have fewer FPs
      EXPECT_GE(ifp_count, fp_count);
    }

    // And compare to Bloom time, for fun
    if (ibytes >= /* minimum Bloom impl bytes*/ 64) {
      Index bfp_count = 0;
      cur = other_keys_begin;
      ROCKSDB_NAMESPACE::StopWatchNano timer(ROCKSDB_NAMESPACE::Env::Default(),
                                             true);
      while (cur != other_keys_end) {
        uint64_t h = hasher.GetHash(*cur);
        uint32_t h1 = ROCKSDB_NAMESPACE::Lower32of64(h);
        uint32_t h2 = sizeof(Hash) >= 8 ? ROCKSDB_NAMESPACE::Upper32of64(h)
                                        : h1 * 0x9e3779b9;
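        // (0x9e3779b9, the 32-bit golden ratio constant, is a common odd
        // multiplier for deriving a second hash when only 32 hash bits are
        // available)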
        bfp_count += ROCKSDB_NAMESPACE::FastLocalBloomImpl::HashMayMatch(
                         h1, h2, static_cast<uint32_t>(ibytes), 6, idata.get())
                         ? 1
                         : 0;
        ++cur;
      }
      bloom_query_nanos += timer.ElapsedNanos();
      // ensure bfp_count is used
      ASSERT_LT(bfp_count, kNumToCheck);
    }
  }

  // "outside" == key not in original set so either negative or false positive
  fprintf(stderr, "Simple outside query, hot, incl hashing, ns/key: %g\n",
          1.0 * soln_query_nanos / soln_query_count);
  fprintf(stderr, "Interleaved outside query, hot, incl hashing, ns/key: %g\n",
          1.0 * isoln_query_nanos / isoln_query_count);
  fprintf(stderr, "Bloom outside query, hot, incl hashing, ns/key: %g\n",
          1.0 * bloom_query_nanos / soln_query_count);

  {
    double average_reseeds = 1.0 * total_reseeds / FLAGS_thoroughness;
    fprintf(stderr, "Average re-seeds: %g\n", average_reseeds);
    // Values above were chosen to target around a 50% encoding success rate
    // per seed (an average of 1.0 re-seeds) or slightly better, but 1.15 is
    // also close enough.
    EXPECT_LE(total_reseeds,
              InfrequentPoissonUpperBound(1.15 * FLAGS_thoroughness));
    // Would use 0.85 here instead of 0.75, but
    // TypesAndSettings_Hash32_SmallKeyGen can "beat the odds" because of
    // sequential keys with a small, cheap hash function. We accept that
    // there are surely inputs that are somewhat bad for this setup, but
    // these somewhat good inputs are probably more likely.
    EXPECT_GE(total_reseeds,
              InfrequentPoissonLowerBound(0.75 * FLAGS_thoroughness));
  }

  {
    uint64_t total_singles = 2 * FLAGS_thoroughness;
    double single_failure_rate = 1.0 * total_single_failures / total_singles;
    fprintf(stderr, "Add'l single, failure rate: %g\n", single_failure_rate);
    // A rough bound (one sided) based on nothing in particular
    double expected_single_failures =
        1.0 * total_singles /
        (sizeof(CoeffRow) == 16 ? 128 : TypeParam::kUseSmash ? 64 : 32);
    EXPECT_LE(total_single_failures,
              InfrequentPoissonUpperBound(expected_single_failures));
  }

  {
    // Counting successes here for Poisson to approximate the Binomial
    // distribution.
    // A rough bound (one sided) based on nothing in particular.
    double expected_batch_successes = 1.0 * FLAGS_thoroughness / 2;
    uint64_t lower_bound =
        InfrequentPoissonLowerBound(expected_batch_successes);
    fprintf(stderr, "Add'l batch, success rate: %g (>= %g)\n",
            1.0 * total_batch_successes / FLAGS_thoroughness,
            1.0 * lower_bound / FLAGS_thoroughness);
    EXPECT_GE(total_batch_successes, lower_bound);
  }

  {
    uint64_t total_checked = uint64_t{kNumToCheck} * FLAGS_thoroughness;
    double expected_total_fp_count =
        total_checked * std::pow(0.5, 8U * sizeof(ResultRow));
    // For expected FP rate, also include false positives due to collisions
    // in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
    double average_added = 1.0 * total_added / FLAGS_thoroughness;
    expected_total_fp_count +=
        total_checked * ExpectedCollisionFpRate(Hasher(), average_added);

    uint64_t upper_bound =
        InfrequentPoissonUpperBound(expected_total_fp_count);
    uint64_t lower_bound =
        InfrequentPoissonLowerBound(expected_total_fp_count);
    fprintf(stderr, "Average FP rate: %g (~= %g, <= %g, >= %g)\n",
            1.0 * total_fp_count / total_checked,
            expected_total_fp_count / total_checked,
            1.0 * upper_bound / total_checked,
            1.0 * lower_bound / total_checked);
    EXPECT_LE(total_fp_count, upper_bound);
    EXPECT_GE(total_fp_count, lower_bound);
  }
}

TYPED_TEST(RibbonTypeParamTest, Extremes) {
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam);
  IMPORT_RIBBON_IMPL_TYPES(TypeParam);
  using KeyGen = typename TypeParam::KeyGen;

  size_t bytes = 128 * 1024;
  std::unique_ptr<char[]> buf(new char[bytes]);
  InterleavedSoln isoln(buf.get(), bytes);
  SimpleSoln soln;
  Hasher hasher;
  Banding banding;

  // ########################################
  // Add zero keys to minimal number of slots
  KeyGen begin_and_end("foo", 123);
  ASSERT_TRUE(banding.ResetAndFindSeedToSolve(
      /*slots*/ kCoeffBits, begin_and_end, begin_and_end, /*first seed*/ 0,
      /* seed mask*/ 0));

  soln.BackSubstFrom(banding);
  isoln.BackSubstFrom(banding);

  // Because there's plenty of memory, we expect the interleaved solution to
  // use maximum supported columns (same as simple solution)
  ASSERT_EQ(isoln.GetUpperNumColumns(), 8U * sizeof(ResultRow));
  ASSERT_EQ(isoln.GetUpperStartBlock(), 0U);

  // Somewhat oddly, we expect the same FP rate as if we had essentially
  // filled up the slots.
  constexpr Index kNumToCheck = 100000;
  KeyGen other_keys_begin("not", 0);
  KeyGen other_keys_end("not", kNumToCheck);

  Index fp_count = 0;
  KeyGen cur = other_keys_begin;
  while (cur != other_keys_end) {
    bool isoln_query_result = isoln.FilterQuery(*cur, hasher);
    bool soln_query_result = soln.FilterQuery(*cur, hasher);
    // Solutions are equivalent
    ASSERT_EQ(isoln_query_result, soln_query_result);
    // And in fact we only expect an FP when ResultRow is 0
    // CHANGE: no longer true because of filling some unused slots
    // with pseudorandom values.
    // ASSERT_EQ(soln_query_result, hasher.GetResultRowFromHash(
    //                                  hasher.GetHash(*cur)) == ResultRow{0});
    fp_count += soln_query_result ? 1 : 0;
    ++cur;
  }
  {
    ASSERT_EQ(isoln.ExpectedFpRate(), soln.ExpectedFpRate());
    double expected_fp_count = isoln.ExpectedFpRate() * kNumToCheck;
    EXPECT_LE(fp_count, InfrequentPoissonUpperBound(expected_fp_count));
    EXPECT_GE(fp_count, InfrequentPoissonLowerBound(expected_fp_count));
  }

  // ######################################################
  // Use zero bytes for interleaved solution (key(s) added)

  // Add one key
  KeyGen key_begin("added", 0);
  KeyGen key_end("added", 1);
  ASSERT_TRUE(banding.ResetAndFindSeedToSolve(
      /*slots*/ kCoeffBits, key_begin, key_end, /*first seed*/ 0,
      /* seed mask*/ 0));

  InterleavedSoln isoln2(nullptr, /*bytes*/ 0);

  isoln2.BackSubstFrom(banding);

  ASSERT_EQ(isoln2.GetUpperNumColumns(), 0U);
  ASSERT_EQ(isoln2.GetUpperStartBlock(), 0U);

  // All queries return true
  ASSERT_TRUE(isoln2.FilterQuery(*other_keys_begin, hasher));
  ASSERT_EQ(isoln2.ExpectedFpRate(), 1.0);
}

TEST(RibbonTest, AllowZeroStarts) {
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings_AllowZeroStarts);
  IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings_AllowZeroStarts);
  using KeyGen = StandardKeyGen;

  InterleavedSoln isoln(nullptr, /*bytes*/ 0);
  SimpleSoln soln;
  Hasher hasher;
  Banding banding;

  KeyGen begin("foo", 0);
  KeyGen end("foo", 1);
  // Can't add 1 entry
  ASSERT_FALSE(banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin, end));

  KeyGen begin_and_end("foo", 123);
  // Can add 0 entries
  ASSERT_TRUE(banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin_and_end,
                                              begin_and_end));

  Seed reseeds = banding.GetOrdinalSeed();
  ASSERT_EQ(reseeds, 0U);
  hasher.SetOrdinalSeed(reseeds);

  // Can construct 0-slot solutions
  isoln.BackSubstFrom(banding);
  soln.BackSubstFrom(banding);

  // Should always return false
  ASSERT_FALSE(isoln.FilterQuery(*begin, hasher));
  ASSERT_FALSE(soln.FilterQuery(*begin, hasher));

  // And report that in FP rate
  ASSERT_EQ(isoln.ExpectedFpRate(), 0.0);
  ASSERT_EQ(soln.ExpectedFpRate(), 0.0);
}

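// Ordinal seeds are the small sequential values (0, 1, 2, ...) tried when
// solving; StandardHasher premixes each into a wider "raw" seed before use.
// This test checks documented properties of that mapping, including that
// ordinal seed 0 maps to raw seed 0 and that the mapping is one-to-one on a
// low-bit prefix.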
TEST(RibbonTest, RawAndOrdinalSeeds) {
  StandardHasher<TypesAndSettings_Seed64> hasher64;
  StandardHasher<DefaultTypesAndSettings> hasher64_32;
  StandardHasher<TypesAndSettings_Hash32> hasher32;
  StandardHasher<TypesAndSettings_Seed8> hasher8;

  for (uint32_t limit : {0xffU, 0xffffU}) {
    std::vector<bool> seen(limit + 1);
    for (uint32_t i = 0; i < limit; ++i) {
      hasher64.SetOrdinalSeed(i);
      auto raw64 = hasher64.GetRawSeed();
      hasher32.SetOrdinalSeed(i);
      auto raw32 = hasher32.GetRawSeed();
      hasher8.SetOrdinalSeed(static_cast<uint8_t>(i));
      auto raw8 = hasher8.GetRawSeed();
      {
        hasher64_32.SetOrdinalSeed(i);
        auto raw64_32 = hasher64_32.GetRawSeed();
        ASSERT_EQ(raw64_32, raw32);  // Same size seed
      }
      if (i == 0) {
        // Documented that ordinal seed 0 == raw seed 0
        ASSERT_EQ(raw64, 0U);
        ASSERT_EQ(raw32, 0U);
        ASSERT_EQ(raw8, 0U);
      } else {
        // Extremely likely that upper bits are set
        ASSERT_GT(raw64, raw32);
        ASSERT_GT(raw32, raw8);
      }
      // Hashers agree on lower bits
      ASSERT_EQ(static_cast<uint32_t>(raw64), raw32);
      ASSERT_EQ(static_cast<uint8_t>(raw32), raw8);

      // The translation is one-to-one for this size prefix
      uint32_t v = static_cast<uint32_t>(raw32 & limit);
      ASSERT_EQ(raw64 & limit, v);
      ASSERT_FALSE(seen[v]);
      seen[v] = true;
    }
  }
}

namespace {

struct PhsfInputGen {
  PhsfInputGen(const std::string& prefix, uint64_t id) : id_(id) {
    val_.first = prefix;
    ROCKSDB_NAMESPACE::PutFixed64(&val_.first, /*placeholder*/ 0);
  }

  // Prefix increment (only the prefix form is required)
  PhsfInputGen& operator++() {
    ++id_;
    return *this;
  }

  const std::pair<std::string, uint8_t>& operator*() {
    // Use multiplication to mix things up a little in the key
    ROCKSDB_NAMESPACE::EncodeFixed64(&val_.first[val_.first.size() - 8],
                                     id_ * uint64_t{0x1500000001});
    // Occasionally repeat values etc.
    val_.second = static_cast<uint8_t>(id_ * 7 / 8);
    return val_;
  }

  const std::pair<std::string, uint8_t>* operator->() { return &**this; }

  bool operator==(const PhsfInputGen& other) {
    // Same prefix is assumed
    return id_ == other.id_;
  }
  bool operator!=(const PhsfInputGen& other) {
    // Same prefix is assumed
    return id_ != other.id_;
  }

  uint64_t id_;
  std::pair<std::string, uint8_t> val_;
};

struct PhsfTypesAndSettings : public DefaultTypesAndSettings {
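  // kIsFilter = false configures Ribbon as a PHSF (perfect hash static
  // function): queries return the ResultRow value associated with each
  // added key (see PhsfQuery below) rather than a membership bit.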
  static constexpr bool kIsFilter = false;
};
}  // namespace

TEST(RibbonTest, PhsfBasic) {
  IMPORT_RIBBON_TYPES_AND_SETTINGS(PhsfTypesAndSettings);
  IMPORT_RIBBON_IMPL_TYPES(PhsfTypesAndSettings);

  Index num_slots = 12800;
  Index num_to_add = static_cast<Index>(num_slots / 1.02);

  PhsfInputGen begin("in", 0);
  PhsfInputGen end("in", num_to_add);

  std::unique_ptr<char[]> idata(new char[/*bytes*/ num_slots]);
  InterleavedSoln isoln(idata.get(), /*bytes*/ num_slots);
  SimpleSoln soln;
  Hasher hasher;

  {
    Banding banding;
    ASSERT_TRUE(banding.ResetAndFindSeedToSolve(num_slots, begin, end));

    soln.BackSubstFrom(banding);
    isoln.BackSubstFrom(banding);

    hasher.SetOrdinalSeed(banding.GetOrdinalSeed());
  }

  for (PhsfInputGen cur = begin; cur != end; ++cur) {
    ASSERT_EQ(cur->second, soln.PhsfQuery(cur->first, hasher));
    ASSERT_EQ(cur->second, isoln.PhsfQuery(cur->first, hasher));
  }
}

// Not a real test, but a tool used to build GetNumSlotsFor95PctSuccess
TYPED_TEST(RibbonTypeParamTest, FindOccupancyForSuccessRate) {
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam);
  IMPORT_RIBBON_IMPL_TYPES(TypeParam);
  using KeyGen = typename TypeParam::KeyGen;

  if (!FLAGS_find_occ) {
    fprintf(stderr, "Tool disabled during unit test runs\n");
    return;
  }

  KeyGen cur("blah", 0);

  Banding banding;
  Index num_slots = InterleavedSoln::RoundUpNumSlots(FLAGS_find_min_slots);
  while (num_slots < FLAGS_find_max_slots) {
    double factor = 0.95;
    double delta = FLAGS_find_delta_start;
    while (delta > FLAGS_find_delta_end) {
      Index num_to_add = static_cast<Index>(factor * num_slots);
      KeyGen end = cur;
      end += num_to_add;
      bool success =
          banding.ResetAndFindSeedToSolve(num_slots, cur, end, 0, 0);
      cur = end;  // fresh keys
      if (success) {
        factor += delta * (1.0 - FLAGS_find_success);
        factor = std::min(factor, 1.0);
      } else {
        factor -= delta * FLAGS_find_success;
        factor = std::max(factor, 0.0);
      }
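      // Stochastic-approximation-style update: a success raises factor by
      // delta * (1 - p) and a failure lowers it by delta * p, so the update
      // is balanced (stationary) exactly where the solve success rate equals
      // p = FLAGS_find_success, and shrinking delta settles factor there.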
      delta *= FLAGS_find_delta_shrink;
      fprintf(stderr,
              "slots: %u log2_slots: %g target_success: %g ->overhead: %g\r",
              static_cast<unsigned>(num_slots),
              std::log(num_slots * 1.0) / std::log(2.0), FLAGS_find_success,
              1.0 / factor);
    }
    fprintf(stderr, "\n");

    num_slots = std::max(
        num_slots + 1, static_cast<Index>(num_slots * FLAGS_find_next_factor));
    num_slots = InterleavedSoln::RoundUpNumSlots(num_slots);
  }
}

// TODO: unit tests for configuration APIs
// TODO: unit tests for small filter FP rates

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
#ifdef GFLAGS
  ParseCommandLineFlags(&argc, &argv, true);
#endif  // GFLAGS
  return RUN_ALL_TESTS();
}