// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <cmath>

#include "port/port.h"  // for PREFETCH
#include "util/fastrange.h"
#include "util/ribbon_alg.h"

namespace ROCKSDB_NAMESPACE {

namespace ribbon {

// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly)
//
// ribbon_impl.h: templated (parameterized) standard implementations
//
// Ribbon is a Perfect Hash Static Function construction useful as a compact
// static Bloom filter alternative. See ribbon_alg.h for core algorithms
// and core design details.
//
// TODO: more details on trade-offs and practical issues.
//
// APIs for configuring Ribbon are in ribbon_config.h

// Ribbon implementations in this file take these parameters, which must be
// provided in a class/struct type with members expressed in this concept:

// concept TypesAndSettings {
//   // See RibbonTypes and *Hasher in ribbon_alg.h, except here we have
//   // the added constraint that Hash be equivalent to either uint32_t or
//   // uint64_t.
//   typename Hash;
//   typename CoeffRow;
//   typename ResultRow;
//   typename Index;
//   typename Key;
//   static constexpr bool kFirstCoeffAlwaysOne;
//
//   // An unsigned integer type for identifying a hash seed, typically
//   // uint32_t or uint64_t. Importantly, this is the amount of data
//   // stored in memory for identifying a raw seed. See StandardHasher.
//   typename Seed;
//
//   // When true, the PHSF implements a static filter, expecting just
//   // keys as inputs for construction. When false, implements a general
//   // PHSF and expects std::pair<Key, ResultRow> as inputs for
//   // construction.
//   static constexpr bool kIsFilter;
//
//   // When true, enables a special "homogeneous" filter implementation that
//   // is slightly faster to construct, and never fails to construct though
//   // FP rate can quickly explode in cases where corresponding
//   // non-homogeneous filter would fail (or nearly fail?) to construct.
//   // For smaller filters, you can configure with ConstructionFailureChance
//   // smaller than desired FP rate to largely counteract this effect.
//   // TODO: configuring Homogeneous Ribbon for arbitrarily large filters
//   // based on data from OptimizeHomogAtScale
//   static constexpr bool kHomogeneous;
//
//   // When true, adds a tiny bit more hashing logic on queries and
//   // construction to improve utilization at the beginning and end of
//   // the structure. Recommended when CoeffRow is only 64 bits (or
//   // less), so typical num_starts < 10k. Although this is compatible
//   // with kHomogeneous, the competing space vs. time priorities might
//   // not be useful.
//   static constexpr bool kUseSmash;
//
//   // When true, allows number of "starts" to be zero, for best support
//   // of the "no keys to add" case by always returning false for filter
//   // queries. (This is distinct from the "keys added but no space for
//   // any data" case, in which a filter always returns true.) The cost
//   // of supporting this is a conditional branch (probably predictable)
//   // in queries.
//   static constexpr bool kAllowZeroStarts;
//
//   // A seedable stock hash function on Keys. All bits of Hash must
//   // be reasonably high quality. XXH functions recommended, but
//   // Murmur, City, Farm, etc. also work.
//   static Hash HashFn(const Key &, Seed raw_seed);
// };
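
// For illustration only, a hypothetical filter configuration satisfying
// the concept (MyTypesAndSettings and MyHash64 are example names, not
// APIs defined elsewhere in RocksDB):
//
//   struct MyTypesAndSettings {
//     using CoeffRow = uint64_t;
//     using ResultRow = uint8_t;  // ~8 bits per key
//     using Index = uint32_t;
//     using Hash = uint64_t;
//     using Key = Slice;
//     using Seed = uint32_t;
//     static constexpr bool kFirstCoeffAlwaysOne = true;
//     static constexpr bool kIsFilter = true;
//     static constexpr bool kHomogeneous = false;
//     static constexpr bool kUseSmash = false;
//     static constexpr bool kAllowZeroStarts = true;
//     static Hash HashFn(const Key& key, Seed raw_seed) {
//       // Any seedable 64-bit stock hash works here (see above)
//       return MyHash64(key.data(), key.size(), raw_seed);  // hypothetical
//     }
//   };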
86 | ||
87 | // A bit of a hack to automatically construct the type for | |
88 | // AddInput based on a constexpr bool. | |
89 | template <typename Key, typename ResultRow, bool IsFilter> | |
90 | struct AddInputSelector { | |
91 | // For general PHSF, not filter | |
92 | using T = std::pair<Key, ResultRow>; | |
93 | }; | |
94 | ||
95 | template <typename Key, typename ResultRow> | |
96 | struct AddInputSelector<Key, ResultRow, true /*IsFilter*/> { | |
97 | // For Filter | |
98 | using T = Key; | |
99 | }; | |
100 | ||
101 | // To avoid writing 'typename' everywhere that we use types like 'Index' | |
102 | #define IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings) \ | |
103 | using CoeffRow = typename TypesAndSettings::CoeffRow; \ | |
104 | using ResultRow = typename TypesAndSettings::ResultRow; \ | |
105 | using Index = typename TypesAndSettings::Index; \ | |
106 | using Hash = typename TypesAndSettings::Hash; \ | |
107 | using Key = typename TypesAndSettings::Key; \ | |
108 | using Seed = typename TypesAndSettings::Seed; \ | |
109 | \ | |
110 | /* Some more additions */ \ | |
111 | using QueryInput = Key; \ | |
112 | using AddInput = typename ROCKSDB_NAMESPACE::ribbon::AddInputSelector< \ | |
113 | Key, ResultRow, TypesAndSettings::kIsFilter>::T; \ | |
114 | static constexpr auto kCoeffBits = \ | |
115 | static_cast<Index>(sizeof(CoeffRow) * 8U); \ | |
116 | \ | |
117 | /* Export to algorithm */ \ | |
118 | static constexpr bool kFirstCoeffAlwaysOne = \ | |
119 | TypesAndSettings::kFirstCoeffAlwaysOne; \ | |
120 | \ | |
121 | static_assert(sizeof(CoeffRow) + sizeof(ResultRow) + sizeof(Index) + \ | |
122 | sizeof(Hash) + sizeof(Key) + sizeof(Seed) + \ | |
123 | sizeof(QueryInput) + sizeof(AddInput) + kCoeffBits + \ | |
124 | kFirstCoeffAlwaysOne > \ | |
125 | 0, \ | |
126 | "avoid unused warnings, semicolon expected after macro call") | |
127 | ||
128 | #ifdef _MSC_VER | |
129 | #pragma warning(push) | |
130 | #pragma warning(disable : 4309) // cast truncating constant | |
131 | #pragma warning(disable : 4307) // arithmetic constant overflow | |
132 | #endif | |
133 | ||
134 | // StandardHasher: A standard implementation of concepts RibbonTypes, | |
135 | // PhsfQueryHasher, FilterQueryHasher, and BandingHasher from ribbon_alg.h. | |
136 | // | |
// This implementation should be suitable for almost all practical purposes
// as it "behaves" across a wide range of settings, with little room left
// for improvement. The key functionality in this hasher is generating
// CoeffRows, starts, and (for filters) ResultRows, which could be ~150
// bits of data or more, from a modest hash of 64 or even just 32 bits, with
// enough uniformity and bitwise independence to be close to "the best you
// can do" with available hash information in terms of FP rate and
// compactness. (64 bits recommended and sufficient for PHSF practical
// purposes.)
//
// Another feature of this hasher is a minimal "premixing" of seeds before
// they are provided to TypesAndSettings::HashFn in case that function does
// not provide sufficiently independent hashes when iterating merely
// sequentially on seeds. (This for example works around a problem with the
// preview version 0.7.2 of XXH3 used in RocksDB, a.k.a. XXPH3 or Hash64, and
// MurmurHash1 used in RocksDB, a.k.a. Hash.) We say this pre-mixing step
// translates "ordinal seeds," which we iterate sequentially to find a
// solution, into "raw seeds," with many more bits changing for each
// iteration. The translation is an easily reversible lightweight mixing,
// not suitable for hashing on its own. An advantage of this approach is that
// StandardHasher can store just the raw seed (e.g. 64 bits) for fast query
// times, while from the application perspective, we can limit to a small
// number of ordinal seeds (e.g. 64 in 6 bits) for saving in metadata.
//
// The default constructor initializes the seed to ordinal seed zero, which
// is equal to raw seed zero.
//
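// For example (illustrative, using the seed APIs below), iterating
// ordinal seeds 0, 1, 2, ... yields raw seeds differing in many bits,
// and the mapping is reversible so the small ordinal seed can be
// recovered for compact storage in metadata:
//
//   StandardHasher<MyTypesAndSettings> h;
//   h.SetOrdinalSeed(3);        // lightweight reversible premixing
//   Seed raw = h.GetRawSeed();  // compact in-memory form for fast queries
//   assert(h.GetOrdinalSeed() == 3);  // recover ordinal for metadata
//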
template <class TypesAndSettings>
class StandardHasher {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  inline Hash GetHash(const Key& key) const {
    return TypesAndSettings::HashFn(key, raw_seed_);
  }
  // For when AddInput == pair<Key, ResultRow> (kIsFilter == false)
  inline Hash GetHash(const std::pair<Key, ResultRow>& bi) const {
    return GetHash(bi.first);
  }
  inline Index GetStart(Hash h, Index num_starts) const {
    // This is "critical path" code because it's required before memory
    // lookup.
    //
    // FastRange gives us a fast and effective mapping from h to the
    // appropriate range. This depends most, sometimes exclusively, on
    // upper bits of h.
    //
    if (TypesAndSettings::kUseSmash) {
      // Extra logic to "smash" entries at beginning and end, for
      // better utilization. For example, without smash and with
      // kFirstCoeffAlwaysOne, there's about a 30% chance that the
      // first slot in the banding will be unused, and worse without
      // kFirstCoeffAlwaysOne. The ending slots are even less utilized
      // without smash.
      //
      // But since this only affects roughly kCoeffBits of the slots,
      // it's usually small enough to be ignorable (less computation in
      // this function) when number of slots is roughly 10k or larger.
      //
      // The best values for these smash weights might depend on how
      // densely you're packing entries, and also kCoeffBits, but this
      // seems to work well for roughly 95% success probability.
      //
      constexpr Index kFrontSmash = kCoeffBits / 4;
      constexpr Index kBackSmash = kCoeffBits / 4;
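      // For example (worked numbers, not additional logic): with
      // kCoeffBits == 64 (kFrontSmash == kBackSmash == 16) and
      // num_starts == 1000, h is first mapped into [0, 1031], then
      // shifted and clamped into [0, 999] below, so the first and last
      // ~16 start positions absorb proportionally more entries.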
      Index start = FastRangeGeneric(h, num_starts + kFrontSmash + kBackSmash);
      start = std::max(start, kFrontSmash);
      start -= kFrontSmash;
      start = std::min(start, num_starts - 1);
      return start;
    } else {
      // For query speed, we allow small number of initial and final
      // entries to be under-utilized.
      // NOTE: This call statically enforces that Hash is equivalent to
      // either uint32_t or uint64_t.
      return FastRangeGeneric(h, num_starts);
    }
  }
  inline CoeffRow GetCoeffRow(Hash h) const {
    // This is not so much "critical path" code because it can be done in
    // parallel (instruction level) with memory lookup.
    //
    // When we might have many entries squeezed into a single start,
    // we need reasonably good remixing for CoeffRow.
    if (TypesAndSettings::kUseSmash) {
      // Reasonably good, reasonably fast, reasonably general.
      // Probably not 1:1 but probably close enough.
      Unsigned128 a = Multiply64to128(h, kAltCoeffFactor1);
      Unsigned128 b = Multiply64to128(h, kAltCoeffFactor2);
      auto cr = static_cast<CoeffRow>(b ^ (a << 64) ^ (a >> 64));

      // Now ensure the value is non-zero
      if (kFirstCoeffAlwaysOne) {
        cr |= 1;
      } else {
        // Still have to ensure some bit is non-zero
        cr |= (cr == 0) ? 1 : 0;
      }
      return cr;
    }
    // If not kUseSmash, we ensure we're not squeezing many entries into a
    // single start, in part by ensuring num_starts > num_slots / 2. Thus,
    // here we do not need good remixing for CoeffRow, but just enough that
    // (a) every bit is reasonably independent from Start.
    // (b) every Hash-length bit subsequence of the CoeffRow has full or
    // nearly full entropy from h.
    // (c) if nontrivial bit subsequences within are correlated, it needs to
    // be more complicated than exact copy or bitwise not (at least without
    // kFirstCoeffAlwaysOne), or else there seems to be a kind of
    // correlated clustering effect.
    // (d) the CoeffRow is not zero, so that no one input on its own can
    // doom construction success. (Preferably a mix of 1's and 0's if
    // satisfying above.)

    // First, establish sufficient bitwise independence from Start, with
    // multiplication by a large random prime.
    // Note that we cast to Hash because if we use product bits beyond
    // original input size, that's going to correlate with Start (FastRange)
    // even with a (likely) different multiplier here.
    Hash a = h * kCoeffAndResultFactor;

    static_assert(
        sizeof(Hash) == sizeof(uint64_t) || sizeof(Hash) == sizeof(uint32_t),
        "Supported sizes");
    // If that's big enough, we're done. If not, we have to expand it,
    // maybe up to 4x size.
    uint64_t b;
    if (sizeof(Hash) < sizeof(uint64_t)) {
      // Almost-trivial hash expansion (OK - see above), favoring roughly
      // equal number of 1's and 0's in result
      b = (uint64_t{a} << 32) ^ (a ^ kCoeffXor32);
    } else {
      b = a;
    }
    static_assert(sizeof(CoeffRow) <= sizeof(Unsigned128), "Supported sizes");
    Unsigned128 c;
    if (sizeof(uint64_t) < sizeof(CoeffRow)) {
      // Almost-trivial hash expansion (OK - see above), favoring roughly
      // equal number of 1's and 0's in result
      c = (Unsigned128{b} << 64) ^ (b ^ kCoeffXor64);
    } else {
      c = b;
    }
    auto cr = static_cast<CoeffRow>(c);

    // Now ensure the value is non-zero
    if (kFirstCoeffAlwaysOne) {
      cr |= 1;
    } else if (sizeof(CoeffRow) == sizeof(Hash)) {
      // Still have to ensure some bit is non-zero
      cr |= (cr == 0) ? 1 : 0;
    } else {
      // (We did trivial expansion with constant xor, which ensures some
      // bits are non-zero.)
    }
    return cr;
  }
  inline ResultRow GetResultRowMask() const {
    // TODO: will be used with InterleavedSolutionStorage?
    // For now, all bits set (note: might be a small type so might need to
    // narrow after promotion)
    return static_cast<ResultRow>(~ResultRow{0});
  }
  inline ResultRow GetResultRowFromHash(Hash h) const {
    if (TypesAndSettings::kIsFilter && !TypesAndSettings::kHomogeneous) {
      // This is not so much "critical path" code because it can be done in
      // parallel (instruction level) with memory lookup.
      //
      // ResultRow bits only need to be independent from CoeffRow bits if
      // many entries might have the same start location, where "many" is
      // comparable to number of hash bits or kCoeffBits. If !kUseSmash
      // and num_starts > kCoeffBits, it is safe and efficient to draw from
      // the same bits computed for CoeffRow, which are reasonably
      // independent from Start. (Inlining and common subexpression
      // elimination with GetCoeffRow should make this
      // a single shared multiplication in generated code when !kUseSmash.)
      Hash a = h * kCoeffAndResultFactor;

      // The bits here that are *most* independent of Start are the highest
      // order bits (as in Knuth multiplicative hash). To make those the
      // most preferred for use in the result row, we do a bswap here.
      auto rr = static_cast<ResultRow>(EndianSwapValue(a));
      return rr & GetResultRowMask();
    } else {
      // Must be zero
      return 0;
    }
  }
  // For when AddInput == Key (kIsFilter == true)
  inline ResultRow GetResultRowFromInput(const Key&) const {
    // Must be zero
    return 0;
  }
  // For when AddInput == pair<Key, ResultRow> (kIsFilter == false)
  inline ResultRow GetResultRowFromInput(
      const std::pair<Key, ResultRow>& bi) const {
    // Simple extraction
    return bi.second;
  }

  // Seed tracking APIs - see class comment
  void SetRawSeed(Seed seed) { raw_seed_ = seed; }
  Seed GetRawSeed() { return raw_seed_; }
  void SetOrdinalSeed(Seed count) {
    // A simple, reversible mixing of any size (whole bytes) up to 64 bits.
    // This allows casting the raw seed to any smaller size we use for
    // ordinal seeds without risk of duplicate raw seeds for unique ordinal
    // seeds.

    // Seed type might be smaller than numerical promotion size, but Hash
    // should be at least that size, so we use Hash as intermediate type.
    static_assert(sizeof(Seed) <= sizeof(Hash),
                  "Hash must be at least size of Seed");

    // Multiply by a large random prime (one-to-one for any prefix of bits)
    Hash tmp = count * kToRawSeedFactor;
    // Within-byte one-to-one mixing
    static_assert((kSeedMixMask & (kSeedMixMask >> kSeedMixShift)) == 0,
                  "Illegal mask+shift");
    tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift;
    raw_seed_ = static_cast<Seed>(tmp);
    // dynamic verification
    assert(GetOrdinalSeed() == count);
  }
  Seed GetOrdinalSeed() {
    Hash tmp = raw_seed_;
    // Within-byte one-to-one mixing (its own inverse)
    tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift;
    // Multiply by 64-bit multiplicative inverse
    static_assert(kToRawSeedFactor * kFromRawSeedFactor == Hash{1},
                  "Must be inverses");
    return static_cast<Seed>(tmp * kFromRawSeedFactor);
  }

 protected:
  // For expanding hash:
  // large random prime
  static constexpr Hash kCoeffAndResultFactor =
      static_cast<Hash>(0xc28f82822b650bedULL);
  static constexpr uint64_t kAltCoeffFactor1 = 0x876f170be4f1fcb9U;
  static constexpr uint64_t kAltCoeffFactor2 = 0xf0433a4aecda4c5fU;
  // random-ish data
  static constexpr uint32_t kCoeffXor32 = 0xa6293635U;
  static constexpr uint64_t kCoeffXor64 = 0xc367844a6e52731dU;

  // For pre-mixing seeds
  static constexpr Hash kSeedMixMask = static_cast<Hash>(0xf0f0f0f0f0f0f0f0ULL);
  static constexpr unsigned kSeedMixShift = 4U;
  static constexpr Hash kToRawSeedFactor =
      static_cast<Hash>(0xc78219a23eeadd03ULL);
  static constexpr Hash kFromRawSeedFactor =
      static_cast<Hash>(0xfe1a137d14b475abULL);

  // See class description
  Seed raw_seed_ = 0;
};

// StandardRehasher (and StandardRehasherAdapter): A variant of
// StandardHasher that uses the same type for keys as for hashes.
// This is primarily intended for building a Ribbon filter
// from existing hashes without going back to original inputs in
// order to apply a different seed. This hasher seeds a 1-to-1 mixing
// transformation to apply a seed to an existing hash. (Untested for
// hash-sized keys that are not already uniformly distributed.) This
// transformation builds on the seed pre-mixing done in StandardHasher.
//
// Testing suggests essentially no degradation of solution success rate
// vs. going back to original inputs when changing hash seeds. For example:
// Average re-seeds for solution with r=128, 1.02x overhead, and ~100k keys
// is about 1.10 for both StandardHasher and StandardRehasher.
//
// StandardRehasher is not really recommended for general PHSFs (not
// filters) because a collision in the original hash could prevent
// construction despite re-seeding the Rehasher. (Such collisions
// do not interfere with filter construction.)
//
// concept RehasherTypesAndSettings: like TypesAndSettings but
// does not require Key or HashFn.
template <class RehasherTypesAndSettings>
class StandardRehasherAdapter : public RehasherTypesAndSettings {
 public:
  using Hash = typename RehasherTypesAndSettings::Hash;
  using Key = Hash;
  using Seed = typename RehasherTypesAndSettings::Seed;

  static Hash HashFn(const Hash& input, Seed raw_seed) {
    // Note: raw_seed is already lightly pre-mixed, and this multiplication
    // by a large prime is sufficient mixing (low-to-high bits) on top of
    // that for good FastRange results, which depends primarily on highest
    // bits. (The hashed CoeffRow and ResultRow are less sensitive to
    // mixing than Start.)
    // Also note: did consider adding ^ (input >> some) before the
    // multiplication, but doesn't appear to be necessary.
    return (input ^ raw_seed) * kRehashFactor;
  }

 private:
  static constexpr Hash kRehashFactor =
      static_cast<Hash>(0x6193d459236a3a0dULL);
};

// See comment on StandardRehasherAdapter
template <class RehasherTypesAndSettings>
using StandardRehasher =
    StandardHasher<StandardRehasherAdapter<RehasherTypesAndSettings>>;
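
// For example (illustrative): to build a filter from pre-computed 64-bit
// hashes instead of original keys, define a RehasherTypesAndSettings with
// Hash = uint64_t (no Key or HashFn members needed) and feed the existing
// hash values as the "keys":
//
//   using MyRehasher = StandardRehasher<MyRehasherTypesAndSettings>;
//   // AddRange / FilterQuery inputs are then the uint64_t hashes.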
442 | ||
443 | #ifdef _MSC_VER | |
444 | #pragma warning(pop) | |
445 | #endif | |
446 | ||
447 | // Especially with smaller hashes (e.g. 32 bit), there can be noticeable | |
448 | // false positives due to collisions in the Hash returned by GetHash. | |
449 | // This function returns the expected FP rate due to those collisions, | |
450 | // which can be added to the expected FP rate from the underlying data | |
451 | // structure. (Note: technically, a + b is only a good approximation of | |
452 | // 1-(1-a)(1-b) == a + b - a*b, if a and b are much closer to 0 than to 1.) | |
453 | // The number of entries added can be a double here in case it's an | |
454 | // average. | |
455 | template <class Hasher, typename Numerical> | |
456 | double ExpectedCollisionFpRate(const Hasher& hasher, Numerical added) { | |
457 | // Standardize on the 'double' specialization | |
458 | return ExpectedCollisionFpRate(hasher, 1.0 * added); | |
459 | } | |
460 | template <class Hasher> | |
461 | double ExpectedCollisionFpRate(const Hasher& /*hasher*/, double added) { | |
462 | // Technically, there could be overlap among the added, but ignoring that | |
463 | // is typically close enough. | |
464 | return added / std::pow(256.0, sizeof(typename Hasher::Hash)); | |
465 | } | |
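
// For example, with a 32-bit Hash and ~1 million added keys, the expected
// additional FP rate from hash collisions is about 1e6 / 2^32, roughly
// 0.00023, which can easily dominate the FP rate of a filter with many
// result columns.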
466 | ||
467 | // StandardBanding: a canonical implementation of BandingStorage and | |
468 | // BacktrackStorage, with convenience API for banding (solving with on-the-fly | |
469 | // Gaussian elimination) with and without backtracking. | |
470 | template <class TypesAndSettings> | |
471 | class StandardBanding : public StandardHasher<TypesAndSettings> { | |
472 | public: | |
473 | IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); | |
474 | ||
475 | StandardBanding(Index num_slots = 0, Index backtrack_size = 0) { | |
476 | Reset(num_slots, backtrack_size); | |
477 | } | |
478 | ||
479 | void Reset(Index num_slots, Index backtrack_size = 0) { | |
480 | if (num_slots == 0) { | |
481 | // Unusual (TypesAndSettings::kAllowZeroStarts) or "uninitialized" | |
482 | num_starts_ = 0; | |
483 | } else { | |
484 | // Normal | |
485 | assert(num_slots >= kCoeffBits); | |
486 | if (num_slots > num_slots_allocated_) { | |
487 | coeff_rows_.reset(new CoeffRow[num_slots]()); | |
1e59de90 TL |
488 | if (!TypesAndSettings::kHomogeneous) { |
489 | // Note: don't strictly have to zero-init result_rows, | |
490 | // except possible information leakage, etc ;) | |
491 | result_rows_.reset(new ResultRow[num_slots]()); | |
492 | } | |
20effc67 TL |
493 | num_slots_allocated_ = num_slots; |
494 | } else { | |
495 | for (Index i = 0; i < num_slots; ++i) { | |
496 | coeff_rows_[i] = 0; | |
1e59de90 TL |
497 | if (!TypesAndSettings::kHomogeneous) { |
498 | // Note: don't strictly have to zero-init result_rows, | |
499 | // except possible information leakage, etc ;) | |
500 | result_rows_[i] = 0; | |
501 | } | |
20effc67 TL |
502 | } |
503 | } | |
504 | num_starts_ = num_slots - kCoeffBits + 1; | |
505 | } | |
506 | EnsureBacktrackSize(backtrack_size); | |
507 | } | |
508 | ||
509 | void EnsureBacktrackSize(Index backtrack_size) { | |
510 | if (backtrack_size > backtrack_size_) { | |
511 | backtrack_.reset(new Index[backtrack_size]); | |
512 | backtrack_size_ = backtrack_size; | |
513 | } | |
514 | } | |
515 | ||
516 | // ******************************************************************** | |
517 | // From concept BandingStorage | |
518 | ||
519 | inline bool UsePrefetch() const { | |
520 | // A rough guesstimate of when prefetching during construction pays off. | |
521 | // TODO: verify/validate | |
522 | return num_starts_ > 1500; | |
523 | } | |
524 | inline void Prefetch(Index i) const { | |
525 | PREFETCH(&coeff_rows_[i], 1 /* rw */, 1 /* locality */); | |
1e59de90 TL |
526 | if (!TypesAndSettings::kHomogeneous) { |
527 | PREFETCH(&result_rows_[i], 1 /* rw */, 1 /* locality */); | |
528 | } | |
529 | } | |
530 | inline void LoadRow(Index i, CoeffRow* cr, ResultRow* rr, | |
531 | bool for_back_subst) const { | |
532 | *cr = coeff_rows_[i]; | |
533 | if (TypesAndSettings::kHomogeneous) { | |
534 | if (for_back_subst && *cr == 0) { | |
535 | // Cheap pseudorandom data to fill unconstrained solution rows | |
536 | *rr = static_cast<ResultRow>(i * 0x9E3779B185EBCA87ULL); | |
537 | } else { | |
538 | *rr = 0; | |
539 | } | |
540 | } else { | |
541 | *rr = result_rows_[i]; | |
542 | } | |
543 | } | |
544 | inline void StoreRow(Index i, CoeffRow cr, ResultRow rr) { | |
545 | coeff_rows_[i] = cr; | |
546 | if (TypesAndSettings::kHomogeneous) { | |
547 | assert(rr == 0); | |
548 | } else { | |
549 | result_rows_[i] = rr; | |
550 | } | |
20effc67 | 551 | } |
20effc67 TL |
552 | inline Index GetNumStarts() const { return num_starts_; } |
553 | ||
554 | // from concept BacktrackStorage, for when backtracking is used | |
555 | inline bool UseBacktrack() const { return true; } | |
556 | inline void BacktrackPut(Index i, Index to_save) { backtrack_[i] = to_save; } | |
557 | inline Index BacktrackGet(Index i) const { return backtrack_[i]; } | |
558 | ||
559 | // ******************************************************************** | |
560 | // Some useful API, still somewhat low level. Here an input is | |
561 | // a Key for filters, or std::pair<Key, ResultRow> for general PHSF. | |
562 | ||
563 | // Adds a range of inputs to the banding, returning true if successful. | |
564 | // False means none or some may have been successfully added, so it's | |
565 | // best to Reset this banding before any further use. | |
566 | // | |
567 | // Adding can fail even before all the "slots" are completely "full". | |
568 | // | |
569 | template <typename InputIterator> | |
570 | bool AddRange(InputIterator begin, InputIterator end) { | |
571 | assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts); | |
572 | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { | |
573 | // Unusual. Can't add any in this case. | |
574 | return begin == end; | |
575 | } | |
576 | // Normal | |
577 | return BandingAddRange(this, *this, begin, end); | |
578 | } | |
579 | ||
580 | // Adds a range of inputs to the banding, returning true if successful, | |
581 | // or if unsuccessful, rolls back to state before this call and returns | |
582 | // false. Caller guarantees that the number of inputs in this batch | |
583 | // does not exceed `backtrack_size` provided to Reset. | |
584 | // | |
585 | // Adding can fail even before all the "slots" are completely "full". | |
586 | // | |
587 | template <typename InputIterator> | |
588 | bool AddRangeOrRollBack(InputIterator begin, InputIterator end) { | |
589 | assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts); | |
590 | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { | |
591 | // Unusual. Can't add any in this case. | |
592 | return begin == end; | |
593 | } | |
594 | // else Normal | |
595 | return BandingAddRange(this, this, *this, begin, end); | |
596 | } | |
597 | ||
598 | // Adds a single input to the banding, returning true if successful. | |
599 | // If unsuccessful, returns false and banding state is unchanged. | |
600 | // | |
601 | // Adding can fail even before all the "slots" are completely "full". | |
602 | // | |
603 | bool Add(const AddInput& input) { | |
604 | // Pointer can act as iterator | |
605 | return AddRange(&input, &input + 1); | |
606 | } | |
607 | ||
608 | // Return the number of "occupied" rows (with non-zero coefficients stored). | |
609 | Index GetOccupiedCount() const { | |
610 | Index count = 0; | |
611 | if (num_starts_ > 0) { | |
612 | const Index num_slots = num_starts_ + kCoeffBits - 1; | |
613 | for (Index i = 0; i < num_slots; ++i) { | |
614 | if (coeff_rows_[i] != 0) { | |
615 | ++count; | |
616 | } | |
617 | } | |
618 | } | |
619 | return count; | |
620 | } | |
621 | ||
1e59de90 TL |
622 | // Returns whether a row is "occupied" in the banding (non-zero |
623 | // coefficients stored). (Only recommended for debug/test) | |
624 | bool IsOccupied(Index i) { return coeff_rows_[i] != 0; } | |
625 | ||
20effc67 TL |
626 | // ******************************************************************** |
627 | // High-level API | |
628 | ||
629 | // Iteratively (a) resets the structure for `num_slots`, (b) attempts | |
630 | // to add the range of inputs, and (c) if unsuccessful, chooses next | |
631 | // hash seed, until either successful or unsuccessful with all the | |
632 | // allowed seeds. Returns true if successful. In that case, use | |
633 | // GetOrdinalSeed() or GetRawSeed() to get the successful seed. | |
634 | // | |
635 | // The allowed sequence of hash seeds is determined by | |
636 | // `starting_ordinal_seed,` the first ordinal seed to be attempted | |
637 | // (see StandardHasher), and `ordinal_seed_mask,` a bit mask (power of | |
638 | // two minus one) for the range of ordinal seeds to consider. The | |
639 | // max number of seeds considered will be ordinal_seed_mask + 1. | |
640 | // For filters we suggest `starting_ordinal_seed` be chosen randomly | |
641 | // or round-robin, to minimize false positive correlations between keys. | |
642 | // | |
643 | // If unsuccessful, how best to continue is going to be application | |
644 | // specific. It should be possible to choose parameters such that | |
645 | // failure is extremely unlikely, using max_seed around 32 to 64. | |
646 | // (TODO: APIs to help choose parameters) One option for fallback in | |
647 | // constructing a filter is to construct a Bloom filter instead. | |
648 | // Increasing num_slots is an option, but should not be used often | |
649 | // unless construction maximum latency is a concern (rather than | |
650 | // average running time of construction). Instead, choose parameters | |
651 | // appropriately and trust that seeds are independent. (Also, | |
652 | // increasing num_slots without changing hash seed would have a | |
653 | // significant correlation in success, rather than independence.) | |
654 | template <typename InputIterator> | |
655 | bool ResetAndFindSeedToSolve(Index num_slots, InputIterator begin, | |
656 | InputIterator end, | |
657 | Seed starting_ordinal_seed = 0U, | |
658 | Seed ordinal_seed_mask = 63U) { | |
659 | // power of 2 minus 1 | |
660 | assert((ordinal_seed_mask & (ordinal_seed_mask + 1)) == 0); | |
661 | // starting seed is within mask | |
662 | assert((starting_ordinal_seed & ordinal_seed_mask) == | |
663 | starting_ordinal_seed); | |
664 | starting_ordinal_seed &= ordinal_seed_mask; // if not debug | |
665 | ||
666 | Seed cur_ordinal_seed = starting_ordinal_seed; | |
667 | do { | |
668 | StandardHasher<TypesAndSettings>::SetOrdinalSeed(cur_ordinal_seed); | |
669 | Reset(num_slots); | |
670 | bool success = AddRange(begin, end); | |
671 | if (success) { | |
672 | return true; | |
673 | } | |
674 | cur_ordinal_seed = (cur_ordinal_seed + 1) & ordinal_seed_mask; | |
675 | } while (cur_ordinal_seed != starting_ordinal_seed); | |
676 | // Reached limit by circling around | |
677 | return false; | |
678 | } | |
679 | ||
1e59de90 TL |
680 | static std::size_t EstimateMemoryUsage(uint32_t num_slots) { |
681 | std::size_t bytes_coeff_rows = num_slots * sizeof(CoeffRow); | |
682 | std::size_t bytes_result_rows = num_slots * sizeof(ResultRow); | |
683 | std::size_t bytes_backtrack = 0; | |
684 | std::size_t bytes_banding = | |
685 | bytes_coeff_rows + bytes_result_rows + bytes_backtrack; | |
20effc67 | 686 | |
1e59de90 | 687 | return bytes_banding; |
20effc67 TL |
688 | } |
689 | ||
690 | protected: | |
691 | // TODO: explore combining in a struct | |
692 | std::unique_ptr<CoeffRow[]> coeff_rows_; | |
693 | std::unique_ptr<ResultRow[]> result_rows_; | |
694 | // We generally store "starts" instead of slots for speed of GetStart(), | |
695 | // as in StandardHasher. | |
696 | Index num_starts_ = 0; | |
697 | Index num_slots_allocated_ = 0; | |
698 | std::unique_ptr<Index[]> backtrack_; | |
699 | Index backtrack_size_ = 0; | |
700 | }; | |
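
// Example banding workflow (illustrative sketch, not an API defined here;
// assumes a concept implementation MyTypesAndSettings and keys in a
// std::vector<Key>):
//
//   StandardBanding<MyTypesAndSettings> banding;
//   // Over-provision slots vs. keys for a high chance of banding success,
//   // e.g. ~5% (see ribbon_config.h for principled choices), and round
//   // as required by the chosen solution storage (RoundUpNumSlots).
//   Index num_slots = static_cast<Index>(keys.size() * 1.05);
//   if (banding.ResetAndFindSeedToSolve(num_slots, keys.begin(),
//                                       keys.end())) {
//     Seed seed = banding.GetOrdinalSeed();  // save in metadata
//     // ... back-substitute into a solution structure (see below)
//   } else {
//     // ... application-specific fallback, e.g. a Bloom filter
//   }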
701 | ||
702 | // Implements concept SimpleSolutionStorage, mostly for demonstration | |
703 | // purposes. This is "in memory" only because it does not handle byte | |
704 | // ordering issues for serialization. | |
705 | template <class TypesAndSettings> | |
706 | class InMemSimpleSolution { | |
707 | public: | |
708 | IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); | |
709 | ||
710 | void PrepareForNumStarts(Index num_starts) { | |
711 | if (TypesAndSettings::kAllowZeroStarts && num_starts == 0) { | |
712 | // Unusual | |
713 | num_starts_ = 0; | |
714 | } else { | |
715 | // Normal | |
716 | const Index num_slots = num_starts + kCoeffBits - 1; | |
717 | assert(num_slots >= kCoeffBits); | |
718 | if (num_slots > num_slots_allocated_) { | |
719 | // Do not need to init the memory | |
720 | solution_rows_.reset(new ResultRow[num_slots]); | |
721 | num_slots_allocated_ = num_slots; | |
722 | } | |
723 | num_starts_ = num_starts; | |
724 | } | |
725 | } | |
726 | ||
727 | Index GetNumStarts() const { return num_starts_; } | |
728 | ||
729 | ResultRow Load(Index slot_num) const { return solution_rows_[slot_num]; } | |
730 | ||
731 | void Store(Index slot_num, ResultRow solution_row) { | |
732 | solution_rows_[slot_num] = solution_row; | |
733 | } | |
734 | ||
735 | // ******************************************************************** | |
736 | // High-level API | |
737 | ||
738 | template <typename BandingStorage> | |
739 | void BackSubstFrom(const BandingStorage& bs) { | |
740 | if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) { | |
741 | // Unusual | |
742 | PrepareForNumStarts(0); | |
743 | } else { | |
744 | // Normal | |
745 | SimpleBackSubst(this, bs); | |
746 | } | |
747 | } | |
748 | ||
749 | template <typename PhsfQueryHasher> | |
1e59de90 TL |
750 | ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const { |
751 | // assert(!TypesAndSettings::kIsFilter); Can be useful in testing | |
20effc67 TL |
752 | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { |
753 | // Unusual | |
754 | return 0; | |
755 | } else { | |
756 | // Normal | |
757 | return SimplePhsfQuery(input, hasher, *this); | |
758 | } | |
759 | } | |
760 | ||
761 | template <typename FilterQueryHasher> | |
1e59de90 | 762 | bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const { |
20effc67 TL |
763 | assert(TypesAndSettings::kIsFilter); |
764 | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { | |
765 | // Unusual. Zero starts presumes no keys added -> always false | |
766 | return false; | |
767 | } else { | |
768 | // Normal, or upper_num_columns_ == 0 means "no space for data" and | |
769 | // thus will always return true. | |
770 | return SimpleFilterQuery(input, hasher, *this); | |
771 | } | |
772 | } | |
773 | ||
1e59de90 | 774 | double ExpectedFpRate() const { |
20effc67 TL |
775 | assert(TypesAndSettings::kIsFilter); |
776 | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { | |
777 | // Unusual, but we don't have FPs if we always return false. | |
778 | return 0.0; | |
779 | } | |
780 | // else Normal | |
781 | ||
782 | // Each result (solution) bit (column) cuts FP rate in half | |
783 | return std::pow(0.5, 8U * sizeof(ResultRow)); | |
784 | } | |
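  // For example, with ResultRow == uint8_t, each query checks 8 solution
  // bits (columns), so the expected FP rate is 2^-8, about 0.39%.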
785 | ||
1e59de90 TL |
786 | // ******************************************************************** |
787 | // Static high-level API | |
788 | ||
  // Round up to a number of slots supported by this structure. Note that
  // the rounded value must be taken into account for the banding if this
  // solution layout/storage is to be used.
  static Index RoundUpNumSlots(Index num_slots) {
    // Must be at least kCoeffBits for at least one start
    // Or if not smash, even more because hashing not equipped
    // for stacking up so many entries on a single start location
    auto min_slots = kCoeffBits * (TypesAndSettings::kUseSmash ? 1 : 2);
    return std::max(num_slots, static_cast<Index>(min_slots));
  }

 protected:
  // We generally store "starts" instead of slots for speed of GetStart(),
  // as in StandardHasher.
  Index num_starts_ = 0;
  Index num_slots_allocated_ = 0;
  std::unique_ptr<ResultRow[]> solution_rows_;
};
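
// Example query flow (illustrative sketch), continuing the banding example
// above:
//
//   InMemSimpleSolution<MyTypesAndSettings> soln;
//   soln.BackSubstFrom(banding);
//   // The banding object is a StandardHasher carrying the successful
//   // seed, so it doubles as the query hasher:
//   bool may_match = soln.FilterQuery(some_key, banding);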
807 | ||
808 | // Implements concept InterleavedSolutionStorage always using little-endian | |
809 | // byte order, so easy for serialization/deserialization. This implementation | |
810 | // fully supports fractional bits per key, where any number of segments | |
811 | // (number of bytes multiple of sizeof(CoeffRow)) can be used with any number | |
812 | // of slots that is a multiple of kCoeffBits. | |
813 | // | |
814 | // The structure is passed an externally allocated/de-allocated byte buffer | |
815 | // that is optionally pre-populated (from storage) for answering queries, | |
816 | // or can be populated by BackSubstFrom. | |
817 | // | |
818 | template <class TypesAndSettings> | |
819 | class SerializableInterleavedSolution { | |
820 | public: | |
821 | IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); | |
822 | ||
823 | // Does not take ownership of `data` but uses it (up to `data_len` bytes) | |
824 | // throughout lifetime | |
825 | SerializableInterleavedSolution(char* data, size_t data_len) | |
826 | : data_(data), data_len_(data_len) {} | |
827 | ||
828 | void PrepareForNumStarts(Index num_starts) { | |
829 | assert(num_starts == 0 || (num_starts % kCoeffBits == 1)); | |
830 | num_starts_ = num_starts; | |
831 | ||
832 | InternalConfigure(); | |
833 | } | |
834 | ||
835 | Index GetNumStarts() const { return num_starts_; } | |
836 | ||
837 | Index GetNumBlocks() const { | |
838 | const Index num_slots = num_starts_ + kCoeffBits - 1; | |
839 | return num_slots / kCoeffBits; | |
840 | } | |
841 | ||
842 | Index GetUpperNumColumns() const { return upper_num_columns_; } | |
843 | ||
844 | Index GetUpperStartBlock() const { return upper_start_block_; } | |
845 | ||
846 | Index GetNumSegments() const { | |
847 | return static_cast<Index>(data_len_ / sizeof(CoeffRow)); | |
848 | } | |
849 | ||
850 | CoeffRow LoadSegment(Index segment_num) const { | |
851 | assert(data_ != nullptr); // suppress clang analyzer report | |
852 | return DecodeFixedGeneric<CoeffRow>(data_ + segment_num * sizeof(CoeffRow)); | |
853 | } | |
854 | void StoreSegment(Index segment_num, CoeffRow val) { | |
855 | assert(data_ != nullptr); // suppress clang analyzer report | |
856 | EncodeFixedGeneric(data_ + segment_num * sizeof(CoeffRow), val); | |
857 | } | |
1e59de90 TL |
858 | void PrefetchSegmentRange(Index begin_segment_num, |
859 | Index end_segment_num) const { | |
860 | if (end_segment_num == begin_segment_num) { | |
861 | // Nothing to do | |
862 | return; | |
863 | } | |
864 | char* cur = data_ + begin_segment_num * sizeof(CoeffRow); | |
865 | char* last = data_ + (end_segment_num - 1) * sizeof(CoeffRow); | |
866 | while (cur < last) { | |
867 | PREFETCH(cur, 0 /* rw */, 1 /* locality */); | |
868 | cur += CACHE_LINE_SIZE; | |
869 | } | |
870 | PREFETCH(last, 0 /* rw */, 1 /* locality */); | |
871 | } | |
20effc67 TL |
872 | |
873 | // ******************************************************************** | |
874 | // High-level API | |
875 | ||
876 | void ConfigureForNumBlocks(Index num_blocks) { | |
877 | if (num_blocks == 0) { | |
878 | PrepareForNumStarts(0); | |
879 | } else { | |
880 | PrepareForNumStarts(num_blocks * kCoeffBits - kCoeffBits + 1); | |
881 | } | |
882 | } | |
883 | ||
884 | void ConfigureForNumSlots(Index num_slots) { | |
885 | assert(num_slots % kCoeffBits == 0); | |
886 | ConfigureForNumBlocks(num_slots / kCoeffBits); | |
887 | } | |
888 | ||
889 | template <typename BandingStorage> | |
890 | void BackSubstFrom(const BandingStorage& bs) { | |
891 | if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) { | |
892 | // Unusual | |
893 | PrepareForNumStarts(0); | |
894 | } else { | |
895 | // Normal | |
896 | InterleavedBackSubst(this, bs); | |
897 | } | |
898 | } | |
899 | ||
900 | template <typename PhsfQueryHasher> | |
1e59de90 TL |
901 | ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const { |
902 | // assert(!TypesAndSettings::kIsFilter); Can be useful in testing | |
20effc67 TL |
903 | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { |
904 | // Unusual | |
905 | return 0; | |
906 | } else { | |
907 | // Normal | |
1e59de90 TL |
908 | // NOTE: not using a struct to encourage compiler optimization |
909 | Hash hash; | |
910 | Index segment_num; | |
911 | Index num_columns; | |
912 | Index start_bit; | |
913 | InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num, | |
914 | &num_columns, &start_bit); | |
915 | return InterleavedPhsfQuery(hash, segment_num, num_columns, start_bit, | |
916 | hasher, *this); | |
20effc67 TL |
917 | } |
918 | } | |
919 | ||
920 | template <typename FilterQueryHasher> | |
1e59de90 | 921 | bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const { |
20effc67 TL |
922 | assert(TypesAndSettings::kIsFilter); |
923 | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { | |
924 | // Unusual. Zero starts presumes no keys added -> always false | |
925 | return false; | |
926 | } else { | |
927 | // Normal, or upper_num_columns_ == 0 means "no space for data" and | |
928 | // thus will always return true. | |
1e59de90 TL |
929 | // NOTE: not using a struct to encourage compiler optimization |
930 | Hash hash; | |
931 | Index segment_num; | |
932 | Index num_columns; | |
933 | Index start_bit; | |
934 | InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num, | |
935 | &num_columns, &start_bit); | |
936 | return InterleavedFilterQuery(hash, segment_num, num_columns, start_bit, | |
937 | hasher, *this); | |
20effc67 TL |
938 | } |
939 | } | |
940 | ||
1e59de90 | 941 | double ExpectedFpRate() const { |
20effc67 TL |
942 | assert(TypesAndSettings::kIsFilter); |
943 | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { | |
944 | // Unusual. Zero starts presumes no keys added -> always false | |
945 | return 0.0; | |
946 | } | |
947 | // else Normal | |
948 | ||
949 | // Note: Ignoring smash setting; still close enough in that case | |
950 | double lower_portion = | |
951 | (upper_start_block_ * 1.0 * kCoeffBits) / num_starts_; | |
952 | ||
953 | // Each result (solution) bit (column) cuts FP rate in half. Weight that | |
954 | // for upper and lower number of bits (columns). | |
955 | return lower_portion * std::pow(0.5, upper_num_columns_ - 1) + | |
956 | (1.0 - lower_portion) * std::pow(0.5, upper_num_columns_); | |
957 | } | |
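  // For example (worked numbers): with kCoeffBits == 64, 8 blocks
  // (512 slots), and 60 segments, upper_num_columns_ == 8 and
  // upper_start_block_ == 4: the first 4 blocks store 7 columns each and
  // the last 4 store 8, about 7.5 bits per slot overall, for an FP rate
  // between 2^-7 and 2^-8.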
958 | ||
959 | // ******************************************************************** | |
960 | // Static high-level API | |
961 | ||
  // Round up to a number of slots supported by this structure. Note that
  // the rounded value must be taken into account for the banding if this
  // solution layout/storage is to be used.
  static Index RoundUpNumSlots(Index num_slots) {
    // Must be multiple of kCoeffBits
    Index corrected = (num_slots + kCoeffBits - 1) / kCoeffBits * kCoeffBits;

    // Do not use num_starts==1 unless kUseSmash, because the hashing
    // might not be equipped for stacking up so many entries on a
    // single start location.
    if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) {
      corrected += kCoeffBits;
    }
    return corrected;
  }

  // Round down to a number of slots supported by this structure. Note that
  // the rounded value must be taken into account for the banding if this
  // solution layout/storage is to be used.
  static Index RoundDownNumSlots(Index num_slots) {
    // Must be multiple of kCoeffBits
    Index corrected = num_slots / kCoeffBits * kCoeffBits;

    // Do not use num_starts==1 unless kUseSmash, because the hashing
    // might not be equipped for stacking up so many entries on a
    // single start location.
    if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) {
      corrected = 0;
    }
    return corrected;
  }
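  // For example, with kCoeffBits == 64 and !kUseSmash:
  //   RoundUpNumSlots(1000) == 1024    RoundDownNumSlots(1000) == 960
  //   RoundUpNumSlots(64) == 128       RoundDownNumSlots(100) == 0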
993 | ||
20effc67 TL |
994 | // Compute the number of bytes for a given number of slots and desired |
995 | // FP rate. Since desired FP rate might not be exactly achievable, | |
996 | // rounding_bias32==0 means to always round toward lower FP rate | |
997 | // than desired (more bytes); rounding_bias32==max uint32_t means always | |
998 | // round toward higher FP rate than desired (fewer bytes); other values | |
999 | // act as a proportional threshold or bias between the two. | |
1000 | static size_t GetBytesForFpRate(Index num_slots, double desired_fp_rate, | |
1001 | uint32_t rounding_bias32) { | |
1002 | return InternalGetBytesForFpRate(num_slots, desired_fp_rate, | |
1003 | 1.0 / desired_fp_rate, rounding_bias32); | |
1004 | } | |
1005 | ||
1006 | // The same, but specifying desired accuracy as 1.0 / FP rate, or | |
1007 | // one_in_fp_rate. E.g. desired_one_in_fp_rate=100 means 1% FP rate. | |
1008 | static size_t GetBytesForOneInFpRate(Index num_slots, | |
1009 | double desired_one_in_fp_rate, | |
1010 | uint32_t rounding_bias32) { | |
1011 | return InternalGetBytesForFpRate(num_slots, 1.0 / desired_one_in_fp_rate, | |
1012 | desired_one_in_fp_rate, rounding_bias32); | |
1013 | } | |
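  // For example (illustrative), GetBytesForOneInFpRate(num_slots, 100.0, 0)
  // sizes the buffer for ~1% FP rate, with rounding_bias32 == 0 rounding
  // toward a lower FP rate (more bytes) than requested.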
1014 | ||
1015 | protected: | |
1016 | static size_t InternalGetBytesForFpRate(Index num_slots, | |
1017 | double desired_fp_rate, | |
1018 | double desired_one_in_fp_rate, | |
1019 | uint32_t rounding_bias32) { | |
1020 | assert(TypesAndSettings::kIsFilter); | |
1e59de90 TL |
1021 | if (TypesAndSettings::kAllowZeroStarts) { |
1022 | if (num_slots == 0) { | |
1023 | // Unusual. Zero starts presumes no keys added -> always false (no FPs) | |
1024 | return 0U; | |
1025 | } | |
1026 | } else { | |
1027 | assert(num_slots > 0); | |
20effc67 TL |
1028 | } |
1029 | // Must be rounded up already. | |
1030 | assert(RoundUpNumSlots(num_slots) == num_slots); | |
1031 | ||
1032 | if (desired_one_in_fp_rate > 1.0 && desired_fp_rate < 1.0) { | |
1033 | // Typical: less than 100% FP rate | |
1034 | if (desired_one_in_fp_rate <= static_cast<ResultRow>(-1)) { | |
1035 | // Typical: Less than maximum result row entropy | |
1036 | ResultRow rounded = static_cast<ResultRow>(desired_one_in_fp_rate); | |
1037 | int lower_columns = FloorLog2(rounded); | |
1038 | double lower_columns_fp_rate = std::pow(2.0, -lower_columns); | |
1039 | double upper_columns_fp_rate = std::pow(2.0, -(lower_columns + 1)); | |
1040 | // Floating point don't let me down! | |
1041 | assert(lower_columns_fp_rate >= desired_fp_rate); | |
1042 | assert(upper_columns_fp_rate <= desired_fp_rate); | |
1043 | ||
1044 | double lower_portion = (desired_fp_rate - upper_columns_fp_rate) / | |
1045 | (lower_columns_fp_rate - upper_columns_fp_rate); | |
1046 | // Floating point don't let me down! | |
1047 | assert(lower_portion >= 0.0); | |
1048 | assert(lower_portion <= 1.0); | |
1049 | ||
1050 | double rounding_bias = (rounding_bias32 + 0.5) / double{0x100000000}; | |
1051 | assert(rounding_bias > 0.0); | |
1052 | assert(rounding_bias < 1.0); | |
1053 | ||
1054 | // Note: Ignoring smash setting; still close enough in that case | |
1055 | Index num_starts = num_slots - kCoeffBits + 1; | |
1056 | // Lower upper_start_block means lower FP rate (higher accuracy) | |
1057 | Index upper_start_block = static_cast<Index>( | |
1058 | (lower_portion * num_starts + rounding_bias) / kCoeffBits); | |
1059 | Index num_blocks = num_slots / kCoeffBits; | |
1060 | assert(upper_start_block < num_blocks); | |
1061 | ||
1062 | // Start by assuming all blocks use lower number of columns | |
1063 | Index num_segments = num_blocks * static_cast<Index>(lower_columns); | |
1064 | // Correct by 1 each for blocks using upper number of columns | |
1065 | num_segments += (num_blocks - upper_start_block); | |
1066 | // Total bytes | |
1067 | return num_segments * sizeof(CoeffRow); | |
1068 | } else { | |
1069 | // one_in_fp_rate too big, thus requested FP rate is smaller than | |
1070 | // supported. Use max number of columns for minimum supported FP rate. | |
1071 | return num_slots * sizeof(ResultRow); | |
1072 | } | |
1073 | } else { | |
1074 | // Effectively asking for 100% FP rate, or NaN etc. | |
1075 | if (TypesAndSettings::kAllowZeroStarts) { | |
1076 | // Zero segments | |
1077 | return 0U; | |
1078 | } else { | |
1079 | // One segment (minimum size, maximizing FP rate) | |
1080 | return sizeof(CoeffRow); | |
1081 | } | |
1082 | } | |
1083 | } | |
1084 | ||
1085 | void InternalConfigure() { | |
1086 | const Index num_blocks = GetNumBlocks(); | |
1087 | Index num_segments = GetNumSegments(); | |
1088 | ||
1089 | if (num_blocks == 0) { | |
1090 | // Exceptional | |
1091 | upper_num_columns_ = 0; | |
1092 | upper_start_block_ = 0; | |
1093 | } else { | |
1094 | // Normal | |
1095 | upper_num_columns_ = | |
1096 | (num_segments + /*round up*/ num_blocks - 1) / num_blocks; | |
1097 | upper_start_block_ = upper_num_columns_ * num_blocks - num_segments; | |
1098 | // Unless that's more columns than supported by ResultRow data type | |
1099 | if (upper_num_columns_ > 8U * sizeof(ResultRow)) { | |
1100 | // Use maximum columns (there will be space unused) | |
1101 | upper_num_columns_ = static_cast<Index>(8U * sizeof(ResultRow)); | |
1102 | upper_start_block_ = 0; | |
1103 | num_segments = num_blocks * upper_num_columns_; | |
1104 | } | |
1105 | } | |
1106 | // Update data_len_ for correct rounding and/or unused space | |
1107 | // NOTE: unused space stays gone if we PrepareForNumStarts again. | |
    // We are prioritizing minimizing the number of fields over making
    // the "unused space" feature work well.
    data_len_ = num_segments * sizeof(CoeffRow);
  }

  char* const data_;
  size_t data_len_;
  Index num_starts_ = 0;
  Index upper_num_columns_ = 0;
  Index upper_start_block_ = 0;
};

}  // namespace ribbon

}  // namespace ROCKSDB_NAMESPACE

// For convenience working with templates
#define IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings)                            \
  using Hasher = ROCKSDB_NAMESPACE::ribbon::StandardHasher<TypesAndSettings>; \
  using Banding =                                                             \
      ROCKSDB_NAMESPACE::ribbon::StandardBanding<TypesAndSettings>;           \
  using SimpleSoln =                                                          \
      ROCKSDB_NAMESPACE::ribbon::InMemSimpleSolution<TypesAndSettings>;       \
  using InterleavedSoln =                                                     \
      ROCKSDB_NAMESPACE::ribbon::SerializableInterleavedSolution<             \
          TypesAndSettings>;                                                  \
  static_assert(sizeof(Hasher) + sizeof(Banding) + sizeof(SimpleSoln) +       \
                        sizeof(InterleavedSoln) >                             \
                    0,                                                        \
                "avoid unused warnings, semicolon expected after macro call")