]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | /* | |
5 | ******************************************************************* | |
6 | * * | |
7 | * Open Bloom Filter * | |
8 | * * | |
9 | * Author: Arash Partow - 2000 * | |
10 | * URL: http://www.partow.net/programming/hashfunctions/index.html * | |
11 | * * | |
12 | * Copyright notice: * | |
13 | * Free use of the Open Bloom Filter Library is permitted under * | |
14 | * the guidelines and in accordance with the most current version * | |
15 | * of the Boost Software License, Version 1.0 * | |
16 | * http://www.opensource.org/licenses/bsl1.0.html * | |
17 | * * | |
18 | ******************************************************************* | |
19 | */ | |
20 | ||
21 | ||
22 | #ifndef COMMON_BLOOM_FILTER_HPP | |
23 | #define COMMON_BLOOM_FILTER_HPP | |
24 | ||
7c673cae | 25 | #include <cmath> |
7c673cae | 26 | |
7c673cae | 27 | #include "include/encoding.h" |
20effc67 | 28 | #include "include/mempool.h" |
7c673cae | 29 | |
20effc67 | 30 | static const unsigned char bit_mask[CHAR_BIT] = { |
7c673cae FG |
31 | 0x01, //00000001 |
32 | 0x02, //00000010 | |
33 | 0x04, //00000100 | |
34 | 0x08, //00001000 | |
35 | 0x10, //00010000 | |
36 | 0x20, //00100000 | |
37 | 0x40, //01000000 | |
38 | 0x80 //10000000 | |
39 | }; | |
40 | ||
7c673cae FG |
41 | class bloom_filter |
42 | { | |
43 | protected: | |
44 | ||
20effc67 TL |
45 | using bloom_type = unsigned int; |
46 | using cell_type = unsigned char; | |
47 | using table_type = mempool::bloom_filter::vector<cell_type>; | |
7c673cae | 48 | |
7c673cae | 49 | std::vector<bloom_type> salt_; ///< vector of salts |
20effc67 | 50 | table_type bit_table_; ///< bit map |
7c673cae FG |
51 | std::size_t salt_count_; ///< number of salts |
52 | std::size_t table_size_; ///< bit table size in bytes | |
53 | std::size_t insert_count_; ///< insertion count | |
54 | std::size_t target_element_count_; ///< target number of unique insertions | |
55 | std::size_t random_seed_; ///< random seed | |
56 | ||
57 | public: | |
58 | ||
59 | bloom_filter() | |
20effc67 | 60 | : salt_count_(0), |
7c673cae FG |
61 | table_size_(0), |
62 | insert_count_(0), | |
63 | target_element_count_(0), | |
64 | random_seed_(0) | |
65 | {} | |
66 | ||
67 | bloom_filter(const std::size_t& predicted_inserted_element_count, | |
68 | const double& false_positive_probability, | |
69 | const std::size_t& random_seed) | |
20effc67 | 70 | : insert_count_(0), |
7c673cae FG |
71 | target_element_count_(predicted_inserted_element_count), |
72 | random_seed_((random_seed) ? random_seed : 0xA5A5A5A5) | |
73 | { | |
11fdf7f2 | 74 | ceph_assert(false_positive_probability > 0.0); |
20effc67 TL |
75 | std::tie(salt_count_, table_size_) = |
76 | find_optimal_parameters(predicted_inserted_element_count, | |
77 | false_positive_probability); | |
7c673cae FG |
78 | init(); |
79 | } | |
80 | ||
81 | bloom_filter(const std::size_t& salt_count, | |
82 | std::size_t table_size, | |
83 | const std::size_t& random_seed, | |
84 | std::size_t target_element_count) | |
20effc67 | 85 | : salt_count_(salt_count), |
7c673cae FG |
86 | table_size_(table_size), |
87 | insert_count_(0), | |
88 | target_element_count_(target_element_count), | |
89 | random_seed_((random_seed) ? random_seed : 0xA5A5A5A5) | |
90 | { | |
91 | init(); | |
92 | } | |
93 | ||
94 | void init() { | |
95 | generate_unique_salt(); | |
20effc67 | 96 | bit_table_.resize(table_size_, static_cast<unsigned char>(0x00)); |
7c673cae FG |
97 | } |
98 | ||
99 | bloom_filter(const bloom_filter& filter) | |
7c673cae FG |
100 | { |
101 | this->operator=(filter); | |
102 | } | |
103 | ||
104 | bloom_filter& operator = (const bloom_filter& filter) | |
105 | { | |
106 | if (this != &filter) { | |
7c673cae FG |
107 | salt_count_ = filter.salt_count_; |
108 | table_size_ = filter.table_size_; | |
109 | insert_count_ = filter.insert_count_; | |
110 | target_element_count_ = filter.target_element_count_; | |
111 | random_seed_ = filter.random_seed_; | |
20effc67 | 112 | bit_table_ = filter.bit_table_; |
7c673cae FG |
113 | salt_ = filter.salt_; |
114 | } | |
115 | return *this; | |
116 | } | |
117 | ||
20effc67 | 118 | virtual ~bloom_filter() = default; |
7c673cae FG |
119 | |
120 | inline bool operator!() const | |
121 | { | |
122 | return (0 == table_size_); | |
123 | } | |
124 | ||
125 | inline void clear() | |
126 | { | |
20effc67 TL |
127 | std::fill(bit_table_.begin(), bit_table_.end(), |
128 | static_cast<unsigned char>(0x00)); | |
7c673cae FG |
129 | insert_count_ = 0; |
130 | } | |
131 | ||
132 | /** | |
133 | * insert a u32 into the set | |
134 | * | |
135 | * NOTE: the internal hash is weak enough that consecutive inputs do | |
136 | * not achieve the desired fpp. Well-mixed values should be used | |
137 | * here (e.g., put rjhash(x) into the filter instead of just x). | |
138 | * | |
139 | * @param val integer value to insert | |
140 | */ | |
141 | inline void insert(uint32_t val) { | |
20effc67 TL |
142 | for (auto salt : salt_) { |
143 | auto [bit_index, bit] = compute_indices(hash_ap(val, salt)); | |
7c673cae FG |
144 | bit_table_[bit_index >> 3] |= bit_mask[bit]; |
145 | } | |
146 | ++insert_count_; | |
147 | } | |
148 | ||
149 | inline void insert(const unsigned char* key_begin, const std::size_t& length) | |
150 | { | |
20effc67 TL |
151 | for (auto salt : salt_) { |
152 | auto [bit_index, bit] = compute_indices(hash_ap(key_begin, length, salt)); | |
7c673cae FG |
153 | bit_table_[bit_index >> 3] |= bit_mask[bit]; |
154 | } | |
155 | ++insert_count_; | |
156 | } | |
157 | ||
7c673cae FG |
158 | inline void insert(const std::string& key) |
159 | { | |
160 | insert(reinterpret_cast<const unsigned char*>(key.c_str()),key.size()); | |
161 | } | |
162 | ||
163 | inline void insert(const char* data, const std::size_t& length) | |
164 | { | |
165 | insert(reinterpret_cast<const unsigned char*>(data),length); | |
166 | } | |
167 | ||
168 | template<typename InputIterator> | |
169 | inline void insert(const InputIterator begin, const InputIterator end) | |
170 | { | |
171 | InputIterator itr = begin; | |
172 | while (end != itr) | |
173 | { | |
174 | insert(*(itr++)); | |
175 | } | |
176 | } | |
177 | ||
178 | /** | |
179 | * check if a u32 is contained by set | |
180 | * | |
181 | * NOTE: the internal hash is weak enough that consecutive inputs do | |
182 | * not achieve the desired fpp. Well-mixed values should be used | |
183 | * here (e.g., put rjhash(x) into the filter instead of just x). | |
184 | * | |
185 | * @param val integer value to query | |
186 | * @returns true if value is (probably) in the set, false if it definitely is not | |
187 | */ | |
188 | inline virtual bool contains(uint32_t val) const | |
189 | { | |
20effc67 | 190 | if (table_size_ == 0) { |
7c673cae | 191 | return false; |
20effc67 TL |
192 | } |
193 | for (auto salt : salt_) { | |
194 | auto [bit_index, bit] = compute_indices(hash_ap(val, salt)); | |
195 | if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) { | |
7c673cae FG |
196 | return false; |
197 | } | |
198 | } | |
199 | return true; | |
200 | } | |
201 | ||
202 | inline virtual bool contains(const unsigned char* key_begin, const std::size_t length) const | |
203 | { | |
20effc67 | 204 | if (table_size_ == 0) { |
7c673cae | 205 | return false; |
20effc67 TL |
206 | } |
207 | for (auto salt : salt_) { | |
208 | auto [bit_index, bit] = compute_indices(hash_ap(key_begin, length, salt)); | |
209 | if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) { | |
7c673cae FG |
210 | return false; |
211 | } | |
212 | } | |
213 | return true; | |
214 | } | |
215 | ||
7c673cae FG |
216 | inline bool contains(const std::string& key) const |
217 | { | |
218 | return contains(reinterpret_cast<const unsigned char*>(key.c_str()),key.size()); | |
219 | } | |
220 | ||
221 | inline bool contains(const char* data, const std::size_t& length) const | |
222 | { | |
223 | return contains(reinterpret_cast<const unsigned char*>(data),length); | |
224 | } | |
225 | ||
226 | template<typename InputIterator> | |
227 | inline InputIterator contains_all(const InputIterator begin, const InputIterator end) const | |
228 | { | |
229 | InputIterator itr = begin; | |
230 | while (end != itr) | |
231 | { | |
232 | if (!contains(*itr)) | |
233 | { | |
234 | return itr; | |
235 | } | |
236 | ++itr; | |
237 | } | |
238 | return end; | |
239 | } | |
240 | ||
241 | template<typename InputIterator> | |
242 | inline InputIterator contains_none(const InputIterator begin, const InputIterator end) const | |
243 | { | |
244 | InputIterator itr = begin; | |
245 | while (end != itr) | |
246 | { | |
247 | if (contains(*itr)) | |
248 | { | |
249 | return itr; | |
250 | } | |
251 | ++itr; | |
252 | } | |
253 | return end; | |
254 | } | |
255 | ||
256 | inline virtual std::size_t size() const | |
257 | { | |
20effc67 | 258 | return table_size_ * CHAR_BIT; |
7c673cae FG |
259 | } |
260 | ||
261 | inline std::size_t element_count() const | |
262 | { | |
263 | return insert_count_; | |
264 | } | |
265 | ||
266 | inline bool is_full() const | |
267 | { | |
268 | return insert_count_ >= target_element_count_; | |
269 | } | |
270 | ||
271 | /* | |
272 | * density of bits set. inconvenient units, but: | |
273 | * .3 = ~50% target insertions | |
274 | * .5 = 100% target insertions, "perfectly full" | |
275 | * .75 = 200% target insertions | |
276 | * 1.0 = all bits set... infinite insertions | |
277 | */ | |
20effc67 | 278 | double density() const; |
7c673cae FG |
279 | |
280 | virtual inline double approx_unique_element_count() const { | |
281 | // this is not a very good estimate; a better solution should have | |
282 | // some asymptotic behavior as density() approaches 1.0. | |
283 | return (double)target_element_count_ * 2.0 * density(); | |
284 | } | |
285 | ||
286 | inline double effective_fpp() const | |
287 | { | |
288 | /* | |
289 | Note: | |
290 | The effective false positive probability is calculated using the | |
291 | designated table size and hash function count in conjunction with | |
292 | the current number of inserted elements - not the user defined | |
293 | predicated/expected number of inserted elements. | |
294 | */ | |
295 | return std::pow(1.0 - std::exp(-1.0 * salt_.size() * insert_count_ / size()), 1.0 * salt_.size()); | |
296 | } | |
297 | ||
298 | inline const cell_type* table() const | |
299 | { | |
20effc67 | 300 | return bit_table_.data(); |
7c673cae FG |
301 | } |
302 | ||
303 | protected: | |
304 | ||
20effc67 TL |
305 | virtual std::pair<size_t /* bit_index */, |
306 | size_t /* bit */> | |
307 | compute_indices(const bloom_type& hash) const | |
7c673cae | 308 | { |
20effc67 TL |
309 | size_t bit_index = hash % (table_size_ << 3); |
310 | size_t bit = bit_index & 7; | |
311 | return {bit_index, bit}; | |
7c673cae FG |
312 | } |
313 | ||
314 | void generate_unique_salt() | |
315 | { | |
316 | /* | |
317 | Note: | |
318 | A distinct hash function need not be implementation-wise | |
319 | distinct. In the current implementation "seeding" a common | |
320 | hash function with different values seems to be adequate. | |
321 | */ | |
322 | const unsigned int predef_salt_count = 128; | |
323 | static const bloom_type predef_salt[predef_salt_count] = { | |
324 | 0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC, | |
325 | 0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B, | |
326 | 0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66, | |
327 | 0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA, | |
328 | 0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99, | |
329 | 0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33, | |
330 | 0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5, | |
331 | 0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000, | |
332 | 0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F, | |
333 | 0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63, | |
334 | 0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7, | |
335 | 0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492, | |
336 | 0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A, | |
337 | 0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B, | |
338 | 0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3, | |
339 | 0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432, | |
340 | 0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC, | |
341 | 0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB, | |
342 | 0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331, | |
343 | 0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68, | |
344 | 0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8, | |
345 | 0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A, | |
346 | 0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF, | |
347 | 0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E, | |
348 | 0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39, | |
349 | 0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E, | |
350 | 0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355, | |
351 | 0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E, | |
352 | 0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79, | |
353 | 0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075, | |
354 | 0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC, | |
355 | 0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421 | |
356 | }; | |
357 | ||
358 | if (salt_count_ <= predef_salt_count) | |
359 | { | |
360 | std::copy(predef_salt, | |
361 | predef_salt + salt_count_, | |
362 | std::back_inserter(salt_)); | |
363 | for (unsigned int i = 0; i < salt_.size(); ++i) | |
364 | { | |
365 | /* | |
366 | Note: | |
367 | This is done to integrate the user defined random seed, | |
368 | so as to allow for the generation of unique bloom filter | |
369 | instances. | |
370 | */ | |
371 | salt_[i] = salt_[i] * salt_[(i + 3) % salt_.size()] + random_seed_; | |
372 | } | |
373 | } | |
374 | else | |
375 | { | |
376 | std::copy(predef_salt,predef_salt + predef_salt_count, | |
377 | std::back_inserter(salt_)); | |
378 | srand(static_cast<unsigned int>(random_seed_)); | |
379 | while (salt_.size() < salt_count_) | |
380 | { | |
381 | bloom_type current_salt = static_cast<bloom_type>(rand()) * static_cast<bloom_type>(rand()); | |
382 | if (0 == current_salt) | |
383 | continue; | |
384 | if (salt_.end() == std::find(salt_.begin(), salt_.end(), current_salt)) | |
385 | { | |
386 | salt_.push_back(current_salt); | |
387 | } | |
388 | } | |
389 | } | |
390 | } | |
391 | ||
20effc67 TL |
392 | static std::pair<std::size_t /* salt_count */, |
393 | std::size_t /* table_size */> | |
394 | find_optimal_parameters(std::size_t target_insert_count, | |
395 | double target_fpp) | |
7c673cae FG |
396 | { |
397 | /* | |
398 | Note: | |
399 | The following will attempt to find the number of hash functions | |
400 | and minimum amount of storage bits required to construct a bloom | |
401 | filter consistent with the user defined false positive probability | |
402 | and estimated element insertion count. | |
403 | */ | |
404 | ||
405 | double min_m = std::numeric_limits<double>::infinity(); | |
406 | double min_k = 0.0; | |
407 | double curr_m = 0.0; | |
408 | double k = 1.0; | |
409 | while (k < 1000.0) | |
410 | { | |
411 | double numerator = (- k * target_insert_count); | |
412 | double denominator = std::log(1.0 - std::pow(target_fpp, 1.0 / k)); | |
413 | curr_m = numerator / denominator; | |
414 | ||
415 | if (curr_m < min_m) | |
416 | { | |
417 | min_m = curr_m; | |
418 | min_k = k; | |
419 | } | |
420 | k += 1.0; | |
421 | } | |
422 | ||
20effc67 | 423 | size_t salt_count = static_cast<std::size_t>(min_k); |
7c673cae | 424 | size_t t = static_cast<std::size_t>(min_m); |
20effc67 TL |
425 | t += (((t & 7) != 0) ? (CHAR_BIT - (t & 7)) : 0); |
426 | size_t table_size = t >> 3; | |
427 | return {salt_count, table_size}; | |
7c673cae FG |
428 | } |
429 | ||
430 | inline bloom_type hash_ap(uint32_t val, bloom_type hash) const | |
431 | { | |
432 | hash ^= (hash << 7) ^ ((val & 0xff000000) >> 24) * (hash >> 3); | |
433 | hash ^= (~((hash << 11) + (((val & 0xff0000) >> 16) ^ (hash >> 5)))); | |
434 | hash ^= (hash << 7) ^ ((val & 0xff00) >> 8) * (hash >> 3); | |
435 | hash ^= (~((hash << 11) + (((val & 0xff)) ^ (hash >> 5)))); | |
436 | return hash; | |
437 | } | |
438 | ||
439 | inline bloom_type hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const | |
440 | { | |
441 | const unsigned char* itr = begin; | |
442 | ||
443 | while (remaining_length >= 4) | |
444 | { | |
445 | hash ^= (hash << 7) ^ (*itr++) * (hash >> 3); | |
446 | hash ^= (~((hash << 11) + ((*itr++) ^ (hash >> 5)))); | |
447 | hash ^= (hash << 7) ^ (*itr++) * (hash >> 3); | |
448 | hash ^= (~((hash << 11) + ((*itr++) ^ (hash >> 5)))); | |
449 | remaining_length -= 4; | |
450 | } | |
451 | ||
452 | while (remaining_length >= 2) | |
453 | { | |
454 | hash ^= (hash << 7) ^ (*itr++) * (hash >> 3); | |
455 | hash ^= (~((hash << 11) + ((*itr++) ^ (hash >> 5)))); | |
456 | remaining_length -= 2; | |
457 | } | |
458 | ||
459 | if (remaining_length) | |
460 | { | |
461 | hash ^= (hash << 7) ^ (*itr) * (hash >> 3); | |
462 | } | |
463 | ||
464 | return hash; | |
465 | } | |
466 | ||
467 | public: | |
9f95a23c TL |
468 | void encode(ceph::buffer::list& bl) const; |
469 | void decode(ceph::buffer::list::const_iterator& bl); | |
470 | void dump(ceph::Formatter *f) const; | |
7c673cae FG |
471 | static void generate_test_instances(std::list<bloom_filter*>& ls); |
472 | }; | |
473 | WRITE_CLASS_ENCODER(bloom_filter) | |
474 | ||
475 | ||
476 | class compressible_bloom_filter : public bloom_filter | |
477 | { | |
478 | public: | |
479 | ||
480 | compressible_bloom_filter() : bloom_filter() {} | |
481 | ||
482 | compressible_bloom_filter(const std::size_t& predicted_element_count, | |
483 | const double& false_positive_probability, | |
484 | const std::size_t& random_seed) | |
485 | : bloom_filter(predicted_element_count, false_positive_probability, random_seed) | |
486 | { | |
487 | size_list.push_back(table_size_); | |
488 | } | |
489 | ||
490 | compressible_bloom_filter(const std::size_t& salt_count, | |
491 | std::size_t table_size, | |
492 | const std::size_t& random_seed, | |
493 | std::size_t target_count) | |
494 | : bloom_filter(salt_count, table_size, random_seed, target_count) | |
495 | { | |
496 | size_list.push_back(table_size_); | |
497 | } | |
498 | ||
499 | inline std::size_t size() const override | |
500 | { | |
20effc67 | 501 | return size_list.back() * CHAR_BIT; |
7c673cae FG |
502 | } |
503 | ||
504 | inline bool compress(const double& target_ratio) | |
505 | { | |
20effc67 | 506 | if (bit_table_.empty()) |
7c673cae FG |
507 | return false; |
508 | ||
509 | if ((0.0 >= target_ratio) || (target_ratio >= 1.0)) | |
510 | { | |
511 | return false; | |
512 | } | |
513 | ||
514 | std::size_t original_table_size = size_list.back(); | |
515 | std::size_t new_table_size = static_cast<std::size_t>(size_list.back() * target_ratio); | |
516 | ||
517 | if ((!new_table_size) || (new_table_size >= original_table_size)) | |
518 | { | |
519 | return false; | |
520 | } | |
521 | ||
20effc67 TL |
522 | table_type tmp(new_table_size); |
523 | std::copy(bit_table_.begin(), bit_table_.begin() + new_table_size, tmp.begin()); | |
524 | auto itr = bit_table_.begin() + new_table_size; | |
525 | auto end = bit_table_.begin() + original_table_size; | |
526 | auto itr_tmp = tmp.begin(); | |
527 | auto itr_end = tmp.begin() + new_table_size; | |
528 | while (end != itr) { | |
7c673cae | 529 | *(itr_tmp++) |= (*itr++); |
20effc67 TL |
530 | if (itr_tmp == itr_end) { |
531 | itr_tmp = tmp.begin(); | |
532 | } | |
7c673cae | 533 | } |
20effc67 | 534 | std::swap(bit_table_, tmp); |
7c673cae FG |
535 | size_list.push_back(new_table_size); |
536 | table_size_ = new_table_size; | |
537 | ||
538 | return true; | |
539 | } | |
540 | ||
541 | inline double approx_unique_element_count() const override { | |
542 | // this is not a very good estimate; a better solution should have | |
543 | // some asymptotic behavior as density() approaches 1.0. | |
544 | // | |
545 | // the compress() correction is also bad; it tends to under-estimate. | |
546 | return (double)target_element_count_ * 2.0 * density() * (double)size_list.back() / (double)size_list.front(); | |
547 | } | |
548 | ||
549 | private: | |
550 | ||
20effc67 TL |
551 | std::pair<size_t /* bit_index */, |
552 | size_t /* bit */> | |
553 | compute_indices(const bloom_type& hash) const final | |
7c673cae | 554 | { |
20effc67 TL |
555 | size_t bit_index = hash; |
556 | for (auto size : size_list) { | |
557 | bit_index %= size << 3; | |
7c673cae | 558 | } |
20effc67 TL |
559 | size_t bit = bit_index & 7; |
560 | return {bit_index, bit}; | |
7c673cae FG |
561 | } |
562 | ||
563 | std::vector<std::size_t> size_list; | |
564 | public: | |
9f95a23c TL |
565 | void encode(ceph::bufferlist& bl) const; |
566 | void decode(ceph::bufferlist::const_iterator& bl); | |
567 | void dump(ceph::Formatter *f) const; | |
7c673cae FG |
568 | static void generate_test_instances(std::list<compressible_bloom_filter*>& ls); |
569 | }; | |
570 | WRITE_CLASS_ENCODER(compressible_bloom_filter) | |
571 | ||
572 | #endif | |
573 | ||
574 | ||
575 | /* | |
576 | Note 1: | |
20effc67 | 577 | If it can be guaranteed that CHAR_BIT will be of the form 2^n then |
7c673cae FG |
578 | the following optimization can be used: |
579 | ||
20effc67 | 580 | bit_table_[bit_index >> n] |= bit_mask[bit_index & (CHAR_BIT - 1)]; |
7c673cae FG |
581 | |
582 | Note 2: | |
583 | For performance reasons where possible when allocating memory it should | |
584 | be aligned (aligned_alloc) according to the architecture being used. | |
585 | */ |