]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | /* | |
5 | ******************************************************************* | |
6 | * * | |
7 | * Open Bloom Filter * | |
8 | * * | |
9 | * Author: Arash Partow - 2000 * | |
10 | * URL: http://www.partow.net/programming/hashfunctions/index.html * | |
11 | * * | |
12 | * Copyright notice: * | |
13 | * Free use of the Open Bloom Filter Library is permitted under * | |
14 | * the guidelines and in accordance with the most current version * | |
15 | * of the Boost Software License, Version 1.0 * | |
16 | * http://www.opensource.org/licenses/bsl1.0.html * | |
17 | * * | |
18 | ******************************************************************* | |
19 | */ | |
20 | ||
21 | ||
22 | #ifndef COMMON_BLOOM_FILTER_HPP | |
23 | #define COMMON_BLOOM_FILTER_HPP | |
24 | ||
7c673cae | 25 | #include <cmath> |
7c673cae FG |
26 | |
27 | #include "include/mempool.h" | |
28 | #include "include/encoding.h" | |
29 | ||
30 | static const std::size_t bits_per_char = 0x08; // 8 bits in 1 char(unsigned) | |
31 | static const unsigned char bit_mask[bits_per_char] = { | |
32 | 0x01, //00000001 | |
33 | 0x02, //00000010 | |
34 | 0x04, //00000100 | |
35 | 0x08, //00001000 | |
36 | 0x10, //00010000 | |
37 | 0x20, //00100000 | |
38 | 0x40, //01000000 | |
39 | 0x80 //10000000 | |
40 | }; | |
41 | ||
42 | MEMPOOL_DECLARE_FACTORY(unsigned char, byte, bloom_filter); | |
43 | ||
44 | class bloom_filter | |
45 | { | |
46 | protected: | |
47 | ||
48 | typedef unsigned int bloom_type; | |
49 | typedef unsigned char cell_type; | |
50 | ||
51 | unsigned char* bit_table_; ///< pointer to bit map | |
52 | std::vector<bloom_type> salt_; ///< vector of salts | |
53 | std::size_t salt_count_; ///< number of salts | |
54 | std::size_t table_size_; ///< bit table size in bytes | |
55 | std::size_t insert_count_; ///< insertion count | |
56 | std::size_t target_element_count_; ///< target number of unique insertions | |
57 | std::size_t random_seed_; ///< random seed | |
58 | ||
59 | public: | |
60 | ||
61 | bloom_filter() | |
62 | : bit_table_(0), | |
63 | salt_count_(0), | |
64 | table_size_(0), | |
65 | insert_count_(0), | |
66 | target_element_count_(0), | |
67 | random_seed_(0) | |
68 | {} | |
69 | ||
70 | bloom_filter(const std::size_t& predicted_inserted_element_count, | |
71 | const double& false_positive_probability, | |
72 | const std::size_t& random_seed) | |
73 | : bit_table_(0), | |
74 | insert_count_(0), | |
75 | target_element_count_(predicted_inserted_element_count), | |
76 | random_seed_((random_seed) ? random_seed : 0xA5A5A5A5) | |
77 | { | |
78 | assert(false_positive_probability > 0.0); | |
79 | find_optimal_parameters(predicted_inserted_element_count, false_positive_probability, | |
80 | &salt_count_, &table_size_); | |
81 | init(); | |
82 | } | |
83 | ||
84 | bloom_filter(const std::size_t& salt_count, | |
85 | std::size_t table_size, | |
86 | const std::size_t& random_seed, | |
87 | std::size_t target_element_count) | |
88 | : bit_table_(0), | |
89 | salt_count_(salt_count), | |
90 | table_size_(table_size), | |
91 | insert_count_(0), | |
92 | target_element_count_(target_element_count), | |
93 | random_seed_((random_seed) ? random_seed : 0xA5A5A5A5) | |
94 | { | |
95 | init(); | |
96 | } | |
97 | ||
98 | void init() { | |
99 | generate_unique_salt(); | |
100 | if (table_size_) { | |
101 | bit_table_ = mempool::bloom_filter::alloc_byte.allocate(table_size_); | |
102 | std::fill_n(bit_table_, table_size_, 0x00); | |
103 | } else { | |
104 | bit_table_ = NULL; | |
105 | } | |
106 | } | |
107 | ||
108 | bloom_filter(const bloom_filter& filter) | |
109 | : bit_table_(0) | |
110 | { | |
111 | this->operator=(filter); | |
112 | } | |
113 | ||
114 | bloom_filter& operator = (const bloom_filter& filter) | |
115 | { | |
116 | if (this != &filter) { | |
117 | if (bit_table_) { | |
118 | mempool::bloom_filter::alloc_byte.deallocate(bit_table_, table_size_); | |
119 | } | |
120 | salt_count_ = filter.salt_count_; | |
121 | table_size_ = filter.table_size_; | |
122 | insert_count_ = filter.insert_count_; | |
123 | target_element_count_ = filter.target_element_count_; | |
124 | random_seed_ = filter.random_seed_; | |
125 | bit_table_ = mempool::bloom_filter::alloc_byte.allocate(table_size_); | |
126 | std::copy(filter.bit_table_, filter.bit_table_ + table_size_, bit_table_); | |
127 | salt_ = filter.salt_; | |
128 | } | |
129 | return *this; | |
130 | } | |
131 | ||
132 | virtual ~bloom_filter() | |
133 | { | |
134 | mempool::bloom_filter::alloc_byte.deallocate(bit_table_, table_size_); | |
135 | } | |
136 | ||
137 | inline bool operator!() const | |
138 | { | |
139 | return (0 == table_size_); | |
140 | } | |
141 | ||
142 | inline void clear() | |
143 | { | |
144 | if (bit_table_) | |
145 | std::fill_n(bit_table_, table_size_, 0x00); | |
146 | insert_count_ = 0; | |
147 | } | |
148 | ||
149 | /** | |
150 | * insert a u32 into the set | |
151 | * | |
152 | * NOTE: the internal hash is weak enough that consecutive inputs do | |
153 | * not achieve the desired fpp. Well-mixed values should be used | |
154 | * here (e.g., put rjhash(x) into the filter instead of just x). | |
155 | * | |
156 | * @param val integer value to insert | |
157 | */ | |
158 | inline void insert(uint32_t val) { | |
159 | assert(bit_table_); | |
160 | std::size_t bit_index = 0; | |
161 | std::size_t bit = 0; | |
162 | for (std::size_t i = 0; i < salt_.size(); ++i) | |
163 | { | |
164 | compute_indices(hash_ap(val,salt_[i]),bit_index,bit); | |
165 | bit_table_[bit_index >> 3] |= bit_mask[bit]; | |
166 | } | |
167 | ++insert_count_; | |
168 | } | |
169 | ||
170 | inline void insert(const unsigned char* key_begin, const std::size_t& length) | |
171 | { | |
172 | assert(bit_table_); | |
173 | std::size_t bit_index = 0; | |
174 | std::size_t bit = 0; | |
175 | for (std::size_t i = 0; i < salt_.size(); ++i) | |
176 | { | |
177 | compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); | |
178 | bit_table_[bit_index >> 3] |= bit_mask[bit]; | |
179 | } | |
180 | ++insert_count_; | |
181 | } | |
182 | ||
183 | template<typename T> | |
184 | inline void insert(const T& t) | |
185 | { | |
186 | // Note: T must be a C++ POD type. | |
187 | insert(reinterpret_cast<const unsigned char*>(&t),sizeof(T)); | |
188 | } | |
189 | ||
190 | inline void insert(const std::string& key) | |
191 | { | |
192 | insert(reinterpret_cast<const unsigned char*>(key.c_str()),key.size()); | |
193 | } | |
194 | ||
195 | inline void insert(const char* data, const std::size_t& length) | |
196 | { | |
197 | insert(reinterpret_cast<const unsigned char*>(data),length); | |
198 | } | |
199 | ||
200 | template<typename InputIterator> | |
201 | inline void insert(const InputIterator begin, const InputIterator end) | |
202 | { | |
203 | InputIterator itr = begin; | |
204 | while (end != itr) | |
205 | { | |
206 | insert(*(itr++)); | |
207 | } | |
208 | } | |
209 | ||
210 | /** | |
211 | * check if a u32 is contained by set | |
212 | * | |
213 | * NOTE: the internal hash is weak enough that consecutive inputs do | |
214 | * not achieve the desired fpp. Well-mixed values should be used | |
215 | * here (e.g., put rjhash(x) into the filter instead of just x). | |
216 | * | |
217 | * @param val integer value to query | |
218 | * @returns true if value is (probably) in the set, false if it definitely is not | |
219 | */ | |
220 | inline virtual bool contains(uint32_t val) const | |
221 | { | |
222 | if (!bit_table_) | |
223 | return false; | |
224 | std::size_t bit_index = 0; | |
225 | std::size_t bit = 0; | |
226 | for (std::size_t i = 0; i < salt_.size(); ++i) | |
227 | { | |
228 | compute_indices(hash_ap(val,salt_[i]),bit_index,bit); | |
229 | if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) | |
230 | { | |
231 | return false; | |
232 | } | |
233 | } | |
234 | return true; | |
235 | } | |
236 | ||
237 | inline virtual bool contains(const unsigned char* key_begin, const std::size_t length) const | |
238 | { | |
239 | if (!bit_table_) | |
240 | return false; | |
241 | std::size_t bit_index = 0; | |
242 | std::size_t bit = 0; | |
243 | for (std::size_t i = 0; i < salt_.size(); ++i) | |
244 | { | |
245 | compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); | |
246 | if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) | |
247 | { | |
248 | return false; | |
249 | } | |
250 | } | |
251 | return true; | |
252 | } | |
253 | ||
254 | template<typename T> | |
255 | inline bool contains(const T& t) const | |
256 | { | |
257 | return contains(reinterpret_cast<const unsigned char*>(&t),static_cast<std::size_t>(sizeof(T))); | |
258 | } | |
259 | ||
260 | inline bool contains(const std::string& key) const | |
261 | { | |
262 | return contains(reinterpret_cast<const unsigned char*>(key.c_str()),key.size()); | |
263 | } | |
264 | ||
265 | inline bool contains(const char* data, const std::size_t& length) const | |
266 | { | |
267 | return contains(reinterpret_cast<const unsigned char*>(data),length); | |
268 | } | |
269 | ||
270 | template<typename InputIterator> | |
271 | inline InputIterator contains_all(const InputIterator begin, const InputIterator end) const | |
272 | { | |
273 | InputIterator itr = begin; | |
274 | while (end != itr) | |
275 | { | |
276 | if (!contains(*itr)) | |
277 | { | |
278 | return itr; | |
279 | } | |
280 | ++itr; | |
281 | } | |
282 | return end; | |
283 | } | |
284 | ||
285 | template<typename InputIterator> | |
286 | inline InputIterator contains_none(const InputIterator begin, const InputIterator end) const | |
287 | { | |
288 | InputIterator itr = begin; | |
289 | while (end != itr) | |
290 | { | |
291 | if (contains(*itr)) | |
292 | { | |
293 | return itr; | |
294 | } | |
295 | ++itr; | |
296 | } | |
297 | return end; | |
298 | } | |
299 | ||
300 | inline virtual std::size_t size() const | |
301 | { | |
302 | return table_size_ * bits_per_char; | |
303 | } | |
304 | ||
305 | inline std::size_t element_count() const | |
306 | { | |
307 | return insert_count_; | |
308 | } | |
309 | ||
310 | inline bool is_full() const | |
311 | { | |
312 | return insert_count_ >= target_element_count_; | |
313 | } | |
314 | ||
315 | /* | |
316 | * density of bits set. inconvenient units, but: | |
317 | * .3 = ~50% target insertions | |
318 | * .5 = 100% target insertions, "perfectly full" | |
319 | * .75 = 200% target insertions | |
320 | * 1.0 = all bits set... infinite insertions | |
321 | */ | |
322 | inline double density() const | |
323 | { | |
324 | if (!bit_table_) | |
325 | return 0.0; | |
326 | size_t set = 0; | |
327 | uint8_t *p = bit_table_; | |
328 | size_t left = table_size_; | |
329 | while (left-- > 0) { | |
330 | uint8_t c = *p; | |
331 | for (; c; ++set) | |
332 | c &= c - 1; | |
333 | ++p; | |
334 | } | |
335 | return (double)set / (double)(table_size_ << 3); | |
336 | } | |
337 | ||
338 | virtual inline double approx_unique_element_count() const { | |
339 | // this is not a very good estimate; a better solution should have | |
340 | // some asymptotic behavior as density() approaches 1.0. | |
341 | return (double)target_element_count_ * 2.0 * density(); | |
342 | } | |
343 | ||
344 | inline double effective_fpp() const | |
345 | { | |
346 | /* | |
347 | Note: | |
348 | The effective false positive probability is calculated using the | |
349 | designated table size and hash function count in conjunction with | |
350 | the current number of inserted elements - not the user defined | |
351 | predicated/expected number of inserted elements. | |
352 | */ | |
353 | return std::pow(1.0 - std::exp(-1.0 * salt_.size() * insert_count_ / size()), 1.0 * salt_.size()); | |
354 | } | |
355 | ||
356 | inline const cell_type* table() const | |
357 | { | |
358 | return bit_table_; | |
359 | } | |
360 | ||
361 | protected: | |
362 | ||
363 | inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const | |
364 | { | |
365 | bit_index = hash % (table_size_ << 3); | |
366 | bit = bit_index & 7; | |
367 | } | |
368 | ||
369 | void generate_unique_salt() | |
370 | { | |
371 | /* | |
372 | Note: | |
373 | A distinct hash function need not be implementation-wise | |
374 | distinct. In the current implementation "seeding" a common | |
375 | hash function with different values seems to be adequate. | |
376 | */ | |
377 | const unsigned int predef_salt_count = 128; | |
378 | static const bloom_type predef_salt[predef_salt_count] = { | |
379 | 0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC, | |
380 | 0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B, | |
381 | 0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66, | |
382 | 0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA, | |
383 | 0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99, | |
384 | 0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33, | |
385 | 0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5, | |
386 | 0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000, | |
387 | 0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F, | |
388 | 0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63, | |
389 | 0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7, | |
390 | 0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492, | |
391 | 0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A, | |
392 | 0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B, | |
393 | 0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3, | |
394 | 0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432, | |
395 | 0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC, | |
396 | 0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB, | |
397 | 0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331, | |
398 | 0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68, | |
399 | 0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8, | |
400 | 0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A, | |
401 | 0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF, | |
402 | 0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E, | |
403 | 0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39, | |
404 | 0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E, | |
405 | 0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355, | |
406 | 0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E, | |
407 | 0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79, | |
408 | 0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075, | |
409 | 0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC, | |
410 | 0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421 | |
411 | }; | |
412 | ||
413 | if (salt_count_ <= predef_salt_count) | |
414 | { | |
415 | std::copy(predef_salt, | |
416 | predef_salt + salt_count_, | |
417 | std::back_inserter(salt_)); | |
418 | for (unsigned int i = 0; i < salt_.size(); ++i) | |
419 | { | |
420 | /* | |
421 | Note: | |
422 | This is done to integrate the user defined random seed, | |
423 | so as to allow for the generation of unique bloom filter | |
424 | instances. | |
425 | */ | |
426 | salt_[i] = salt_[i] * salt_[(i + 3) % salt_.size()] + random_seed_; | |
427 | } | |
428 | } | |
429 | else | |
430 | { | |
431 | std::copy(predef_salt,predef_salt + predef_salt_count, | |
432 | std::back_inserter(salt_)); | |
433 | srand(static_cast<unsigned int>(random_seed_)); | |
434 | while (salt_.size() < salt_count_) | |
435 | { | |
436 | bloom_type current_salt = static_cast<bloom_type>(rand()) * static_cast<bloom_type>(rand()); | |
437 | if (0 == current_salt) | |
438 | continue; | |
439 | if (salt_.end() == std::find(salt_.begin(), salt_.end(), current_salt)) | |
440 | { | |
441 | salt_.push_back(current_salt); | |
442 | } | |
443 | } | |
444 | } | |
445 | } | |
446 | ||
447 | static void find_optimal_parameters(std::size_t target_insert_count, | |
448 | double target_fpp, | |
449 | std::size_t *salt_count, | |
450 | std::size_t *table_size) | |
451 | { | |
452 | /* | |
453 | Note: | |
454 | The following will attempt to find the number of hash functions | |
455 | and minimum amount of storage bits required to construct a bloom | |
456 | filter consistent with the user defined false positive probability | |
457 | and estimated element insertion count. | |
458 | */ | |
459 | ||
460 | double min_m = std::numeric_limits<double>::infinity(); | |
461 | double min_k = 0.0; | |
462 | double curr_m = 0.0; | |
463 | double k = 1.0; | |
464 | while (k < 1000.0) | |
465 | { | |
466 | double numerator = (- k * target_insert_count); | |
467 | double denominator = std::log(1.0 - std::pow(target_fpp, 1.0 / k)); | |
468 | curr_m = numerator / denominator; | |
469 | ||
470 | if (curr_m < min_m) | |
471 | { | |
472 | min_m = curr_m; | |
473 | min_k = k; | |
474 | } | |
475 | k += 1.0; | |
476 | } | |
477 | ||
478 | *salt_count = static_cast<std::size_t>(min_k); | |
479 | size_t t = static_cast<std::size_t>(min_m); | |
480 | t += (((t & 7) != 0) ? (bits_per_char - (t & 7)) : 0); | |
481 | *table_size = t >> 3; | |
482 | } | |
483 | ||
484 | inline bloom_type hash_ap(uint32_t val, bloom_type hash) const | |
485 | { | |
486 | hash ^= (hash << 7) ^ ((val & 0xff000000) >> 24) * (hash >> 3); | |
487 | hash ^= (~((hash << 11) + (((val & 0xff0000) >> 16) ^ (hash >> 5)))); | |
488 | hash ^= (hash << 7) ^ ((val & 0xff00) >> 8) * (hash >> 3); | |
489 | hash ^= (~((hash << 11) + (((val & 0xff)) ^ (hash >> 5)))); | |
490 | return hash; | |
491 | } | |
492 | ||
493 | inline bloom_type hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const | |
494 | { | |
495 | const unsigned char* itr = begin; | |
496 | ||
497 | while (remaining_length >= 4) | |
498 | { | |
499 | hash ^= (hash << 7) ^ (*itr++) * (hash >> 3); | |
500 | hash ^= (~((hash << 11) + ((*itr++) ^ (hash >> 5)))); | |
501 | hash ^= (hash << 7) ^ (*itr++) * (hash >> 3); | |
502 | hash ^= (~((hash << 11) + ((*itr++) ^ (hash >> 5)))); | |
503 | remaining_length -= 4; | |
504 | } | |
505 | ||
506 | while (remaining_length >= 2) | |
507 | { | |
508 | hash ^= (hash << 7) ^ (*itr++) * (hash >> 3); | |
509 | hash ^= (~((hash << 11) + ((*itr++) ^ (hash >> 5)))); | |
510 | remaining_length -= 2; | |
511 | } | |
512 | ||
513 | if (remaining_length) | |
514 | { | |
515 | hash ^= (hash << 7) ^ (*itr) * (hash >> 3); | |
516 | } | |
517 | ||
518 | return hash; | |
519 | } | |
520 | ||
521 | public: | |
522 | void encode(bufferlist& bl) const; | |
523 | void decode(bufferlist::iterator& bl); | |
524 | void dump(Formatter *f) const; | |
525 | static void generate_test_instances(std::list<bloom_filter*>& ls); | |
526 | }; | |
527 | WRITE_CLASS_ENCODER(bloom_filter) | |
528 | ||
529 | ||
530 | class compressible_bloom_filter : public bloom_filter | |
531 | { | |
532 | public: | |
533 | ||
534 | compressible_bloom_filter() : bloom_filter() {} | |
535 | ||
536 | compressible_bloom_filter(const std::size_t& predicted_element_count, | |
537 | const double& false_positive_probability, | |
538 | const std::size_t& random_seed) | |
539 | : bloom_filter(predicted_element_count, false_positive_probability, random_seed) | |
540 | { | |
541 | size_list.push_back(table_size_); | |
542 | } | |
543 | ||
544 | compressible_bloom_filter(const std::size_t& salt_count, | |
545 | std::size_t table_size, | |
546 | const std::size_t& random_seed, | |
547 | std::size_t target_count) | |
548 | : bloom_filter(salt_count, table_size, random_seed, target_count) | |
549 | { | |
550 | size_list.push_back(table_size_); | |
551 | } | |
552 | ||
553 | inline std::size_t size() const override | |
554 | { | |
555 | return size_list.back() * bits_per_char; | |
556 | } | |
557 | ||
558 | inline bool compress(const double& target_ratio) | |
559 | { | |
560 | if (!bit_table_) | |
561 | return false; | |
562 | ||
563 | if ((0.0 >= target_ratio) || (target_ratio >= 1.0)) | |
564 | { | |
565 | return false; | |
566 | } | |
567 | ||
568 | std::size_t original_table_size = size_list.back(); | |
569 | std::size_t new_table_size = static_cast<std::size_t>(size_list.back() * target_ratio); | |
570 | ||
571 | if ((!new_table_size) || (new_table_size >= original_table_size)) | |
572 | { | |
573 | return false; | |
574 | } | |
575 | ||
576 | cell_type* tmp = mempool::bloom_filter::alloc_byte.allocate(new_table_size); | |
577 | std::copy(bit_table_, bit_table_ + (new_table_size), tmp); | |
578 | cell_type* itr = bit_table_ + (new_table_size); | |
579 | cell_type* end = bit_table_ + (original_table_size); | |
580 | cell_type* itr_tmp = tmp; | |
581 | cell_type* itr_end = tmp + (new_table_size); | |
582 | while (end != itr) | |
583 | { | |
584 | *(itr_tmp++) |= (*itr++); | |
585 | if (itr_tmp == itr_end) | |
586 | itr_tmp = tmp; | |
587 | } | |
588 | ||
589 | mempool::bloom_filter::alloc_byte.deallocate(bit_table_, table_size_); | |
590 | bit_table_ = tmp; | |
591 | size_list.push_back(new_table_size); | |
592 | table_size_ = new_table_size; | |
593 | ||
594 | return true; | |
595 | } | |
596 | ||
597 | inline double approx_unique_element_count() const override { | |
598 | // this is not a very good estimate; a better solution should have | |
599 | // some asymptotic behavior as density() approaches 1.0. | |
600 | // | |
601 | // the compress() correction is also bad; it tends to under-estimate. | |
602 | return (double)target_element_count_ * 2.0 * density() * (double)size_list.back() / (double)size_list.front(); | |
603 | } | |
604 | ||
605 | private: | |
606 | ||
607 | inline void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const override | |
608 | { | |
609 | bit_index = hash; | |
610 | for (std::size_t i = 0; i < size_list.size(); ++i) | |
611 | { | |
612 | bit_index %= size_list[i] << 3; | |
613 | } | |
614 | bit = bit_index & 7; | |
615 | } | |
616 | ||
617 | std::vector<std::size_t> size_list; | |
618 | public: | |
619 | void encode(bufferlist& bl) const; | |
620 | void decode(bufferlist::iterator& bl); | |
621 | void dump(Formatter *f) const; | |
622 | static void generate_test_instances(std::list<compressible_bloom_filter*>& ls); | |
623 | }; | |
624 | WRITE_CLASS_ENCODER(compressible_bloom_filter) | |
625 | ||
626 | #endif | |
627 | ||
628 | ||
629 | /* | |
630 | Note 1: | |
631 | If it can be guaranteed that bits_per_char will be of the form 2^n then | |
632 | the following optimization can be used: | |
633 | ||
634 | hash_table[bit_index >> n] |= bit_mask[bit_index & (bits_per_char - 1)]; | |
635 | ||
636 | Note 2: | |
637 | For performance reasons where possible when allocating memory it should | |
638 | be aligned (aligned_alloc) according to the architecture being used. | |
639 | */ |