1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2013 Inktank <info@inktank.com>
8 * LGPL2.1 (see COPYING-LGPL2.1) or later
12 #include <gtest/gtest.h>
14 #include "include/stringify.h"
15 #include "common/bloom_filter.hpp"
17 TEST(BloomFilter
, Basic
) {
18 bloom_filter
bf(10, .1, 1);
22 ASSERT_TRUE(bf
.contains("foo"));
23 ASSERT_TRUE(bf
.contains("bar"));
25 ASSERT_EQ(2U, bf
.element_count());
28 TEST(BloomFilter
, Empty
) {
30 for (int i
=0; i
<100; ++i
) {
31 ASSERT_FALSE(bf
.contains(i
));
32 ASSERT_FALSE(bf
.contains(stringify(i
)));
36 TEST(BloomFilter
, Sweep
) {
37 std::cout
.setf(std::ios_base::fixed
, std::ios_base::floatfield
);
38 std::cout
.precision(5);
39 std::cout
<< "# max\tfpp\tactual\tsize\tB/insert" << std::endl
;
40 for (int ex
= 3; ex
< 12; ex
+= 2) {
41 for (float fpp
= .001; fpp
< .5; fpp
*= 4.0) {
43 bloom_filter
bf(max
, fpp
, 1);
47 ASSERT_TRUE(bf
.contains("foo"));
48 ASSERT_TRUE(bf
.contains("bar"));
50 for (int n
= 0; n
< max
; n
++)
51 bf
.insert("ok" + stringify(n
));
55 for (int n
= 0; n
< test
; n
++)
56 if (bf
.contains("asdf" + stringify(n
)))
59 ASSERT_TRUE(bf
.contains("foo"));
60 ASSERT_TRUE(bf
.contains("bar"));
62 double actual
= (double)hit
/ (double)test
;
67 double byte_per_insert
= (double)bl
.length() / (double)max
;
69 std::cout
<< max
<< "\t" << fpp
<< "\t" << actual
<< "\t" << bl
.length() << "\t" << byte_per_insert
<< std::endl
;
70 ASSERT_TRUE(actual
< fpp
* 10);
76 TEST(BloomFilter
, SweepInt
) {
77 std::cout
.setf(std::ios_base::fixed
, std::ios_base::floatfield
);
78 std::cout
.precision(5);
79 std::cout
<< "# max\tfpp\tactual\tsize\tB/insert\tdensity\tapprox_element_count" << std::endl
;
80 for (int ex
= 3; ex
< 12; ex
+= 2) {
81 for (float fpp
= .001; fpp
< .5; fpp
*= 4.0) {
83 bloom_filter
bf(max
, fpp
, 1);
90 for (int n
= 0; n
< max
; n
++)
95 for (int n
= 0; n
< test
; n
++)
96 if (bf
.contains(100000 + n
))
102 double actual
= (double)hit
/ (double)test
;
107 double byte_per_insert
= (double)bl
.length() / (double)max
;
109 std::cout
<< max
<< "\t" << fpp
<< "\t" << actual
<< "\t" << bl
.length() << "\t" << byte_per_insert
110 << "\t" << bf
.density() << "\t" << bf
.approx_unique_element_count() << std::endl
;
111 ASSERT_TRUE(actual
< fpp
* 10);
112 ASSERT_TRUE(actual
> fpp
/ 10);
113 ASSERT_TRUE(bf
.density() > 0.40);
114 ASSERT_TRUE(bf
.density() < 0.60);
120 TEST(BloomFilter
, CompressibleSweep
) {
121 std::cout
.setf(std::ios_base::fixed
, std::ios_base::floatfield
);
122 std::cout
.precision(5);
123 std::cout
<< "# max\tins\test ins\tafter\ttgtfpp\tactual\tsize\tb/elem\n";
126 for (int div
= 1; div
< 10; div
++) {
127 compressible_bloom_filter
bf(max
, fpp
, 1);
129 for (int n
= 0; n
< t
; n
++)
132 unsigned est
= bf
.approx_unique_element_count();
134 bf
.compress(1.0 / div
);
136 for (int n
= 0; n
< t
; n
++)
137 ASSERT_TRUE(bf
.contains(n
));
139 int test
= max
* 100;
141 for (int n
= 0; n
< test
; n
++)
142 if (bf
.contains(100000 + n
))
145 double actual
= (double)hit
/ (double)test
;
150 double byte_per_insert
= (double)bl
.length() / (double)max
;
151 unsigned est_after
= bf
.approx_unique_element_count();
158 << "\t" << bl
.length() << "\t" << byte_per_insert
161 ASSERT_TRUE(actual
< fpp
* 2.0);
162 ASSERT_TRUE(actual
> fpp
/ 2.0);
163 ASSERT_TRUE(est_after
< est
* 2);
164 ASSERT_TRUE(est_after
> est
/ 2);
170 TEST(BloomFilter
, BinSweep
) {
171 std::cout
.setf(std::ios_base::fixed
, std::ios_base::floatfield
);
172 std::cout
.precision(5);
173 int total_max
= 16384;
174 float total_fpp
= .01;
175 std::cout
<< "total_inserts " << total_max
<< " target-fpp " << total_fpp
<< std::endl
;
176 for (int bins
= 1; bins
< 16; ++bins
) {
177 int max
= total_max
/ bins
;
178 float fpp
= total_fpp
/ bins
;//pow(total_fpp, bins);
180 std::vector
<bloom_filter
*> ls
;
182 for (int i
=0; i
<bins
; i
++) {
183 ls
.push_back(new bloom_filter(max
, fpp
, i
));
184 for (int j
=0; j
<max
; j
++) {
185 ls
.back()->insert(10000 * (i
+1) + j
);
187 ::encode(*ls
.front(), bl
);
191 int test
= max
* 100;
192 for (int i
=0; i
<test
; ++i
) {
193 for (std::vector
<bloom_filter
*>::iterator j
= ls
.begin(); j
!= ls
.end(); ++j
) {
194 if ((*j
)->contains(i
* 732)) { // note: sequential i does not work here; the intenral int hash is weak!!
201 double actual
= (double)hit
/ (double)test
;
202 std::cout
<< "bins " << bins
<< " bin-max " << max
<< " bin-fpp " << fpp
203 << " actual-fpp " << actual
204 << " total-size " << bl
.length() << std::endl
;
208 // disable these tests; doing dual insertions in consecutive filters
209 // appears to be equivalent to doing a single insertion in a bloom
210 // filter that is twice as big.
213 // test the fpp over a sequence of bloom filters, each with unique
214 // items inserted into it.
216 // we expect: actual_fpp = num_filters * per_filter_fpp
217 TEST(BloomFilter
, Sequence
) {
221 for (int seq
= 2; seq
<= 128; seq
*= 2) {
222 std::vector
<bloom_filter
*> ls
;
223 for (int i
=0; i
<seq
; i
++) {
224 ls
.push_back(new bloom_filter(max
*2, fpp
, i
));
225 for (int j
=0; j
<max
; j
++) {
226 ls
.back()->insert("ok" + stringify(j
) + "_" + stringify(i
));
228 ls
[ls
.size() - 2]->insert("ok" + stringify(j
) + "_" + stringify(i
));
233 int test
= max
* 100;
234 for (int i
=0; i
<test
; ++i
) {
235 for (std::vector
<bloom_filter
*>::iterator j
= ls
.begin(); j
!= ls
.end(); ++j
) {
236 if ((*j
)->contains("bad" + stringify(i
))) {
243 double actual
= (double)hit
/ (double)test
;
244 std::cout
<< "seq " << seq
<< " max " << max
<< " fpp " << fpp
<< " actual " << actual
<< std::endl
;
248 // test the ffp over a sequence of bloom filters, where actual values
249 // are always inserted into a consecutive pair of filters. in order
250 // to have a false positive, we need to falsely match two consecutive
253 // we expect: actual_fpp = num_filters * per_filter_fpp^2
254 TEST(BloomFilter
, SequenceDouble
) {
257 for (int seq
= 2; seq
<= 128; seq
*= 2) {
258 std::vector
<bloom_filter
*> ls
;
259 for (int i
=0; i
<seq
; i
++) {
260 ls
.push_back(new bloom_filter(max
*2, fpp
, i
));
261 for (int j
=0; j
<max
; j
++) {
262 ls
.back()->insert("ok" + stringify(j
) + "_" + stringify(i
));
264 ls
[ls
.size() - 2]->insert("ok" + stringify(j
) + "_" + stringify(i
));
269 int test
= max
* 100;
271 for (int i
=0; i
<test
; ++i
) {
272 for (std::vector
<bloom_filter
*>::iterator j
= ls
.begin(); j
!= ls
.end(); ++j
) {
273 if ((*j
)->contains("bad" + stringify(i
))) {
285 double actual
= (double)hit
/ (double)test
;
286 std::cout
<< "seq " << seq
<< " max " << max
<< " fpp " << fpp
<< " actual " << actual
287 << " expected " << (fpp
*fpp
*(double)seq
) << std::endl
;
293 TEST(BloomFilter
, Assignement
) {
294 bloom_filter
bf1(10, .1, 1), bf2
;
300 ASSERT_TRUE(bf2
.contains("foo"));
301 ASSERT_FALSE(bf2
.contains("bar"));
303 ASSERT_EQ(2U, bf1
.element_count());
304 ASSERT_EQ(1U, bf2
.element_count());