]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/util/bloom_test.cc
bbf1d3ae9a84da1b335ea602c212f93ead6e6377
[ceph.git] / ceph / src / rocksdb / util / bloom_test.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #ifndef GFLAGS
11 #include <cstdio>
12 int main() {
13 fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
14 return 0;
15 }
16 #else
17
18 #include <vector>
19
20 #include "rocksdb/filter_policy.h"
21 #include "table/full_filter_bits_builder.h"
22 #include "util/arena.h"
23 #include "util/gflags_compat.h"
24 #include "util/logging.h"
25 #include "util/testharness.h"
26 #include "util/testutil.h"
27
28 using GFLAGS_NAMESPACE::ParseCommandLineFlags;
29
30 DEFINE_int32(bits_per_key, 10, "");
31
32 namespace rocksdb {
33
34 static const int kVerbose = 1;
35
36 static Slice Key(int i, char* buffer) {
37 std::string s;
38 PutFixed32(&s, static_cast<uint32_t>(i));
39 memcpy(buffer, s.c_str(), sizeof(i));
40 return Slice(buffer, sizeof(i));
41 }
42
43 static int NextLength(int length) {
44 if (length < 10) {
45 length += 1;
46 } else if (length < 100) {
47 length += 10;
48 } else if (length < 1000) {
49 length += 100;
50 } else {
51 length += 1000;
52 }
53 return length;
54 }
55
56 class BloomTest : public testing::Test {
57 private:
58 const FilterPolicy* policy_;
59 std::string filter_;
60 std::vector<std::string> keys_;
61
62 public:
63 BloomTest() : policy_(
64 NewBloomFilterPolicy(FLAGS_bits_per_key)) {}
65
66 ~BloomTest() {
67 delete policy_;
68 }
69
70 void Reset() {
71 keys_.clear();
72 filter_.clear();
73 }
74
75 void Add(const Slice& s) {
76 keys_.push_back(s.ToString());
77 }
78
79 void Build() {
80 std::vector<Slice> key_slices;
81 for (size_t i = 0; i < keys_.size(); i++) {
82 key_slices.push_back(Slice(keys_[i]));
83 }
84 filter_.clear();
85 policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
86 &filter_);
87 keys_.clear();
88 if (kVerbose >= 2) DumpFilter();
89 }
90
91 size_t FilterSize() const {
92 return filter_.size();
93 }
94
95 void DumpFilter() {
96 fprintf(stderr, "F(");
97 for (size_t i = 0; i+1 < filter_.size(); i++) {
98 const unsigned int c = static_cast<unsigned int>(filter_[i]);
99 for (int j = 0; j < 8; j++) {
100 fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
101 }
102 }
103 fprintf(stderr, ")\n");
104 }
105
106 bool Matches(const Slice& s) {
107 if (!keys_.empty()) {
108 Build();
109 }
110 return policy_->KeyMayMatch(s, filter_);
111 }
112
113 double FalsePositiveRate() {
114 char buffer[sizeof(int)];
115 int result = 0;
116 for (int i = 0; i < 10000; i++) {
117 if (Matches(Key(i + 1000000000, buffer))) {
118 result++;
119 }
120 }
121 return result / 10000.0;
122 }
123 };
124
125 TEST_F(BloomTest, EmptyFilter) {
126 ASSERT_TRUE(! Matches("hello"));
127 ASSERT_TRUE(! Matches("world"));
128 }
129
130 TEST_F(BloomTest, Small) {
131 Add("hello");
132 Add("world");
133 ASSERT_TRUE(Matches("hello"));
134 ASSERT_TRUE(Matches("world"));
135 ASSERT_TRUE(! Matches("x"));
136 ASSERT_TRUE(! Matches("foo"));
137 }
138
139 TEST_F(BloomTest, VaryingLengths) {
140 char buffer[sizeof(int)];
141
142 // Count number of filters that significantly exceed the false positive rate
143 int mediocre_filters = 0;
144 int good_filters = 0;
145
146 for (int length = 1; length <= 10000; length = NextLength(length)) {
147 Reset();
148 for (int i = 0; i < length; i++) {
149 Add(Key(i, buffer));
150 }
151 Build();
152
153 ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 40)) << length;
154
155 // All added keys must match
156 for (int i = 0; i < length; i++) {
157 ASSERT_TRUE(Matches(Key(i, buffer)))
158 << "Length " << length << "; key " << i;
159 }
160
161 // Check false positive rate
162 double rate = FalsePositiveRate();
163 if (kVerbose >= 1) {
164 fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
165 rate*100.0, length, static_cast<int>(FilterSize()));
166 }
167 ASSERT_LE(rate, 0.02); // Must not be over 2%
168 if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often
169 else good_filters++;
170 }
171 if (kVerbose >= 1) {
172 fprintf(stderr, "Filters: %d good, %d mediocre\n",
173 good_filters, mediocre_filters);
174 }
175 ASSERT_LE(mediocre_filters, good_filters/5);
176 }
177
178 // Different bits-per-byte
179
180 class FullBloomTest : public testing::Test {
181 private:
182 const FilterPolicy* policy_;
183 std::unique_ptr<FilterBitsBuilder> bits_builder_;
184 std::unique_ptr<FilterBitsReader> bits_reader_;
185 std::unique_ptr<const char[]> buf_;
186 size_t filter_size_;
187
188 public:
189 FullBloomTest() :
190 policy_(NewBloomFilterPolicy(FLAGS_bits_per_key, false)),
191 filter_size_(0) {
192 Reset();
193 }
194
195 ~FullBloomTest() {
196 delete policy_;
197 }
198
199 FullFilterBitsBuilder* GetFullFilterBitsBuilder() {
200 return dynamic_cast<FullFilterBitsBuilder*>(bits_builder_.get());
201 }
202
203 void Reset() {
204 bits_builder_.reset(policy_->GetFilterBitsBuilder());
205 bits_reader_.reset(nullptr);
206 buf_.reset(nullptr);
207 filter_size_ = 0;
208 }
209
210 void Add(const Slice& s) {
211 bits_builder_->AddKey(s);
212 }
213
214 void Build() {
215 Slice filter = bits_builder_->Finish(&buf_);
216 bits_reader_.reset(policy_->GetFilterBitsReader(filter));
217 filter_size_ = filter.size();
218 }
219
220 size_t FilterSize() const {
221 return filter_size_;
222 }
223
224 bool Matches(const Slice& s) {
225 if (bits_reader_ == nullptr) {
226 Build();
227 }
228 return bits_reader_->MayMatch(s);
229 }
230
231 double FalsePositiveRate() {
232 char buffer[sizeof(int)];
233 int result = 0;
234 for (int i = 0; i < 10000; i++) {
235 if (Matches(Key(i + 1000000000, buffer))) {
236 result++;
237 }
238 }
239 return result / 10000.0;
240 }
241 };
242
243 TEST_F(FullBloomTest, FilterSize) {
244 uint32_t dont_care1, dont_care2;
245 auto full_bits_builder = GetFullFilterBitsBuilder();
246 for (int n = 1; n < 100; n++) {
247 auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2);
248 auto n2 = full_bits_builder->CalculateNumEntry(space);
249 ASSERT_GE(n2, n);
250 auto space2 =
251 full_bits_builder->CalculateSpace(n2, &dont_care1, &dont_care2);
252 ASSERT_EQ(space, space2);
253 }
254 }
255
256 TEST_F(FullBloomTest, FullEmptyFilter) {
257 // Empty filter is not match, at this level
258 ASSERT_TRUE(!Matches("hello"));
259 ASSERT_TRUE(!Matches("world"));
260 }
261
262 TEST_F(FullBloomTest, FullSmall) {
263 Add("hello");
264 Add("world");
265 ASSERT_TRUE(Matches("hello"));
266 ASSERT_TRUE(Matches("world"));
267 ASSERT_TRUE(!Matches("x"));
268 ASSERT_TRUE(!Matches("foo"));
269 }
270
271 TEST_F(FullBloomTest, FullVaryingLengths) {
272 char buffer[sizeof(int)];
273
274 // Count number of filters that significantly exceed the false positive rate
275 int mediocre_filters = 0;
276 int good_filters = 0;
277
278 for (int length = 1; length <= 10000; length = NextLength(length)) {
279 Reset();
280 for (int i = 0; i < length; i++) {
281 Add(Key(i, buffer));
282 }
283 Build();
284
285 ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 128 + 5)) << length;
286
287 // All added keys must match
288 for (int i = 0; i < length; i++) {
289 ASSERT_TRUE(Matches(Key(i, buffer)))
290 << "Length " << length << "; key " << i;
291 }
292
293 // Check false positive rate
294 double rate = FalsePositiveRate();
295 if (kVerbose >= 1) {
296 fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
297 rate*100.0, length, static_cast<int>(FilterSize()));
298 }
299 ASSERT_LE(rate, 0.02); // Must not be over 2%
300 if (rate > 0.0125)
301 mediocre_filters++; // Allowed, but not too often
302 else
303 good_filters++;
304 }
305 if (kVerbose >= 1) {
306 fprintf(stderr, "Filters: %d good, %d mediocre\n",
307 good_filters, mediocre_filters);
308 }
309 ASSERT_LE(mediocre_filters, good_filters/5);
310 }
311
312 } // namespace rocksdb
313
314 int main(int argc, char** argv) {
315 ::testing::InitGoogleTest(&argc, argv);
316 ParseCommandLineFlags(&argc, &argv, true);
317
318 return RUN_ALL_TESTS();
319 }
320
321 #endif // GFLAGS