]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #include "rocksdb/filter_policy.h" | |
11 | ||
12 | #include "rocksdb/slice.h" | |
13 | #include "table/block_based_filter_block.h" | |
11fdf7f2 | 14 | #include "table/full_filter_bits_builder.h" |
7c673cae | 15 | #include "table/full_filter_block.h" |
7c673cae | 16 | #include "util/coding.h" |
11fdf7f2 | 17 | #include "util/hash.h" |
7c673cae FG |
18 | |
19 | namespace rocksdb { | |
20 | ||
21 | class BlockBasedFilterBlockBuilder; | |
22 | class FullFilterBlockBuilder; | |
23 | ||
11fdf7f2 TL |
24 | FullFilterBitsBuilder::FullFilterBitsBuilder(const size_t bits_per_key, |
25 | const size_t num_probes) | |
26 | : bits_per_key_(bits_per_key), num_probes_(num_probes) { | |
27 | assert(bits_per_key_); | |
7c673cae FG |
28 | } |
29 | ||
11fdf7f2 | 30 | FullFilterBitsBuilder::~FullFilterBitsBuilder() {} |
7c673cae | 31 | |
11fdf7f2 | 32 | void FullFilterBitsBuilder::AddKey(const Slice& key) { |
7c673cae FG |
33 | uint32_t hash = BloomHash(key); |
34 | if (hash_entries_.size() == 0 || hash != hash_entries_.back()) { | |
35 | hash_entries_.push_back(hash); | |
36 | } | |
37 | } | |
38 | ||
11fdf7f2 | 39 | Slice FullFilterBitsBuilder::Finish(std::unique_ptr<const char[]>* buf) { |
7c673cae FG |
40 | uint32_t total_bits, num_lines; |
41 | char* data = ReserveSpace(static_cast<int>(hash_entries_.size()), | |
42 | &total_bits, &num_lines); | |
43 | assert(data); | |
44 | ||
45 | if (total_bits != 0 && num_lines != 0) { | |
46 | for (auto h : hash_entries_) { | |
47 | AddHash(h, data, num_lines, total_bits); | |
48 | } | |
49 | } | |
50 | data[total_bits/8] = static_cast<char>(num_probes_); | |
51 | EncodeFixed32(data + total_bits/8 + 1, static_cast<uint32_t>(num_lines)); | |
52 | ||
53 | const char* const_data = data; | |
54 | buf->reset(const_data); | |
55 | hash_entries_.clear(); | |
56 | ||
57 | return Slice(data, total_bits / 8 + 5); | |
58 | } | |
59 | ||
7c673cae FG |
60 | uint32_t FullFilterBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) { |
61 | uint32_t num_lines = | |
62 | (total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8); | |
63 | ||
64 | // Make num_lines an odd number to make sure more bits are involved | |
65 | // when determining which block. | |
66 | if (num_lines % 2 == 0) { | |
67 | num_lines++; | |
68 | } | |
69 | return num_lines * (CACHE_LINE_SIZE * 8); | |
70 | } | |
71 | ||
11fdf7f2 TL |
72 | uint32_t FullFilterBitsBuilder::CalculateSpace(const int num_entry, |
73 | uint32_t* total_bits, | |
74 | uint32_t* num_lines) { | |
7c673cae | 75 | assert(bits_per_key_); |
7c673cae FG |
76 | if (num_entry != 0) { |
77 | uint32_t total_bits_tmp = num_entry * static_cast<uint32_t>(bits_per_key_); | |
78 | ||
79 | *total_bits = GetTotalBitsForLocality(total_bits_tmp); | |
80 | *num_lines = *total_bits / (CACHE_LINE_SIZE * 8); | |
81 | assert(*total_bits > 0 && *total_bits % 8 == 0); | |
82 | } else { | |
83 | // filter is empty, just leave space for metadata | |
84 | *total_bits = 0; | |
85 | *num_lines = 0; | |
86 | } | |
87 | ||
88 | // Reserve space for Filter | |
89 | uint32_t sz = *total_bits / 8; | |
90 | sz += 5; // 4 bytes for num_lines, 1 byte for num_probes | |
11fdf7f2 TL |
91 | return sz; |
92 | } | |
7c673cae | 93 | |
11fdf7f2 TL |
94 | char* FullFilterBitsBuilder::ReserveSpace(const int num_entry, |
95 | uint32_t* total_bits, | |
96 | uint32_t* num_lines) { | |
97 | uint32_t sz = CalculateSpace(num_entry, total_bits, num_lines); | |
98 | char* data = new char[sz]; | |
7c673cae FG |
99 | memset(data, 0, sz); |
100 | return data; | |
101 | } | |
102 | ||
11fdf7f2 TL |
103 | int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t space) { |
104 | assert(bits_per_key_); | |
105 | assert(space > 0); | |
106 | uint32_t dont_care1, dont_care2; | |
107 | int high = (int) (space * 8 / bits_per_key_ + 1); | |
108 | int low = 1; | |
109 | int n = high; | |
110 | for (; n >= low; n--) { | |
111 | uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2); | |
112 | if (sz <= space) { | |
113 | break; | |
114 | } | |
115 | } | |
116 | assert(n < high); // High should be an overestimation | |
117 | return n; | |
118 | } | |
119 | ||
7c673cae FG |
120 | inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data, |
121 | uint32_t num_lines, uint32_t total_bits) { | |
11fdf7f2 TL |
122 | #ifdef NDEBUG |
123 | (void)total_bits; | |
124 | #endif | |
7c673cae FG |
125 | assert(num_lines > 0 && total_bits > 0); |
126 | ||
127 | const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits | |
128 | uint32_t b = (h % num_lines) * (CACHE_LINE_SIZE * 8); | |
129 | ||
130 | for (uint32_t i = 0; i < num_probes_; ++i) { | |
131 | // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized | |
132 | // to a simple operation by compiler. | |
133 | const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8)); | |
134 | data[bitpos / 8] |= (1 << (bitpos % 8)); | |
135 | ||
136 | h += delta; | |
137 | } | |
138 | } | |
139 | ||
11fdf7f2 | 140 | namespace { |
7c673cae FG |
141 | class FullFilterBitsReader : public FilterBitsReader { |
142 | public: | |
143 | explicit FullFilterBitsReader(const Slice& contents) | |
144 | : data_(const_cast<char*>(contents.data())), | |
145 | data_len_(static_cast<uint32_t>(contents.size())), | |
146 | num_probes_(0), | |
11fdf7f2 TL |
147 | num_lines_(0), |
148 | log2_cache_line_size_(0) { | |
7c673cae FG |
149 | assert(data_); |
150 | GetFilterMeta(contents, &num_probes_, &num_lines_); | |
151 | // Sanitize broken parameter | |
152 | if (num_lines_ != 0 && (data_len_-5) % num_lines_ != 0) { | |
153 | num_lines_ = 0; | |
154 | num_probes_ = 0; | |
11fdf7f2 TL |
155 | } else if (num_lines_ != 0) { |
156 | while (true) { | |
157 | uint32_t num_lines_at_curr_cache_size = | |
158 | (data_len_ - 5) >> log2_cache_line_size_; | |
159 | if (num_lines_at_curr_cache_size == 0) { | |
160 | // The cache line size seems not a power of two. It's not supported | |
161 | // and indicates a corruption so disable using this filter. | |
162 | assert(false); | |
163 | num_lines_ = 0; | |
164 | num_probes_ = 0; | |
165 | break; | |
166 | } | |
167 | if (num_lines_at_curr_cache_size == num_lines_) { | |
168 | break; | |
169 | } | |
170 | ++log2_cache_line_size_; | |
171 | } | |
7c673cae FG |
172 | } |
173 | } | |
174 | ||
494da23a | 175 | ~FullFilterBitsReader() override {} |
7c673cae | 176 | |
494da23a | 177 | bool MayMatch(const Slice& entry) override { |
7c673cae FG |
178 | if (data_len_ <= 5) { // remain same with original filter |
179 | return false; | |
180 | } | |
181 | // Other Error params, including a broken filter, regarded as match | |
182 | if (num_probes_ == 0 || num_lines_ == 0) return true; | |
183 | uint32_t hash = BloomHash(entry); | |
184 | return HashMayMatch(hash, Slice(data_, data_len_), | |
185 | num_probes_, num_lines_); | |
186 | } | |
187 | ||
188 | private: | |
189 | // Filter meta data | |
190 | char* data_; | |
191 | uint32_t data_len_; | |
192 | size_t num_probes_; | |
193 | uint32_t num_lines_; | |
11fdf7f2 | 194 | uint32_t log2_cache_line_size_; |
7c673cae FG |
195 | |
196 | // Get num_probes, and num_lines from filter | |
197 | // If filter format broken, set both to 0. | |
198 | void GetFilterMeta(const Slice& filter, size_t* num_probes, | |
199 | uint32_t* num_lines); | |
200 | ||
201 | // "filter" contains the data appended by a preceding call to | |
11fdf7f2 TL |
202 | // FilterBitsBuilder::Finish. This method must return true if the key was |
203 | // passed to FilterBitsBuilder::AddKey. This method may return true or false | |
204 | // if the key was not on the list, but it should aim to return false with a | |
205 | // high probability. | |
7c673cae FG |
206 | // |
207 | // hash: target to be checked | |
208 | // filter: the whole filter, including meta data bytes | |
209 | // num_probes: number of probes, read before hand | |
210 | // num_lines: filter metadata, read before hand | |
211 | // Before calling this function, need to ensure the input meta data | |
212 | // is valid. | |
213 | bool HashMayMatch(const uint32_t& hash, const Slice& filter, | |
214 | const size_t& num_probes, const uint32_t& num_lines); | |
215 | ||
216 | // No Copy allowed | |
217 | FullFilterBitsReader(const FullFilterBitsReader&); | |
218 | void operator=(const FullFilterBitsReader&); | |
219 | }; | |
220 | ||
221 | void FullFilterBitsReader::GetFilterMeta(const Slice& filter, | |
222 | size_t* num_probes, uint32_t* num_lines) { | |
223 | uint32_t len = static_cast<uint32_t>(filter.size()); | |
224 | if (len <= 5) { | |
225 | // filter is empty or broken | |
226 | *num_probes = 0; | |
227 | *num_lines = 0; | |
228 | return; | |
229 | } | |
230 | ||
231 | *num_probes = filter.data()[len - 5]; | |
232 | *num_lines = DecodeFixed32(filter.data() + len - 4); | |
233 | } | |
234 | ||
235 | bool FullFilterBitsReader::HashMayMatch(const uint32_t& hash, | |
236 | const Slice& filter, const size_t& num_probes, | |
237 | const uint32_t& num_lines) { | |
238 | uint32_t len = static_cast<uint32_t>(filter.size()); | |
239 | if (len <= 5) return false; // remain the same with original filter | |
240 | ||
241 | // It is ensured the params are valid before calling it | |
242 | assert(num_probes != 0); | |
243 | assert(num_lines != 0 && (len - 5) % num_lines == 0); | |
7c673cae FG |
244 | const char* data = filter.data(); |
245 | ||
246 | uint32_t h = hash; | |
247 | const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits | |
11fdf7f2 TL |
248 | // Left shift by an extra 3 to convert bytes to bits |
249 | uint32_t b = (h % num_lines) << (log2_cache_line_size_ + 3); | |
250 | PREFETCH(&data[b / 8], 0 /* rw */, 1 /* locality */); | |
251 | PREFETCH(&data[b / 8 + (1 << log2_cache_line_size_) - 1], 0 /* rw */, | |
252 | 1 /* locality */); | |
7c673cae FG |
253 | |
254 | for (uint32_t i = 0; i < num_probes; ++i) { | |
255 | // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized | |
256 | // to a simple and operation by compiler. | |
11fdf7f2 | 257 | const uint32_t bitpos = b + (h & ((1 << (log2_cache_line_size_ + 3)) - 1)); |
7c673cae FG |
258 | if (((data[bitpos / 8]) & (1 << (bitpos % 8))) == 0) { |
259 | return false; | |
260 | } | |
261 | ||
262 | h += delta; | |
263 | } | |
264 | ||
265 | return true; | |
266 | } | |
267 | ||
268 | // An implementation of filter policy | |
269 | class BloomFilterPolicy : public FilterPolicy { | |
270 | public: | |
271 | explicit BloomFilterPolicy(int bits_per_key, bool use_block_based_builder) | |
272 | : bits_per_key_(bits_per_key), hash_func_(BloomHash), | |
273 | use_block_based_builder_(use_block_based_builder) { | |
274 | initialize(); | |
275 | } | |
276 | ||
494da23a | 277 | ~BloomFilterPolicy() override {} |
7c673cae | 278 | |
494da23a | 279 | const char* Name() const override { return "rocksdb.BuiltinBloomFilter"; } |
7c673cae | 280 | |
494da23a | 281 | void CreateFilter(const Slice* keys, int n, std::string* dst) const override { |
7c673cae FG |
282 | // Compute bloom filter size (in both bits and bytes) |
283 | size_t bits = n * bits_per_key_; | |
284 | ||
285 | // For small n, we can see a very high false positive rate. Fix it | |
286 | // by enforcing a minimum bloom filter length. | |
287 | if (bits < 64) bits = 64; | |
288 | ||
289 | size_t bytes = (bits + 7) / 8; | |
290 | bits = bytes * 8; | |
291 | ||
292 | const size_t init_size = dst->size(); | |
293 | dst->resize(init_size + bytes, 0); | |
294 | dst->push_back(static_cast<char>(num_probes_)); // Remember # of probes | |
295 | char* array = &(*dst)[init_size]; | |
296 | for (size_t i = 0; i < (size_t)n; i++) { | |
297 | // Use double-hashing to generate a sequence of hash values. | |
298 | // See analysis in [Kirsch,Mitzenmacher 2006]. | |
299 | uint32_t h = hash_func_(keys[i]); | |
300 | const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits | |
301 | for (size_t j = 0; j < num_probes_; j++) { | |
302 | const uint32_t bitpos = h % bits; | |
303 | array[bitpos/8] |= (1 << (bitpos % 8)); | |
304 | h += delta; | |
305 | } | |
306 | } | |
307 | } | |
308 | ||
494da23a | 309 | bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override { |
7c673cae FG |
310 | const size_t len = bloom_filter.size(); |
311 | if (len < 2) return false; | |
312 | ||
313 | const char* array = bloom_filter.data(); | |
314 | const size_t bits = (len - 1) * 8; | |
315 | ||
316 | // Use the encoded k so that we can read filters generated by | |
317 | // bloom filters created using different parameters. | |
318 | const size_t k = array[len-1]; | |
319 | if (k > 30) { | |
320 | // Reserved for potentially new encodings for short bloom filters. | |
321 | // Consider it a match. | |
322 | return true; | |
323 | } | |
324 | ||
325 | uint32_t h = hash_func_(key); | |
326 | const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits | |
327 | for (size_t j = 0; j < k; j++) { | |
328 | const uint32_t bitpos = h % bits; | |
329 | if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false; | |
330 | h += delta; | |
331 | } | |
332 | return true; | |
333 | } | |
334 | ||
494da23a | 335 | FilterBitsBuilder* GetFilterBitsBuilder() const override { |
7c673cae FG |
336 | if (use_block_based_builder_) { |
337 | return nullptr; | |
338 | } | |
339 | ||
340 | return new FullFilterBitsBuilder(bits_per_key_, num_probes_); | |
341 | } | |
342 | ||
494da23a | 343 | FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override { |
7c673cae FG |
344 | return new FullFilterBitsReader(contents); |
345 | } | |
346 | ||
347 | // If choose to use block based builder | |
348 | bool UseBlockBasedBuilder() { return use_block_based_builder_; } | |
349 | ||
350 | private: | |
351 | size_t bits_per_key_; | |
352 | size_t num_probes_; | |
353 | uint32_t (*hash_func_)(const Slice& key); | |
354 | ||
355 | const bool use_block_based_builder_; | |
356 | ||
357 | void initialize() { | |
358 | // We intentionally round down to reduce probing cost a little bit | |
359 | num_probes_ = static_cast<size_t>(bits_per_key_ * 0.69); // 0.69 =~ ln(2) | |
360 | if (num_probes_ < 1) num_probes_ = 1; | |
361 | if (num_probes_ > 30) num_probes_ = 30; | |
362 | } | |
363 | }; | |
364 | ||
365 | } // namespace | |
366 | ||
367 | const FilterPolicy* NewBloomFilterPolicy(int bits_per_key, | |
368 | bool use_block_based_builder) { | |
369 | return new BloomFilterPolicy(bits_per_key, use_block_based_builder); | |
370 | } | |
371 | ||
372 | } // namespace rocksdb |