]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/util/bloom.cc
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / util / bloom.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include "rocksdb/filter_policy.h"
11
12 #include "rocksdb/slice.h"
13 #include "table/block_based_filter_block.h"
14 #include "table/full_filter_bits_builder.h"
15 #include "table/full_filter_block.h"
16 #include "util/coding.h"
17 #include "util/hash.h"
18
19 namespace rocksdb {
20
21 class BlockBasedFilterBlockBuilder;
22 class FullFilterBlockBuilder;
23
24 FullFilterBitsBuilder::FullFilterBitsBuilder(const size_t bits_per_key,
25 const size_t num_probes)
26 : bits_per_key_(bits_per_key), num_probes_(num_probes) {
27 assert(bits_per_key_);
28 }
29
30 FullFilterBitsBuilder::~FullFilterBitsBuilder() {}
31
32 void FullFilterBitsBuilder::AddKey(const Slice& key) {
33 uint32_t hash = BloomHash(key);
34 if (hash_entries_.size() == 0 || hash != hash_entries_.back()) {
35 hash_entries_.push_back(hash);
36 }
37 }
38
39 Slice FullFilterBitsBuilder::Finish(std::unique_ptr<const char[]>* buf) {
40 uint32_t total_bits, num_lines;
41 char* data = ReserveSpace(static_cast<int>(hash_entries_.size()),
42 &total_bits, &num_lines);
43 assert(data);
44
45 if (total_bits != 0 && num_lines != 0) {
46 for (auto h : hash_entries_) {
47 AddHash(h, data, num_lines, total_bits);
48 }
49 }
50 data[total_bits/8] = static_cast<char>(num_probes_);
51 EncodeFixed32(data + total_bits/8 + 1, static_cast<uint32_t>(num_lines));
52
53 const char* const_data = data;
54 buf->reset(const_data);
55 hash_entries_.clear();
56
57 return Slice(data, total_bits / 8 + 5);
58 }
59
60 uint32_t FullFilterBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) {
61 uint32_t num_lines =
62 (total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8);
63
64 // Make num_lines an odd number to make sure more bits are involved
65 // when determining which block.
66 if (num_lines % 2 == 0) {
67 num_lines++;
68 }
69 return num_lines * (CACHE_LINE_SIZE * 8);
70 }
71
72 uint32_t FullFilterBitsBuilder::CalculateSpace(const int num_entry,
73 uint32_t* total_bits,
74 uint32_t* num_lines) {
75 assert(bits_per_key_);
76 if (num_entry != 0) {
77 uint32_t total_bits_tmp = num_entry * static_cast<uint32_t>(bits_per_key_);
78
79 *total_bits = GetTotalBitsForLocality(total_bits_tmp);
80 *num_lines = *total_bits / (CACHE_LINE_SIZE * 8);
81 assert(*total_bits > 0 && *total_bits % 8 == 0);
82 } else {
83 // filter is empty, just leave space for metadata
84 *total_bits = 0;
85 *num_lines = 0;
86 }
87
88 // Reserve space for Filter
89 uint32_t sz = *total_bits / 8;
90 sz += 5; // 4 bytes for num_lines, 1 byte for num_probes
91 return sz;
92 }
93
94 char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
95 uint32_t* total_bits,
96 uint32_t* num_lines) {
97 uint32_t sz = CalculateSpace(num_entry, total_bits, num_lines);
98 char* data = new char[sz];
99 memset(data, 0, sz);
100 return data;
101 }
102
103 int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t space) {
104 assert(bits_per_key_);
105 assert(space > 0);
106 uint32_t dont_care1, dont_care2;
107 int high = (int) (space * 8 / bits_per_key_ + 1);
108 int low = 1;
109 int n = high;
110 for (; n >= low; n--) {
111 uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2);
112 if (sz <= space) {
113 break;
114 }
115 }
116 assert(n < high); // High should be an overestimation
117 return n;
118 }
119
120 inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data,
121 uint32_t num_lines, uint32_t total_bits) {
122 #ifdef NDEBUG
123 (void)total_bits;
124 #endif
125 assert(num_lines > 0 && total_bits > 0);
126
127 const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
128 uint32_t b = (h % num_lines) * (CACHE_LINE_SIZE * 8);
129
130 for (uint32_t i = 0; i < num_probes_; ++i) {
131 // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
132 // to a simple operation by compiler.
133 const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
134 data[bitpos / 8] |= (1 << (bitpos % 8));
135
136 h += delta;
137 }
138 }
139
140 namespace {
141 class FullFilterBitsReader : public FilterBitsReader {
142 public:
143 explicit FullFilterBitsReader(const Slice& contents)
144 : data_(const_cast<char*>(contents.data())),
145 data_len_(static_cast<uint32_t>(contents.size())),
146 num_probes_(0),
147 num_lines_(0),
148 log2_cache_line_size_(0) {
149 assert(data_);
150 GetFilterMeta(contents, &num_probes_, &num_lines_);
151 // Sanitize broken parameter
152 if (num_lines_ != 0 && (data_len_-5) % num_lines_ != 0) {
153 num_lines_ = 0;
154 num_probes_ = 0;
155 } else if (num_lines_ != 0) {
156 while (true) {
157 uint32_t num_lines_at_curr_cache_size =
158 (data_len_ - 5) >> log2_cache_line_size_;
159 if (num_lines_at_curr_cache_size == 0) {
160 // The cache line size seems not a power of two. It's not supported
161 // and indicates a corruption so disable using this filter.
162 assert(false);
163 num_lines_ = 0;
164 num_probes_ = 0;
165 break;
166 }
167 if (num_lines_at_curr_cache_size == num_lines_) {
168 break;
169 }
170 ++log2_cache_line_size_;
171 }
172 }
173 }
174
175 ~FullFilterBitsReader() override {}
176
177 bool MayMatch(const Slice& entry) override {
178 if (data_len_ <= 5) { // remain same with original filter
179 return false;
180 }
181 // Other Error params, including a broken filter, regarded as match
182 if (num_probes_ == 0 || num_lines_ == 0) return true;
183 uint32_t hash = BloomHash(entry);
184 return HashMayMatch(hash, Slice(data_, data_len_),
185 num_probes_, num_lines_);
186 }
187
188 private:
189 // Filter meta data
190 char* data_;
191 uint32_t data_len_;
192 size_t num_probes_;
193 uint32_t num_lines_;
194 uint32_t log2_cache_line_size_;
195
196 // Get num_probes, and num_lines from filter
197 // If filter format broken, set both to 0.
198 void GetFilterMeta(const Slice& filter, size_t* num_probes,
199 uint32_t* num_lines);
200
201 // "filter" contains the data appended by a preceding call to
202 // FilterBitsBuilder::Finish. This method must return true if the key was
203 // passed to FilterBitsBuilder::AddKey. This method may return true or false
204 // if the key was not on the list, but it should aim to return false with a
205 // high probability.
206 //
207 // hash: target to be checked
208 // filter: the whole filter, including meta data bytes
209 // num_probes: number of probes, read before hand
210 // num_lines: filter metadata, read before hand
211 // Before calling this function, need to ensure the input meta data
212 // is valid.
213 bool HashMayMatch(const uint32_t& hash, const Slice& filter,
214 const size_t& num_probes, const uint32_t& num_lines);
215
216 // No Copy allowed
217 FullFilterBitsReader(const FullFilterBitsReader&);
218 void operator=(const FullFilterBitsReader&);
219 };
220
221 void FullFilterBitsReader::GetFilterMeta(const Slice& filter,
222 size_t* num_probes, uint32_t* num_lines) {
223 uint32_t len = static_cast<uint32_t>(filter.size());
224 if (len <= 5) {
225 // filter is empty or broken
226 *num_probes = 0;
227 *num_lines = 0;
228 return;
229 }
230
231 *num_probes = filter.data()[len - 5];
232 *num_lines = DecodeFixed32(filter.data() + len - 4);
233 }
234
235 bool FullFilterBitsReader::HashMayMatch(const uint32_t& hash,
236 const Slice& filter, const size_t& num_probes,
237 const uint32_t& num_lines) {
238 uint32_t len = static_cast<uint32_t>(filter.size());
239 if (len <= 5) return false; // remain the same with original filter
240
241 // It is ensured the params are valid before calling it
242 assert(num_probes != 0);
243 assert(num_lines != 0 && (len - 5) % num_lines == 0);
244 const char* data = filter.data();
245
246 uint32_t h = hash;
247 const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
248 // Left shift by an extra 3 to convert bytes to bits
249 uint32_t b = (h % num_lines) << (log2_cache_line_size_ + 3);
250 PREFETCH(&data[b / 8], 0 /* rw */, 1 /* locality */);
251 PREFETCH(&data[b / 8 + (1 << log2_cache_line_size_) - 1], 0 /* rw */,
252 1 /* locality */);
253
254 for (uint32_t i = 0; i < num_probes; ++i) {
255 // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
256 // to a simple and operation by compiler.
257 const uint32_t bitpos = b + (h & ((1 << (log2_cache_line_size_ + 3)) - 1));
258 if (((data[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
259 return false;
260 }
261
262 h += delta;
263 }
264
265 return true;
266 }
267
268 // An implementation of filter policy
269 class BloomFilterPolicy : public FilterPolicy {
270 public:
271 explicit BloomFilterPolicy(int bits_per_key, bool use_block_based_builder)
272 : bits_per_key_(bits_per_key), hash_func_(BloomHash),
273 use_block_based_builder_(use_block_based_builder) {
274 initialize();
275 }
276
277 ~BloomFilterPolicy() override {}
278
279 const char* Name() const override { return "rocksdb.BuiltinBloomFilter"; }
280
281 void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
282 // Compute bloom filter size (in both bits and bytes)
283 size_t bits = n * bits_per_key_;
284
285 // For small n, we can see a very high false positive rate. Fix it
286 // by enforcing a minimum bloom filter length.
287 if (bits < 64) bits = 64;
288
289 size_t bytes = (bits + 7) / 8;
290 bits = bytes * 8;
291
292 const size_t init_size = dst->size();
293 dst->resize(init_size + bytes, 0);
294 dst->push_back(static_cast<char>(num_probes_)); // Remember # of probes
295 char* array = &(*dst)[init_size];
296 for (size_t i = 0; i < (size_t)n; i++) {
297 // Use double-hashing to generate a sequence of hash values.
298 // See analysis in [Kirsch,Mitzenmacher 2006].
299 uint32_t h = hash_func_(keys[i]);
300 const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
301 for (size_t j = 0; j < num_probes_; j++) {
302 const uint32_t bitpos = h % bits;
303 array[bitpos/8] |= (1 << (bitpos % 8));
304 h += delta;
305 }
306 }
307 }
308
309 bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override {
310 const size_t len = bloom_filter.size();
311 if (len < 2) return false;
312
313 const char* array = bloom_filter.data();
314 const size_t bits = (len - 1) * 8;
315
316 // Use the encoded k so that we can read filters generated by
317 // bloom filters created using different parameters.
318 const size_t k = array[len-1];
319 if (k > 30) {
320 // Reserved for potentially new encodings for short bloom filters.
321 // Consider it a match.
322 return true;
323 }
324
325 uint32_t h = hash_func_(key);
326 const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
327 for (size_t j = 0; j < k; j++) {
328 const uint32_t bitpos = h % bits;
329 if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
330 h += delta;
331 }
332 return true;
333 }
334
335 FilterBitsBuilder* GetFilterBitsBuilder() const override {
336 if (use_block_based_builder_) {
337 return nullptr;
338 }
339
340 return new FullFilterBitsBuilder(bits_per_key_, num_probes_);
341 }
342
343 FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override {
344 return new FullFilterBitsReader(contents);
345 }
346
347 // If choose to use block based builder
348 bool UseBlockBasedBuilder() { return use_block_based_builder_; }
349
350 private:
351 size_t bits_per_key_;
352 size_t num_probes_;
353 uint32_t (*hash_func_)(const Slice& key);
354
355 const bool use_block_based_builder_;
356
357 void initialize() {
358 // We intentionally round down to reduce probing cost a little bit
359 num_probes_ = static_cast<size_t>(bits_per_key_ * 0.69); // 0.69 =~ ln(2)
360 if (num_probes_ < 1) num_probes_ = 1;
361 if (num_probes_ > 30) num_probes_ = 30;
362 }
363 };
364
365 } // namespace
366
367 const FilterPolicy* NewBloomFilterPolicy(int bits_per_key,
368 bool use_block_based_builder) {
369 return new BloomFilterPolicy(bits_per_key, use_block_based_builder);
370 }
371
372 } // namespace rocksdb