]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #pragma once | |
7 | ||
f67539c2 | 8 | #include <array> |
1e59de90 TL |
9 | #include <atomic> |
10 | #include <memory> | |
7c673cae | 11 | #include <string> |
1e59de90 | 12 | |
7c673cae | 13 | #include "port/port.h" |
f67539c2 TL |
14 | #include "rocksdb/slice.h" |
15 | #include "table/multiget_context.h" | |
494da23a | 16 | #include "util/hash.h" |
7c673cae | 17 | |
f67539c2 | 18 | namespace ROCKSDB_NAMESPACE { |
7c673cae FG |
19 | |
20 | class Slice; | |
21 | class Allocator; | |
22 | class Logger; | |
23 | ||
f67539c2 TL |
24 | // A Bloom filter intended only to be used in memory, never serialized in a way |
25 | // that could lead to schema incompatibility. Supports opt-in lock-free | |
26 | // concurrent access. | |
27 | // | |
28 | // This implementation is also intended for applications generally preferring | |
29 | // speed vs. maximum accuracy: roughly 0.9x BF op latency for 1.1x FP rate. | |
30 | // For 1% FP rate, that means that the latency of a look-up triggered by an FP | |
31 | // should be less than roughly 100x the cost of a Bloom filter op. | |
32 | // | |
33 | // For simplicity and performance, the current implementation requires | |
34 | // num_probes to be a multiple of two and <= 10. | |
35 | // | |
7c673cae FG |
36 | class DynamicBloom { |
37 | public: | |
38 | // allocator: pass allocator to bloom filter, hence trace the usage of memory | |
39 | // total_bits: fixed total bits for the bloom | |
40 | // num_probes: number of hash probes for a single key | |
7c673cae FG |
41 | // hash_func: customized hash function |
42 | // huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB | |
11fdf7f2 | 43 | // within this page size. Need to reserve huge pages for |
7c673cae FG |
44 | // it to be allocated, like: |
45 | // sysctl -w vm.nr_hugepages=20 | |
46 | // See linux doc Documentation/vm/hugetlbpage.txt | |
f67539c2 TL |
47 | explicit DynamicBloom(Allocator* allocator, uint32_t total_bits, |
48 | uint32_t num_probes = 6, size_t huge_page_tlb_size = 0, | |
7c673cae FG |
49 | Logger* logger = nullptr); |
50 | ||
7c673cae FG |
51 | ~DynamicBloom() {} |
52 | ||
53 | // Assuming single threaded access to this function. | |
54 | void Add(const Slice& key); | |
55 | ||
56 | // Like Add, but may be called concurrent with other functions. | |
57 | void AddConcurrently(const Slice& key); | |
58 | ||
59 | // Assuming single threaded access to this function. | |
60 | void AddHash(uint32_t hash); | |
61 | ||
62 | // Like AddHash, but may be called concurrent with other functions. | |
63 | void AddHashConcurrently(uint32_t hash); | |
64 | ||
65 | // Multithreaded access to this function is OK | |
66 | bool MayContain(const Slice& key) const; | |
67 | ||
1e59de90 | 68 | void MayContain(int num_keys, Slice* keys, bool* may_match) const; |
f67539c2 | 69 | |
7c673cae FG |
70 | // Multithreaded access to this function is OK |
71 | bool MayContainHash(uint32_t hash) const; | |
72 | ||
73 | void Prefetch(uint32_t h); | |
74 | ||
7c673cae | 75 | private: |
f67539c2 TL |
76 | // Length of the structure, in 64-bit words. For this structure, "word" |
77 | // will always refer to 64-bit words. | |
78 | uint32_t kLen; | |
79 | // We make the k probes in pairs, two for each 64-bit read/write. Thus, | |
80 | // this stores k/2, the number of words to double-probe. | |
81 | const uint32_t kNumDoubleProbes; | |
7c673cae | 82 | |
f67539c2 | 83 | std::atomic<uint64_t>* data_; |
7c673cae FG |
84 | |
85 | // or_func(ptr, mask) should effect *ptr |= mask with the appropriate | |
86 | // concurrency safety, working with bytes. | |
87 | template <typename OrFunc> | |
88 | void AddHash(uint32_t hash, const OrFunc& or_func); | |
f67539c2 TL |
89 | |
90 | bool DoubleProbe(uint32_t h32, size_t a) const; | |
7c673cae FG |
91 | }; |
92 | ||
494da23a | 93 | inline void DynamicBloom::Add(const Slice& key) { AddHash(BloomHash(key)); } |
7c673cae FG |
94 | |
95 | inline void DynamicBloom::AddConcurrently(const Slice& key) { | |
494da23a | 96 | AddHashConcurrently(BloomHash(key)); |
7c673cae FG |
97 | } |
98 | ||
99 | inline void DynamicBloom::AddHash(uint32_t hash) { | |
f67539c2 | 100 | AddHash(hash, [](std::atomic<uint64_t>* ptr, uint64_t mask) { |
7c673cae FG |
101 | ptr->store(ptr->load(std::memory_order_relaxed) | mask, |
102 | std::memory_order_relaxed); | |
103 | }); | |
104 | } | |
105 | ||
106 | inline void DynamicBloom::AddHashConcurrently(uint32_t hash) { | |
f67539c2 | 107 | AddHash(hash, [](std::atomic<uint64_t>* ptr, uint64_t mask) { |
7c673cae FG |
108 | // Happens-before between AddHash and MaybeContains is handled by |
109 | // access to versions_->LastSequence(), so all we have to do here is | |
110 | // avoid races (so we don't give the compiler a license to mess up | |
111 | // our code) and not lose bits. std::memory_order_relaxed is enough | |
112 | // for that. | |
113 | if ((mask & ptr->load(std::memory_order_relaxed)) != mask) { | |
114 | ptr->fetch_or(mask, std::memory_order_relaxed); | |
115 | } | |
116 | }); | |
117 | } | |
118 | ||
119 | inline bool DynamicBloom::MayContain(const Slice& key) const { | |
494da23a | 120 | return (MayContainHash(BloomHash(key))); |
7c673cae FG |
121 | } |
122 | ||
1e59de90 | 123 | inline void DynamicBloom::MayContain(int num_keys, Slice* keys, |
f67539c2 TL |
124 | bool* may_match) const { |
125 | std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes; | |
126 | std::array<size_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets; | |
127 | for (int i = 0; i < num_keys; ++i) { | |
1e59de90 | 128 | hashes[i] = BloomHash(keys[i]); |
20effc67 | 129 | size_t a = FastRange32(kLen, hashes[i]); |
f67539c2 TL |
130 | PREFETCH(data_ + a, 0, 3); |
131 | byte_offsets[i] = a; | |
132 | } | |
133 | ||
134 | for (int i = 0; i < num_keys; i++) { | |
135 | may_match[i] = DoubleProbe(hashes[i], byte_offsets[i]); | |
136 | } | |
137 | } | |
138 | ||
11fdf7f2 TL |
139 | #if defined(_MSC_VER) |
140 | #pragma warning(push) | |
141 | // local variable is initialized but not referenced | |
f67539c2 | 142 | #pragma warning(disable : 4189) |
11fdf7f2 | 143 | #endif |
f67539c2 | 144 | inline void DynamicBloom::Prefetch(uint32_t h32) { |
20effc67 | 145 | size_t a = FastRange32(kLen, h32); |
f67539c2 | 146 | PREFETCH(data_ + a, 0, 3); |
7c673cae | 147 | } |
11fdf7f2 TL |
148 | #if defined(_MSC_VER) |
149 | #pragma warning(pop) | |
150 | #endif | |
7c673cae | 151 | |
f67539c2 TL |
152 | // Speed hacks in this implementation: |
153 | // * Uses fastrange instead of % | |
154 | // * Minimum logic to determine first (and all) probed memory addresses. | |
155 | // (Uses constant bit-xor offsets from the starting probe address.) | |
156 | // * (Major) Two probes per 64-bit memory fetch/write. | |
157 | // Code simplification / optimization: only allow even number of probes. | |
158 | // * Very fast and effective (murmur-like) hash expansion/re-mixing. (At | |
159 | // least on recent CPUs, integer multiplication is very cheap. Each 64-bit | |
160 | // remix provides five pairs of bit addresses within a uint64_t.) | |
161 | // Code simplification / optimization: only allow up to 10 probes, from a | |
162 | // single 64-bit remix. | |
163 | // | |
164 | // The FP rate penalty for this implementation, vs. standard Bloom filter, is | |
165 | // roughly 1.12x on top of the 1.15x penalty for a 512-bit cache-local Bloom. | |
166 | // This implementation does not explicitly use the cache line size, but is | |
167 | // effectively cache-local (up to 16 probes) because of the bit-xor offsetting. | |
168 | // | |
169 | // NB: could easily be upgraded to support a 64-bit hash and | |
170 | // total_bits > 2^32 (512MB). (The latter is a bad idea without the former, | |
171 | // because of false positives.) | |
172 | ||
173 | inline bool DynamicBloom::MayContainHash(uint32_t h32) const { | |
20effc67 | 174 | size_t a = FastRange32(kLen, h32); |
f67539c2 TL |
175 | PREFETCH(data_ + a, 0, 3); |
176 | return DoubleProbe(h32, a); | |
177 | } | |
178 | ||
179 | inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const { | |
180 | // Expand/remix with 64-bit golden ratio | |
181 | uint64_t h = 0x9e3779b97f4a7c13ULL * h32; | |
182 | for (unsigned i = 0;; ++i) { | |
183 | // Two bit probes per uint64_t probe | |
184 | uint64_t mask = | |
185 | ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63)); | |
186 | uint64_t val = data_[byte_offset ^ i].load(std::memory_order_relaxed); | |
187 | if (i + 1 >= kNumDoubleProbes) { | |
188 | return (val & mask) == mask; | |
189 | } else if ((val & mask) != mask) { | |
190 | return false; | |
7c673cae | 191 | } |
f67539c2 | 192 | h = (h >> 12) | (h << 52); |
7c673cae | 193 | } |
7c673cae FG |
194 | } |
195 | ||
196 | template <typename OrFunc> | |
f67539c2 | 197 | inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) { |
20effc67 | 198 | size_t a = FastRange32(kLen, h32); |
f67539c2 TL |
199 | PREFETCH(data_ + a, 0, 3); |
200 | // Expand/remix with 64-bit golden ratio | |
201 | uint64_t h = 0x9e3779b97f4a7c13ULL * h32; | |
202 | for (unsigned i = 0;; ++i) { | |
203 | // Two bit probes per uint64_t probe | |
204 | uint64_t mask = | |
205 | ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63)); | |
206 | or_func(&data_[a ^ i], mask); | |
207 | if (i + 1 >= kNumDoubleProbes) { | |
208 | return; | |
7c673cae | 209 | } |
f67539c2 | 210 | h = (h >> 12) | (h << 52); |
7c673cae FG |
211 | } |
212 | } | |
213 | ||
f67539c2 | 214 | } // namespace ROCKSDB_NAMESPACE |