]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/util/dynamic_bloom.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / util / dynamic_bloom.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5
6#pragma once
7
f67539c2 8#include <array>
1e59de90
TL
9#include <atomic>
10#include <memory>
7c673cae 11#include <string>
1e59de90 12
7c673cae 13#include "port/port.h"
f67539c2
TL
14#include "rocksdb/slice.h"
15#include "table/multiget_context.h"
494da23a 16#include "util/hash.h"
7c673cae 17
f67539c2 18namespace ROCKSDB_NAMESPACE {
7c673cae
FG
19
20class Slice;
21class Allocator;
22class Logger;
23
f67539c2
TL
24// A Bloom filter intended only to be used in memory, never serialized in a way
25// that could lead to schema incompatibility. Supports opt-in lock-free
26// concurrent access.
27//
28// This implementation is also intended for applications generally preferring
29// speed vs. maximum accuracy: roughly 0.9x BF op latency for 1.1x FP rate.
30// For 1% FP rate, that means that the latency of a look-up triggered by an FP
31// should be less than roughly 100x the cost of a Bloom filter op.
32//
33// For simplicity and performance, the current implementation requires
34// num_probes to be a multiple of two and <= 10.
35//
7c673cae
FG
36class DynamicBloom {
37 public:
38 // allocator: pass allocator to bloom filter, hence trace the usage of memory
39 // total_bits: fixed total bits for the bloom
40 // num_probes: number of hash probes for a single key
7c673cae
FG
41 // hash_func: customized hash function
42 // huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB
11fdf7f2 43 // within this page size. Need to reserve huge pages for
7c673cae
FG
44 // it to be allocated, like:
45 // sysctl -w vm.nr_hugepages=20
46 // See linux doc Documentation/vm/hugetlbpage.txt
f67539c2
TL
47 explicit DynamicBloom(Allocator* allocator, uint32_t total_bits,
48 uint32_t num_probes = 6, size_t huge_page_tlb_size = 0,
7c673cae
FG
49 Logger* logger = nullptr);
50
7c673cae
FG
51 ~DynamicBloom() {}
52
53 // Assuming single threaded access to this function.
54 void Add(const Slice& key);
55
56 // Like Add, but may be called concurrent with other functions.
57 void AddConcurrently(const Slice& key);
58
59 // Assuming single threaded access to this function.
60 void AddHash(uint32_t hash);
61
62 // Like AddHash, but may be called concurrent with other functions.
63 void AddHashConcurrently(uint32_t hash);
64
65 // Multithreaded access to this function is OK
66 bool MayContain(const Slice& key) const;
67
1e59de90 68 void MayContain(int num_keys, Slice* keys, bool* may_match) const;
f67539c2 69
7c673cae
FG
70 // Multithreaded access to this function is OK
71 bool MayContainHash(uint32_t hash) const;
72
73 void Prefetch(uint32_t h);
74
7c673cae 75 private:
f67539c2
TL
76 // Length of the structure, in 64-bit words. For this structure, "word"
77 // will always refer to 64-bit words.
78 uint32_t kLen;
79 // We make the k probes in pairs, two for each 64-bit read/write. Thus,
80 // this stores k/2, the number of words to double-probe.
81 const uint32_t kNumDoubleProbes;
7c673cae 82
f67539c2 83 std::atomic<uint64_t>* data_;
7c673cae
FG
84
85 // or_func(ptr, mask) should effect *ptr |= mask with the appropriate
86 // concurrency safety, working with bytes.
87 template <typename OrFunc>
88 void AddHash(uint32_t hash, const OrFunc& or_func);
f67539c2
TL
89
90 bool DoubleProbe(uint32_t h32, size_t a) const;
7c673cae
FG
91};
92
494da23a 93inline void DynamicBloom::Add(const Slice& key) { AddHash(BloomHash(key)); }
7c673cae
FG
94
95inline void DynamicBloom::AddConcurrently(const Slice& key) {
494da23a 96 AddHashConcurrently(BloomHash(key));
7c673cae
FG
97}
98
99inline void DynamicBloom::AddHash(uint32_t hash) {
f67539c2 100 AddHash(hash, [](std::atomic<uint64_t>* ptr, uint64_t mask) {
7c673cae
FG
101 ptr->store(ptr->load(std::memory_order_relaxed) | mask,
102 std::memory_order_relaxed);
103 });
104}
105
106inline void DynamicBloom::AddHashConcurrently(uint32_t hash) {
f67539c2 107 AddHash(hash, [](std::atomic<uint64_t>* ptr, uint64_t mask) {
7c673cae
FG
108 // Happens-before between AddHash and MaybeContains is handled by
109 // access to versions_->LastSequence(), so all we have to do here is
110 // avoid races (so we don't give the compiler a license to mess up
111 // our code) and not lose bits. std::memory_order_relaxed is enough
112 // for that.
113 if ((mask & ptr->load(std::memory_order_relaxed)) != mask) {
114 ptr->fetch_or(mask, std::memory_order_relaxed);
115 }
116 });
117}
118
119inline bool DynamicBloom::MayContain(const Slice& key) const {
494da23a 120 return (MayContainHash(BloomHash(key)));
7c673cae
FG
121}
122
1e59de90 123inline void DynamicBloom::MayContain(int num_keys, Slice* keys,
f67539c2
TL
124 bool* may_match) const {
125 std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
126 std::array<size_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
127 for (int i = 0; i < num_keys; ++i) {
1e59de90 128 hashes[i] = BloomHash(keys[i]);
20effc67 129 size_t a = FastRange32(kLen, hashes[i]);
f67539c2
TL
130 PREFETCH(data_ + a, 0, 3);
131 byte_offsets[i] = a;
132 }
133
134 for (int i = 0; i < num_keys; i++) {
135 may_match[i] = DoubleProbe(hashes[i], byte_offsets[i]);
136 }
137}
138
11fdf7f2
TL
139#if defined(_MSC_VER)
140#pragma warning(push)
141// local variable is initialized but not referenced
f67539c2 142#pragma warning(disable : 4189)
11fdf7f2 143#endif
f67539c2 144inline void DynamicBloom::Prefetch(uint32_t h32) {
20effc67 145 size_t a = FastRange32(kLen, h32);
f67539c2 146 PREFETCH(data_ + a, 0, 3);
7c673cae 147}
11fdf7f2
TL
148#if defined(_MSC_VER)
149#pragma warning(pop)
150#endif
7c673cae 151
f67539c2
TL
152// Speed hacks in this implementation:
153// * Uses fastrange instead of %
154// * Minimum logic to determine first (and all) probed memory addresses.
155// (Uses constant bit-xor offsets from the starting probe address.)
156// * (Major) Two probes per 64-bit memory fetch/write.
157// Code simplification / optimization: only allow even number of probes.
158// * Very fast and effective (murmur-like) hash expansion/re-mixing. (At
159// least on recent CPUs, integer multiplication is very cheap. Each 64-bit
160// remix provides five pairs of bit addresses within a uint64_t.)
161// Code simplification / optimization: only allow up to 10 probes, from a
162// single 64-bit remix.
163//
164// The FP rate penalty for this implementation, vs. standard Bloom filter, is
165// roughly 1.12x on top of the 1.15x penalty for a 512-bit cache-local Bloom.
166// This implementation does not explicitly use the cache line size, but is
167// effectively cache-local (up to 16 probes) because of the bit-xor offsetting.
168//
169// NB: could easily be upgraded to support a 64-bit hash and
170// total_bits > 2^32 (512MB). (The latter is a bad idea without the former,
171// because of false positives.)
172
173inline bool DynamicBloom::MayContainHash(uint32_t h32) const {
20effc67 174 size_t a = FastRange32(kLen, h32);
f67539c2
TL
175 PREFETCH(data_ + a, 0, 3);
176 return DoubleProbe(h32, a);
177}
178
179inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const {
180 // Expand/remix with 64-bit golden ratio
181 uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
182 for (unsigned i = 0;; ++i) {
183 // Two bit probes per uint64_t probe
184 uint64_t mask =
185 ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
186 uint64_t val = data_[byte_offset ^ i].load(std::memory_order_relaxed);
187 if (i + 1 >= kNumDoubleProbes) {
188 return (val & mask) == mask;
189 } else if ((val & mask) != mask) {
190 return false;
7c673cae 191 }
f67539c2 192 h = (h >> 12) | (h << 52);
7c673cae 193 }
7c673cae
FG
194}
195
196template <typename OrFunc>
f67539c2 197inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) {
20effc67 198 size_t a = FastRange32(kLen, h32);
f67539c2
TL
199 PREFETCH(data_ + a, 0, 3);
200 // Expand/remix with 64-bit golden ratio
201 uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
202 for (unsigned i = 0;; ++i) {
203 // Two bit probes per uint64_t probe
204 uint64_t mask =
205 ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
206 or_func(&data_[a ^ i], mask);
207 if (i + 1 >= kNumDoubleProbes) {
208 return;
7c673cae 209 }
f67539c2 210 h = (h >> 12) | (h << 52);
7c673cae
FG
211 }
212}
213
f67539c2 214} // namespace ROCKSDB_NAMESPACE