// ceph/src/rocksdb/utilities/persistent_cache/hash_table.h
//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
#pragma once

#include <assert.h>

#include <list>
#include <memory>
#include <vector>

#ifdef OS_LINUX
#include <sys/mman.h>
#endif

#include "rocksdb/env.h"
#include "util/mutexlock.h"
21 namespace ROCKSDB_NAMESPACE
{
23 // HashTable<T, Hash, Equal>
25 // Traditional implementation of hash table with synchronization built on top
26 // don't perform very well in multi-core scenarios. This is an implementation
27 // designed for multi-core scenarios with high lock contention.
29 // |<-------- alpha ------------->|
30 // Buckets Collision list
31 // ---- +----+ +---+---+--- ...... ---+---+---+
32 // / | |--->| | | | | |
33 // / +----+ +---+---+--- ...... ---+---+---+
49 // The lock contention is spread over an array of locks. This helps improve
50 // concurrent access. The spine is designed for a certain capacity and load
51 // factor. When the capacity planning is done correctly we can expect
52 // O(load_factor = 1) insert, access and remove time.
54 // Micro benchmark on debug build gives about .5 Million/sec rate of insert,
55 // erase and lookup in parallel (total of about 1.5 Million ops/sec). If the
56 // blocks were of 4K, the hash table can support a virtual throughput of
59 // T Object type (contains both key and value)
60 // Hash Function that returns an hash from type T
61 // Equal Returns if two objects are equal
62 // (We need explicit equal for pointer type)
64 template <class T
, class Hash
, class Equal
>
67 explicit HashTable(const size_t capacity
= 1024 * 1024,
68 const float load_factor
= 2.0, const uint32_t nlocks
= 256)
70 static_cast<uint32_t>(load_factor
? capacity
/ load_factor
: 0)),
78 buckets_
.reset(new Bucket
[nbuckets_
]);
80 mlock(buckets_
.get(), nbuckets_
* sizeof(Bucket
));
84 locks_
.reset(new port::RWMutex
[nlocks_
]);
86 mlock(locks_
.get(), nlocks_
* sizeof(port::RWMutex
));
94 virtual ~HashTable() { AssertEmptyBuckets(); }
97 // Insert given record to hash table
99 bool Insert(const T
& t
) {
100 const uint64_t h
= Hash()(t
);
101 const uint32_t bucket_idx
= h
% nbuckets_
;
102 const uint32_t lock_idx
= bucket_idx
% nlocks_
;
104 WriteLock
_(&locks_
[lock_idx
]);
105 auto& bucket
= buckets_
[bucket_idx
];
106 return Insert(&bucket
, t
);
111 // Please note that read lock should be held by the caller. This is because
112 // the caller owns the data, and should hold the read lock as long as he
113 // operates on the data.
114 bool Find(const T
& t
, T
* ret
, port::RWMutex
** ret_lock
) {
115 const uint64_t h
= Hash()(t
);
116 const uint32_t bucket_idx
= h
% nbuckets_
;
117 const uint32_t lock_idx
= bucket_idx
% nlocks_
;
119 port::RWMutex
& lock
= locks_
[lock_idx
];
122 auto& bucket
= buckets_
[bucket_idx
];
123 if (Find(&bucket
, t
, ret
)) {
133 // Erase a given key from the hash table
135 bool Erase(const T
& t
, T
* ret
) {
136 const uint64_t h
= Hash()(t
);
137 const uint32_t bucket_idx
= h
% nbuckets_
;
138 const uint32_t lock_idx
= bucket_idx
% nlocks_
;
140 WriteLock
_(&locks_
[lock_idx
]);
142 auto& bucket
= buckets_
[bucket_idx
];
143 return Erase(&bucket
, t
, ret
);
146 // Fetch the mutex associated with a key
147 // This call is used to hold the lock for a given data for extended period of
149 port::RWMutex
* GetMutex(const T
& t
) {
150 const uint64_t h
= Hash()(t
);
151 const uint32_t bucket_idx
= h
% nbuckets_
;
152 const uint32_t lock_idx
= bucket_idx
% nlocks_
;
154 return &locks_
[lock_idx
];
157 void Clear(void (*fn
)(T
)) {
158 for (uint32_t i
= 0; i
< nbuckets_
; ++i
) {
159 const uint32_t lock_idx
= i
% nlocks_
;
160 WriteLock
_(&locks_
[lock_idx
]);
161 for (auto& t
: buckets_
[i
].list_
) {
164 buckets_
[i
].list_
.clear();
169 // Models bucket of keys that hash to the same bucket number
174 // Substitute for std::find with custom comparator operator
175 typename
std::list
<T
>::iterator
Find(std::list
<T
>* list
, const T
& t
) {
176 for (auto it
= list
->begin(); it
!= list
->end(); ++it
) {
177 if (Equal()(*it
, t
)) {
184 bool Insert(Bucket
* bucket
, const T
& t
) {
185 // Check if the key already exists
186 auto it
= Find(&bucket
->list_
, t
);
187 if (it
!= bucket
->list_
.end()) {
192 bucket
->list_
.push_back(t
);
196 bool Find(Bucket
* bucket
, const T
& t
, T
* ret
) {
197 auto it
= Find(&bucket
->list_
, t
);
198 if (it
!= bucket
->list_
.end()) {
207 bool Erase(Bucket
* bucket
, const T
& t
, T
* ret
) {
208 auto it
= Find(&bucket
->list_
, t
);
209 if (it
!= bucket
->list_
.end()) {
214 bucket
->list_
.erase(it
);
220 // assert that all buckets are empty
221 void AssertEmptyBuckets() {
223 for (size_t i
= 0; i
< nbuckets_
; ++i
) {
224 WriteLock
_(&locks_
[i
% nlocks_
]);
225 assert(buckets_
[i
].list_
.empty());
230 const uint32_t nbuckets_
; // No. of buckets in the spine
231 std::unique_ptr
<Bucket
[]> buckets_
; // Spine of the hash buckets
232 const uint32_t nlocks_
; // No. of locks
233 std::unique_ptr
<port::RWMutex
[]> locks_
; // Granular locks
236 } // namespace ROCKSDB_NAMESPACE