]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/memtable/hash_skiplist_rep.cc
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / memtable / hash_skiplist_rep.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6
7 #ifndef ROCKSDB_LITE
8 #include "memtable/hash_skiplist_rep.h"
9
10 #include <atomic>
11
12 #include "rocksdb/memtablerep.h"
13 #include "util/arena.h"
14 #include "rocksdb/slice.h"
15 #include "rocksdb/slice_transform.h"
16 #include "port/port.h"
17 #include "util/murmurhash.h"
18 #include "db/memtable.h"
19 #include "memtable/skiplist.h"
20
21 namespace rocksdb {
22 namespace {
23
24 class HashSkipListRep : public MemTableRep {
25 public:
26 HashSkipListRep(const MemTableRep::KeyComparator& compare,
27 Allocator* allocator, const SliceTransform* transform,
28 size_t bucket_size, int32_t skiplist_height,
29 int32_t skiplist_branching_factor);
30
31 void Insert(KeyHandle handle) override;
32
33 bool Contains(const char* key) const override;
34
35 size_t ApproximateMemoryUsage() override;
36
37 void Get(const LookupKey& k, void* callback_args,
38 bool (*callback_func)(void* arg, const char* entry)) override;
39
40 ~HashSkipListRep() override;
41
42 MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override;
43
44 MemTableRep::Iterator* GetDynamicPrefixIterator(
45 Arena* arena = nullptr) override;
46
47 private:
48 friend class DynamicIterator;
49 typedef SkipList<const char*, const MemTableRep::KeyComparator&> Bucket;
50
51 size_t bucket_size_;
52
53 const int32_t skiplist_height_;
54 const int32_t skiplist_branching_factor_;
55
56 // Maps slices (which are transformed user keys) to buckets of keys sharing
57 // the same transform.
58 std::atomic<Bucket*>* buckets_;
59
60 // The user-supplied transform whose domain is the user keys.
61 const SliceTransform* transform_;
62
63 const MemTableRep::KeyComparator& compare_;
64 // immutable after construction
65 Allocator* const allocator_;
66
67 inline size_t GetHash(const Slice& slice) const {
68 return MurmurHash(slice.data(), static_cast<int>(slice.size()), 0) %
69 bucket_size_;
70 }
71 inline Bucket* GetBucket(size_t i) const {
72 return buckets_[i].load(std::memory_order_acquire);
73 }
74 inline Bucket* GetBucket(const Slice& slice) const {
75 return GetBucket(GetHash(slice));
76 }
77 // Get a bucket from buckets_. If the bucket hasn't been initialized yet,
78 // initialize it before returning.
79 Bucket* GetInitializedBucket(const Slice& transformed);
80
81 class Iterator : public MemTableRep::Iterator {
82 public:
83 explicit Iterator(Bucket* list, bool own_list = true,
84 Arena* arena = nullptr)
85 : list_(list), iter_(list), own_list_(own_list), arena_(arena) {}
86
87 ~Iterator() override {
88 // if we own the list, we should also delete it
89 if (own_list_) {
90 assert(list_ != nullptr);
91 delete list_;
92 }
93 }
94
95 // Returns true iff the iterator is positioned at a valid node.
96 bool Valid() const override { return list_ != nullptr && iter_.Valid(); }
97
98 // Returns the key at the current position.
99 // REQUIRES: Valid()
100 const char* key() const override {
101 assert(Valid());
102 return iter_.key();
103 }
104
105 // Advances to the next position.
106 // REQUIRES: Valid()
107 void Next() override {
108 assert(Valid());
109 iter_.Next();
110 }
111
112 // Advances to the previous position.
113 // REQUIRES: Valid()
114 void Prev() override {
115 assert(Valid());
116 iter_.Prev();
117 }
118
119 // Advance to the first entry with a key >= target
120 void Seek(const Slice& internal_key, const char* memtable_key) override {
121 if (list_ != nullptr) {
122 const char* encoded_key =
123 (memtable_key != nullptr) ?
124 memtable_key : EncodeKey(&tmp_, internal_key);
125 iter_.Seek(encoded_key);
126 }
127 }
128
129 // Retreat to the last entry with a key <= target
130 void SeekForPrev(const Slice& /*internal_key*/,
131 const char* /*memtable_key*/) override {
132 // not supported
133 assert(false);
134 }
135
136 // Position at the first entry in collection.
137 // Final state of iterator is Valid() iff collection is not empty.
138 void SeekToFirst() override {
139 if (list_ != nullptr) {
140 iter_.SeekToFirst();
141 }
142 }
143
144 // Position at the last entry in collection.
145 // Final state of iterator is Valid() iff collection is not empty.
146 void SeekToLast() override {
147 if (list_ != nullptr) {
148 iter_.SeekToLast();
149 }
150 }
151
152 protected:
153 void Reset(Bucket* list) {
154 if (own_list_) {
155 assert(list_ != nullptr);
156 delete list_;
157 }
158 list_ = list;
159 iter_.SetList(list);
160 own_list_ = false;
161 }
162 private:
163 // if list_ is nullptr, we should NEVER call any methods on iter_
164 // if list_ is nullptr, this Iterator is not Valid()
165 Bucket* list_;
166 Bucket::Iterator iter_;
167 // here we track if we own list_. If we own it, we are also
168 // responsible for it's cleaning. This is a poor man's std::shared_ptr
169 bool own_list_;
170 std::unique_ptr<Arena> arena_;
171 std::string tmp_; // For passing to EncodeKey
172 };
173
174 class DynamicIterator : public HashSkipListRep::Iterator {
175 public:
176 explicit DynamicIterator(const HashSkipListRep& memtable_rep)
177 : HashSkipListRep::Iterator(nullptr, false),
178 memtable_rep_(memtable_rep) {}
179
180 // Advance to the first entry with a key >= target
181 void Seek(const Slice& k, const char* memtable_key) override {
182 auto transformed = memtable_rep_.transform_->Transform(ExtractUserKey(k));
183 Reset(memtable_rep_.GetBucket(transformed));
184 HashSkipListRep::Iterator::Seek(k, memtable_key);
185 }
186
187 // Position at the first entry in collection.
188 // Final state of iterator is Valid() iff collection is not empty.
189 void SeekToFirst() override {
190 // Prefix iterator does not support total order.
191 // We simply set the iterator to invalid state
192 Reset(nullptr);
193 }
194
195 // Position at the last entry in collection.
196 // Final state of iterator is Valid() iff collection is not empty.
197 void SeekToLast() override {
198 // Prefix iterator does not support total order.
199 // We simply set the iterator to invalid state
200 Reset(nullptr);
201 }
202
203 private:
204 // the underlying memtable
205 const HashSkipListRep& memtable_rep_;
206 };
207
208 class EmptyIterator : public MemTableRep::Iterator {
209 // This is used when there wasn't a bucket. It is cheaper than
210 // instantiating an empty bucket over which to iterate.
211 public:
212 EmptyIterator() { }
213 bool Valid() const override { return false; }
214 const char* key() const override {
215 assert(false);
216 return nullptr;
217 }
218 void Next() override {}
219 void Prev() override {}
220 void Seek(const Slice& /*internal_key*/,
221 const char* /*memtable_key*/) override {}
222 void SeekForPrev(const Slice& /*internal_key*/,
223 const char* /*memtable_key*/) override {}
224 void SeekToFirst() override {}
225 void SeekToLast() override {}
226
227 private:
228 };
229 };
230
231 HashSkipListRep::HashSkipListRep(const MemTableRep::KeyComparator& compare,
232 Allocator* allocator,
233 const SliceTransform* transform,
234 size_t bucket_size, int32_t skiplist_height,
235 int32_t skiplist_branching_factor)
236 : MemTableRep(allocator),
237 bucket_size_(bucket_size),
238 skiplist_height_(skiplist_height),
239 skiplist_branching_factor_(skiplist_branching_factor),
240 transform_(transform),
241 compare_(compare),
242 allocator_(allocator) {
243 auto mem = allocator->AllocateAligned(
244 sizeof(std::atomic<void*>) * bucket_size);
245 buckets_ = new (mem) std::atomic<Bucket*>[bucket_size];
246
247 for (size_t i = 0; i < bucket_size_; ++i) {
248 buckets_[i].store(nullptr, std::memory_order_relaxed);
249 }
250 }
251
252 HashSkipListRep::~HashSkipListRep() {
253 }
254
255 HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket(
256 const Slice& transformed) {
257 size_t hash = GetHash(transformed);
258 auto bucket = GetBucket(hash);
259 if (bucket == nullptr) {
260 auto addr = allocator_->AllocateAligned(sizeof(Bucket));
261 bucket = new (addr) Bucket(compare_, allocator_, skiplist_height_,
262 skiplist_branching_factor_);
263 buckets_[hash].store(bucket, std::memory_order_release);
264 }
265 return bucket;
266 }
267
268 void HashSkipListRep::Insert(KeyHandle handle) {
269 auto* key = static_cast<char*>(handle);
270 assert(!Contains(key));
271 auto transformed = transform_->Transform(UserKey(key));
272 auto bucket = GetInitializedBucket(transformed);
273 bucket->Insert(key);
274 }
275
276 bool HashSkipListRep::Contains(const char* key) const {
277 auto transformed = transform_->Transform(UserKey(key));
278 auto bucket = GetBucket(transformed);
279 if (bucket == nullptr) {
280 return false;
281 }
282 return bucket->Contains(key);
283 }
284
285 size_t HashSkipListRep::ApproximateMemoryUsage() {
286 return 0;
287 }
288
289 void HashSkipListRep::Get(const LookupKey& k, void* callback_args,
290 bool (*callback_func)(void* arg, const char* entry)) {
291 auto transformed = transform_->Transform(k.user_key());
292 auto bucket = GetBucket(transformed);
293 if (bucket != nullptr) {
294 Bucket::Iterator iter(bucket);
295 for (iter.Seek(k.memtable_key().data());
296 iter.Valid() && callback_func(callback_args, iter.key());
297 iter.Next()) {
298 }
299 }
300 }
301
302 MemTableRep::Iterator* HashSkipListRep::GetIterator(Arena* arena) {
303 // allocate a new arena of similar size to the one currently in use
304 Arena* new_arena = new Arena(allocator_->BlockSize());
305 auto list = new Bucket(compare_, new_arena);
306 for (size_t i = 0; i < bucket_size_; ++i) {
307 auto bucket = GetBucket(i);
308 if (bucket != nullptr) {
309 Bucket::Iterator itr(bucket);
310 for (itr.SeekToFirst(); itr.Valid(); itr.Next()) {
311 list->Insert(itr.key());
312 }
313 }
314 }
315 if (arena == nullptr) {
316 return new Iterator(list, true, new_arena);
317 } else {
318 auto mem = arena->AllocateAligned(sizeof(Iterator));
319 return new (mem) Iterator(list, true, new_arena);
320 }
321 }
322
323 MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator(Arena* arena) {
324 if (arena == nullptr) {
325 return new DynamicIterator(*this);
326 } else {
327 auto mem = arena->AllocateAligned(sizeof(DynamicIterator));
328 return new (mem) DynamicIterator(*this);
329 }
330 }
331
332 } // anon namespace
333
334 MemTableRep* HashSkipListRepFactory::CreateMemTableRep(
335 const MemTableRep::KeyComparator& compare, Allocator* allocator,
336 const SliceTransform* transform, Logger* /*logger*/) {
337 return new HashSkipListRep(compare, allocator, transform, bucket_count_,
338 skiplist_height_, skiplist_branching_factor_);
339 }
340
341 MemTableRepFactory* NewHashSkipListRepFactory(
342 size_t bucket_count, int32_t skiplist_height,
343 int32_t skiplist_branching_factor) {
344 return new HashSkipListRepFactory(bucket_count, skiplist_height,
345 skiplist_branching_factor);
346 }
347
348 } // namespace rocksdb
349 #endif // ROCKSDB_LITE