// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//

#ifndef ROCKSDB_LITE
#include "memtable/hash_cuckoo_rep.h"

#include <algorithm>
#include <atomic>
#include <limits>
#include <memory>
#include <queue>
#include <string>
#include <vector>

#include "db/memtable.h"
#include "memtable/skiplist.h"
#include "memtable/stl_wrappers.h"
#include "port/port.h"
#include "rocksdb/memtablerep.h"
#include "util/murmurhash.h"

namespace rocksdb {
namespace {

// the default maximum size of the cuckoo path searching queue
static const int kCuckooPathMaxSearchSteps = 100;

struct CuckooStep {
  static const int kNullStep = -1;
  // the bucket id in the cuckoo array.
  int bucket_id_;
  // index into the cuckoo-step array that points to its previous step,
  // kNullStep (-1) if it is the beginning step.
  int prev_step_id_;
  // the depth of the current step.
  unsigned int depth_;

  CuckooStep() : bucket_id_(-1), prev_step_id_(kNullStep), depth_(1) {}

  CuckooStep(CuckooStep&& o) = default;

  CuckooStep& operator=(CuckooStep&& rhs) {
    bucket_id_ = std::move(rhs.bucket_id_);
    prev_step_id_ = std::move(rhs.prev_step_id_);
    depth_ = std::move(rhs.depth_);
    return *this;
  }

  CuckooStep(const CuckooStep&) = delete;
  CuckooStep& operator=(const CuckooStep&) = delete;

  CuckooStep(int bucket_id, int prev_step_id, int depth)
      : bucket_id_(bucket_id), prev_step_id_(prev_step_id), depth_(depth) {}
};

class HashCuckooRep : public MemTableRep {
 public:
  explicit HashCuckooRep(const MemTableRep::KeyComparator& compare,
                         Allocator* allocator, const size_t bucket_count,
                         const unsigned int hash_func_count,
                         const size_t approximate_entry_size)
      : MemTableRep(allocator),
        compare_(compare),
        allocator_(allocator),
        bucket_count_(bucket_count),
        approximate_entry_size_(approximate_entry_size),
        cuckoo_path_max_depth_(kDefaultCuckooPathMaxDepth),
        occupied_count_(0),
        hash_function_count_(hash_func_count),
        backup_table_(nullptr) {
    char* mem = reinterpret_cast<char*>(
        allocator_->Allocate(sizeof(std::atomic<const char*>) * bucket_count_));
    cuckoo_array_ = new (mem) std::atomic<char*>[bucket_count_];
    for (unsigned int bid = 0; bid < bucket_count_; ++bid) {
      cuckoo_array_[bid].store(nullptr, std::memory_order_relaxed);
    }

    cuckoo_path_ = reinterpret_cast<int*>(
        allocator_->Allocate(sizeof(int) * (cuckoo_path_max_depth_ + 1)));
    is_nearly_full_ = false;
  }

  // return false, indicating HashCuckooRep does not support merge operator.
  virtual bool IsMergeOperatorSupported() const override { return false; }

  // return false, indicating HashCuckooRep does not support snapshot.
  virtual bool IsSnapshotSupported() const override { return false; }

  // Returns true iff an entry that compares equal to key is in the collection.
  virtual bool Contains(const char* internal_key) const override;

  virtual ~HashCuckooRep() override {}

  // Insert the specified key (internal_key) into the mem-table.  Assertion
  // fails if the current mem-table already contains the specified key.
  virtual void Insert(KeyHandle handle) override;

  // This function returns bucket_count_ * approximate_entry_size_ when any
  // of the following happens to disallow further write operations:
  // 1. when the fullness reaches kMaxFullness.
  // 2. when the backup_table_ is used.
  //
  // otherwise, this function will always return 0.
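  //
  // Returning the full pre-sized table size (rather than the bytes actually
  // in use) is presumably what leads the caller polling
  // ApproximateMemoryUsage() to treat this memtable as full and switch to a
  // fresh one; this is an interpretation of the caller's behavior, not
  // something asserted by this file.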
  virtual size_t ApproximateMemoryUsage() override {
    if (is_nearly_full_) {
      return bucket_count_ * approximate_entry_size_;
    }
    return 0;
  }

  virtual void Get(const LookupKey& k, void* callback_args,
                   bool (*callback_func)(void* arg,
                                         const char* entry)) override;

  class Iterator : public MemTableRep::Iterator {
    std::shared_ptr<std::vector<const char*>> bucket_;
    std::vector<const char*>::const_iterator mutable cit_;
    const KeyComparator& compare_;
    std::string tmp_;  // For passing to EncodeKey
    bool mutable sorted_;
    void DoSort() const;

   public:
    explicit Iterator(std::shared_ptr<std::vector<const char*>> bucket,
                      const KeyComparator& compare);

    // Initialize an iterator over the specified collection.
    // The returned iterator is not valid.
    // explicit Iterator(const MemTableRep* collection);
    virtual ~Iterator() override {}

    // Returns true iff the iterator is positioned at a valid node.
    virtual bool Valid() const override;

    // Returns the key at the current position.
    // REQUIRES: Valid()
    virtual const char* key() const override;

    // Advances to the next position.
    // REQUIRES: Valid()
    virtual void Next() override;

    // Advances to the previous position.
    // REQUIRES: Valid()
    virtual void Prev() override;

    // Advance to the first entry with a key >= target
    virtual void Seek(const Slice& user_key, const char* memtable_key) override;

    // Retreat to the last entry with a key <= target
    virtual void SeekForPrev(const Slice& user_key,
                             const char* memtable_key) override;

    // Position at the first entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    virtual void SeekToFirst() override;

    // Position at the last entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    virtual void SeekToLast() override;
  };

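  // A fixed-capacity buffer that backs the breadth-first search for a
  // cuckoo path.  Steps are appended with NextWriteBuffer() and consumed
  // in FIFO order with ReadNext(), so the buffer doubles as both the BFS
  // queue and the storage that back-pointers (prev_step_id_) index into.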
  struct CuckooStepBuffer {
    CuckooStepBuffer() : write_index_(0), read_index_(0) {}
    ~CuckooStepBuffer() {}

    int write_index_;
    int read_index_;
    CuckooStep steps_[kCuckooPathMaxSearchSteps];

    CuckooStep& NextWriteBuffer() { return steps_[write_index_++]; }

    inline const CuckooStep& ReadNext() { return steps_[read_index_++]; }

    inline bool HasNewWrite() { return write_index_ > read_index_; }

    inline void reset() {
      write_index_ = 0;
      read_index_ = 0;
    }

    inline bool IsFull() { return write_index_ >= kCuckooPathMaxSearchSteps; }

    // returns the number of steps that have been read.
    inline int ReadCount() { return read_index_; }

    // returns the number of steps that have been written to the buffer.
    inline int WriteCount() { return write_index_; }
  };

 private:
  const MemTableRep::KeyComparator& compare_;
  // the pointer to Allocator to allocate memory, immutable after construction.
  Allocator* const allocator_;
  // the number of hash buckets in the hash table.
  const size_t bucket_count_;
  // approximate size of each entry
  const size_t approximate_entry_size_;
  // the maximum depth of the cuckoo path.
  const unsigned int cuckoo_path_max_depth_;
  // the current number of occupied entries in cuckoo_array_.
  size_t occupied_count_;
  // the current number of hash functions used in the cuckoo hash.
  unsigned int hash_function_count_;
  // the backup MemTableRep to handle the case where cuckoo hash cannot find
  // a vacant bucket for inserting the key of a put request.
  std::shared_ptr<MemTableRep> backup_table_;
  // the array to store pointers, pointing to the actual data.
  std::atomic<char*>* cuckoo_array_;
  // a buffer to store the cuckoo path.
  int* cuckoo_path_;
  // a boolean flag indicating whether the fullness of the bucket array
  // reaches the point to make the current memtable immutable.
  bool is_nearly_full_;

  // the default maximum depth of the cuckoo path.
  static const unsigned int kDefaultCuckooPathMaxDepth = 10;

  CuckooStepBuffer step_buffer_;

  // returns the bucket id associated with the input slice based on the
  // hash function identified by hash_func_id.
  unsigned int GetHash(const Slice& slice, const int hash_func_id) const {
    // the seeds used in the Murmur hash to produce different hash functions.
    static const int kMurmurHashSeeds[HashCuckooRepFactory::kMaxHashCount] = {
        545609244,  1769731426, 763324157,  13099088,   592422103,
        1899789565, 248369300,  1984183468, 1613664382, 1491157517};
    return static_cast<unsigned int>(
        MurmurHash(slice.data(), static_cast<int>(slice.size()),
                   kMurmurHashSeeds[hash_func_id]) %
        bucket_count_);
  }
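
  // Illustration: with hash_function_count_ == 4 (a hypothetical setting),
  // a user key K has up to four candidate buckets,
  //
  //   GetHash(K, 0), GetHash(K, 1), GetHash(K, 2), GetHash(K, 3),
  //
  // each derived from the same MurmurHash with a different seed.  A lookup
  // never probes anything beyond these candidates, which bounds the read
  // cost of the cuckoo array at hash_function_count_ probes.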

  // A cuckoo path is a sequence of bucket ids, where each id points to a
  // location of cuckoo_array_.  This path describes the displacement sequence
  // of entries in order to store the desired data specified by the input user
  // key.  The path starts from one of the locations associated with the
  // specified user key and ends at a vacant space in the cuckoo array.  This
  // function will update the cuckoo_path.
  //
  // @return true if it found a cuckoo path.
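  //
  // Example with hypothetical bucket ids: suppose both candidate buckets
  // {3, 7} of the new key are occupied, bucket 7's resident key also hashes
  // to bucket 12, and bucket 12 is vacant.  The discovered path is stored in
  // reverse order as cuckoo_path = {12, 7}: first the entry in bucket 7 is
  // copied into vacant bucket 12, then the new key is written into bucket 7.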
  bool FindCuckooPath(const char* internal_key, const Slice& user_key,
                      int* cuckoo_path, size_t* cuckoo_path_length,
                      int initial_hash_id = 0);

  // Perform quick insert by checking whether there is a vacant bucket in one
  // of the possible locations of the input key.  If so, the function will
  // return true and the key will be stored in that vacant bucket.
  //
  // This function is a helper function of FindCuckooPath that discovers the
  // first possible steps of a cuckoo path.  It begins by computing the
  // possible locations of the input key (and stores them in bucket_ids.)
  // Then, if one of its possible locations is vacant, the input key will
  // be stored in that vacant space and the function will return true.
  // Otherwise, the function will return false, indicating that a complete
  // search for a cuckoo path is needed.
  bool QuickInsert(const char* internal_key, const Slice& user_key,
                   int bucket_ids[], const int initial_hash_id);

  // Returns the pointer to the internal iterator to the buckets where buckets
  // are sorted according to the user specified KeyComparator.  Note that
  // any insert after this function call may affect the sorted nature of
  // the returned iterator.
  virtual MemTableRep::Iterator* GetIterator(Arena* arena) override {
    std::vector<const char*> compact_buckets;
    for (unsigned int bid = 0; bid < bucket_count_; ++bid) {
      const char* bucket = cuckoo_array_[bid].load(std::memory_order_relaxed);
      if (bucket != nullptr) {
        compact_buckets.push_back(bucket);
      }
    }
    MemTableRep* backup_table = backup_table_.get();
    if (backup_table != nullptr) {
      std::unique_ptr<MemTableRep::Iterator> iter(backup_table->GetIterator());
      for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
        compact_buckets.push_back(iter->key());
      }
    }
    if (arena == nullptr) {
      return new Iterator(
          std::shared_ptr<std::vector<const char*>>(
              new std::vector<const char*>(std::move(compact_buckets))),
          compare_);
    } else {
      auto mem = arena->AllocateAligned(sizeof(Iterator));
      return new (mem) Iterator(
          std::shared_ptr<std::vector<const char*>>(
              new std::vector<const char*>(std::move(compact_buckets))),
          compare_);
    }
  }
};

void HashCuckooRep::Get(const LookupKey& key, void* callback_args,
                        bool (*callback_func)(void* arg, const char* entry)) {
  Slice user_key = key.user_key();
  for (unsigned int hid = 0; hid < hash_function_count_; ++hid) {
    const char* bucket =
        cuckoo_array_[GetHash(user_key, hid)].load(std::memory_order_acquire);
    if (bucket != nullptr) {
      Slice bucket_user_key = UserKey(bucket);
      if (user_key == bucket_user_key) {
        callback_func(callback_args, bucket);
        break;
      }
    } else {
      // as Put() always stores at the vacant bucket located by the
      // hash function with the smallest possible id, when we first
      // find a vacant bucket in Get(), that means a miss.
      break;
    }
  }
  MemTableRep* backup_table = backup_table_.get();
  if (backup_table != nullptr) {
    backup_table->Get(key, callback_args, callback_func);
  }
}

void HashCuckooRep::Insert(KeyHandle handle) {
  static const float kMaxFullness = 0.90f;

  auto* key = static_cast<char*>(handle);
  int initial_hash_id = 0;
  size_t cuckoo_path_length = 0;
  auto user_key = UserKey(key);
  // find cuckoo path
  if (FindCuckooPath(key, user_key, cuckoo_path_, &cuckoo_path_length,
                     initial_hash_id) == false) {
    // if FindCuckooPath() returns false, then we can't find a vacant bucket
    // for this key even though we have used up all the hash functions.
    // Then use a backup memtable to store such keys, which will further
    // make this mem-table become immutable.
    if (backup_table_.get() == nullptr) {
      VectorRepFactory factory(10);
      backup_table_.reset(
          factory.CreateMemTableRep(compare_, allocator_, nullptr, nullptr));
      is_nearly_full_ = true;
    }
    backup_table_->Insert(key);
    return;
  }
  // when reaching this point, it means the insert can be done successfully.
  occupied_count_++;
  if (occupied_count_ >= bucket_count_ * kMaxFullness) {
    is_nearly_full_ = true;
  }

  // perform the kickout process only when the cuckoo path is non-trivial;
  // a zero-length path means QuickInsert() already stored the key.
  if (cuckoo_path_length == 0) return;

  // the cuckoo path stores the kickout path in reverse order.
  // so the kickout or displacement is actually performed
  // in reverse order, which avoids false-negatives on read
  // by moving each key involved in the cuckoo path to the new
  // location before replacing it.
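  //
  // Worked example (hypothetical ids): with cuckoo_path = {5, 9, 2} and
  // bucket 5 vacant, the loop below first copies the entry in bucket 9
  // into bucket 5, then copies the entry in bucket 2 into bucket 9, and
  // finally the new key is stored into bucket 2.  Every displaced entry
  // is visible at its new location before its old slot is overwritten.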
  for (size_t i = 1; i < cuckoo_path_length; ++i) {
    int kicked_out_bid = cuckoo_path_[i - 1];
    int current_bid = cuckoo_path_[i];
    // since we only allow one writer at a time, it is safe to do relaxed read.
    cuckoo_array_[kicked_out_bid]
        .store(cuckoo_array_[current_bid].load(std::memory_order_relaxed),
               std::memory_order_release);
  }
  int insert_key_bid = cuckoo_path_[cuckoo_path_length - 1];
  cuckoo_array_[insert_key_bid].store(key, std::memory_order_release);
}

bool HashCuckooRep::Contains(const char* internal_key) const {
  auto user_key = UserKey(internal_key);
  for (unsigned int hid = 0; hid < hash_function_count_; ++hid) {
    const char* stored_key =
        cuckoo_array_[GetHash(user_key, hid)].load(std::memory_order_acquire);
    if (stored_key != nullptr) {
      if (compare_(internal_key, stored_key) == 0) {
        return true;
      }
    }
  }
  return false;
}

bool HashCuckooRep::QuickInsert(const char* internal_key, const Slice& user_key,
                                int bucket_ids[], const int initial_hash_id) {
  int cuckoo_bucket_id = -1;

  // Below does the following:
  // 0. Calculate all possible locations of the input key.
  // 1. Check if there is a bucket having the same user_key as the input.
  // 2. If there exists such a bucket, then replace this bucket with the
  //    newly inserted data and return.  This step also performs duplication
  //    check.
  // 3. If no such bucket exists but a vacant bucket exists, then insert the
  //    input data into it.
  // 4. If steps 1 to 3 all fail, then return false.
  for (unsigned int hid = initial_hash_id; hid < hash_function_count_; ++hid) {
    bucket_ids[hid] = GetHash(user_key, hid);
    // since only one PUT is allowed at a time, and this is part of the PUT
    // operation, we can safely perform a relaxed load.
    const char* stored_key =
        cuckoo_array_[bucket_ids[hid]].load(std::memory_order_relaxed);
    if (stored_key == nullptr) {
      if (cuckoo_bucket_id == -1) {
        cuckoo_bucket_id = bucket_ids[hid];
      }
    } else {
      const auto bucket_user_key = UserKey(stored_key);
      if (bucket_user_key.compare(user_key) == 0) {
        cuckoo_bucket_id = bucket_ids[hid];
        assert(cuckoo_bucket_id != -1);
        break;
      }
    }
  }

  if (cuckoo_bucket_id != -1) {
    cuckoo_array_[cuckoo_bucket_id].store(const_cast<char*>(internal_key),
                                          std::memory_order_release);
    return true;
  }

  return false;
}

// Perform a pre-check and find the shortest cuckoo path.  A cuckoo path
// is a displacement sequence for inserting the specified input key.
//
// @return true if it successfully found a vacant space or cuckoo-path.
// If the return value is true but the length of cuckoo_path is zero,
// then it indicates that a vacant bucket, or a bucket whose user key
// matches the input, was found and a quick insertion was done.
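//
// The search is bounded in two ways: paths deeper than
// cuckoo_path_max_depth_ are abandoned, and at most
// kCuckooPathMaxSearchSteps steps are ever enqueued, so a failed search
// terminates quickly instead of scanning the whole table.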
bool HashCuckooRep::FindCuckooPath(const char* internal_key,
                                   const Slice& user_key, int* cuckoo_path,
                                   size_t* cuckoo_path_length,
                                   const int initial_hash_id) {
  int bucket_ids[HashCuckooRepFactory::kMaxHashCount];
  *cuckoo_path_length = 0;

  if (QuickInsert(internal_key, user_key, bucket_ids, initial_hash_id)) {
    return true;
  }
  // If this step is reached, then it means:
  // 1. no vacant bucket in any of the possible locations of the input key.
  // 2. none of the possible locations of the input key has the same user
  //    key as the input `internal_key`.

  // reset the front (read) and back (write) indices of step_buffer_.
  step_buffer_.reset();

  for (unsigned int hid = initial_hash_id; hid < hash_function_count_; ++hid) {
    /// CuckooStep& current_step = step_queue_[front_pos++];
    CuckooStep& current_step = step_buffer_.NextWriteBuffer();
    current_step.bucket_id_ = bucket_ids[hid];
    current_step.prev_step_id_ = CuckooStep::kNullStep;
    current_step.depth_ = 1;
  }

  while (step_buffer_.HasNewWrite()) {
    int step_id = step_buffer_.read_index_;
    const CuckooStep& step = step_buffer_.ReadNext();
    // Since this is a BFS process, the first step whose depth exceeds the
    // maximum allowed depth indicates that all the remaining steps in the
    // step buffer queue will also exceed the maximum depth.  Return false
    // immediately, indicating we can't find a vacant bucket for the input
    // key within the maximum allowed depth.
    if (step.depth_ >= cuckoo_path_max_depth_) {
      return false;
    }
    // again, we can safely perform a relaxed load here as the current
    // thread is the only writer.
    Slice bucket_user_key =
        UserKey(cuckoo_array_[step.bucket_id_].load(std::memory_order_relaxed));
    if (step.prev_step_id_ != CuckooStep::kNullStep) {
      if (bucket_user_key == user_key) {
        // then there is a loop in the current path, stop discovering this path.
        continue;
      }
    }
    // if the current bucket stores its key at the key's nth candidate
    // location, then we only consider the key's mth locations where m > n.
    // This property makes sure that reads will not miss if we do have data
    // associated with the query key.
    //
    // The n and m in the above statement are the start_hid and hid in the
    // code.
    unsigned int start_hid = hash_function_count_;
    for (unsigned int hid = 0; hid < hash_function_count_; ++hid) {
      bucket_ids[hid] = GetHash(bucket_user_key, hid);
      if (step.bucket_id_ == bucket_ids[hid]) {
        start_hid = hid;
      }
    }
    // we must have found the bucket which is the key's current "home".
    assert(start_hid != hash_function_count_);

    // explore all possible next steps from the current step.
    for (unsigned int hid = start_hid + 1; hid < hash_function_count_; ++hid) {
      CuckooStep& next_step = step_buffer_.NextWriteBuffer();
      next_step.bucket_id_ = bucket_ids[hid];
      next_step.prev_step_id_ = step_id;
      next_step.depth_ = step.depth_ + 1;
      // once a vacant bucket is found, trace back all its previous steps
      // to generate a cuckoo path.
      if (cuckoo_array_[next_step.bucket_id_].load(std::memory_order_relaxed) ==
          nullptr) {
        // store the last step in the cuckoo path.  Note that cuckoo_path
        // stores steps in reverse order.  This allows us to move keys along
        // the cuckoo path by storing each key in the new place first before
        // removing it from the old place.  This property ensures reads will
        // not be missed while keys move along the cuckoo path.
        cuckoo_path[(*cuckoo_path_length)++] = next_step.bucket_id_;
        int depth;
        for (depth = step.depth_; depth > 0 && step_id != CuckooStep::kNullStep;
             depth--) {
          const CuckooStep& prev_step = step_buffer_.steps_[step_id];
          cuckoo_path[(*cuckoo_path_length)++] = prev_step.bucket_id_;
          step_id = prev_step.prev_step_id_;
        }
        assert(depth == 0 && step_id == CuckooStep::kNullStep);
        return true;
      }
      if (step_buffer_.IsFull()) {
        // if the buffer is full, then we have reached the maximum number of
        // cuckoo search steps.
        return false;
      }
    }
  }

  // We have tried all possible paths but are still unable to find a cuckoo
  // path that leads to a vacant bucket.
  return false;
}

HashCuckooRep::Iterator::Iterator(
    std::shared_ptr<std::vector<const char*>> bucket,
    const KeyComparator& compare)
    : bucket_(bucket),
      cit_(bucket_->end()),
      compare_(compare),
      sorted_(false) {}

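// DoSort() is deliberately lazy: the bucket vector is sorted once, on the
// first read through the iterator, so constructing the iterator itself
// stays cheap even when it is never actually scanned.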
void HashCuckooRep::Iterator::DoSort() const {
  if (!sorted_) {
    std::sort(bucket_->begin(), bucket_->end(),
              stl_wrappers::Compare(compare_));
    cit_ = bucket_->begin();
    sorted_ = true;
  }
}

// Returns true iff the iterator is positioned at a valid node.
bool HashCuckooRep::Iterator::Valid() const {
  DoSort();
  return cit_ != bucket_->end();
}

// Returns the key at the current position.
// REQUIRES: Valid()
const char* HashCuckooRep::Iterator::key() const {
  assert(Valid());
  return *cit_;
}

// Advances to the next position.
// REQUIRES: Valid()
void HashCuckooRep::Iterator::Next() {
  assert(Valid());
  if (cit_ == bucket_->end()) {
    return;
  }
  ++cit_;
}

// Advances to the previous position.
// REQUIRES: Valid()
void HashCuckooRep::Iterator::Prev() {
  assert(Valid());
  if (cit_ == bucket_->begin()) {
    // If you try to go back from the first element, the iterator should be
    // invalidated.  So we set it to past-the-end.  This means that you can
    // treat the container circularly.
    cit_ = bucket_->end();
  } else {
    --cit_;
  }
}

// Advance to the first entry with a key >= target
void HashCuckooRep::Iterator::Seek(const Slice& user_key,
                                   const char* memtable_key) {
  DoSort();
  // Do a binary search to find the first value not less than the target.
  const char* encoded_key =
      (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, user_key);
  cit_ = std::equal_range(bucket_->begin(), bucket_->end(), encoded_key,
                          [this](const char* a, const char* b) {
                            return compare_(a, b) < 0;
                          }).first;
}

// Retreat to the last entry with a key <= target.
// Not supported by HashCuckooRep::Iterator; calling it is an error.
void HashCuckooRep::Iterator::SeekForPrev(const Slice& /*user_key*/,
                                          const char* /*memtable_key*/) {
  assert(false);
}

// Position at the first entry in collection.
// Final state of iterator is Valid() iff collection is not empty.
void HashCuckooRep::Iterator::SeekToFirst() {
  DoSort();
  cit_ = bucket_->begin();
}

// Position at the last entry in collection.
// Final state of iterator is Valid() iff collection is not empty.
void HashCuckooRep::Iterator::SeekToLast() {
  DoSort();
  cit_ = bucket_->end();
  if (bucket_->size() != 0) {
    --cit_;
  }
}

}  // anonymous namespace

MemTableRep* HashCuckooRepFactory::CreateMemTableRep(
    const MemTableRep::KeyComparator& compare, Allocator* allocator,
    const SliceTransform* /*transform*/, Logger* /*logger*/) {
  // The estimated average fullness.  The write performance of any closed
  // hashing scheme degrades as the fullness of the mem-table increases.
  // Setting kFullness to a value around 0.7 can better avoid write
  // performance degradation while keeping efficient memory usage.
  static const float kFullness = 0.7f;
  size_t pointer_size = sizeof(std::atomic<const char*>);
  assert(write_buffer_size_ >= (average_data_size_ + pointer_size));
  size_t bucket_count =
      static_cast<size_t>(
          (write_buffer_size_ / (average_data_size_ + pointer_size)) /
              kFullness +
          1);
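  // For example (hypothetical sizing): with write_buffer_size_ = 64 << 20,
  // average_data_size_ = 64, and an 8-byte pointer, this yields roughly
  // (67108864 / 72) / 0.7 + 1, i.e. about 1.33 million buckets, so the
  // table is sized to run at about 70% occupancy when the write buffer
  // fills up.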
  unsigned int hash_function_count = hash_function_count_;
  if (hash_function_count < 2) {
    hash_function_count = 2;
  }
  if (hash_function_count > kMaxHashCount) {
    hash_function_count = kMaxHashCount;
  }
  return new HashCuckooRep(compare, allocator, bucket_count,
                           hash_function_count,
                           static_cast<size_t>(
                               (average_data_size_ + pointer_size) / kFullness));
}

MemTableRepFactory* NewHashCuckooRepFactory(size_t write_buffer_size,
                                            size_t average_data_size,
                                            unsigned int hash_function_count) {
  return new HashCuckooRepFactory(write_buffer_size, average_data_size,
                                  hash_function_count);
}

}  // namespace rocksdb
#endif  // ROCKSDB_LITE
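
// Illustrative usage (not part of this file): a HashCuckooRep-backed
// memtable is selected through DB options.  This sketch assumes the
// declaration of NewHashCuckooRepFactory in rocksdb/memtablerep.h supplies
// defaults for average_data_size and hash_function_count; single-writer
// mode is required because this rep assumes one writer at a time.
//
//   rocksdb::Options options;
//   options.memtable_factory.reset(
//       rocksdb::NewHashCuckooRepFactory(options.write_buffer_size));
//   options.allow_concurrent_memtable_write = false;  // single-writer rep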