]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae | 5 | // |
1e59de90 TL |
6 | #include <random> |
7 | ||
7c673cae | 8 | #include "db/memtable.h" |
f67539c2 TL |
9 | #include "memory/arena.h" |
10 | #include "memtable/inlineskiplist.h" | |
7c673cae | 11 | #include "rocksdb/memtablerep.h" |
1e59de90 TL |
12 | #include "rocksdb/utilities/options_type.h" |
13 | #include "util/string_util.h" | |
7c673cae | 14 | |
f67539c2 | 15 | namespace ROCKSDB_NAMESPACE { |
7c673cae FG |
16 | namespace { |
17 | class SkipListRep : public MemTableRep { | |
18 | InlineSkipList<const MemTableRep::KeyComparator&> skip_list_; | |
19 | const MemTableRep::KeyComparator& cmp_; | |
20 | const SliceTransform* transform_; | |
21 | const size_t lookahead_; | |
22 | ||
23 | friend class LookaheadIterator; | |
1e59de90 TL |
24 | |
25 | public: | |
26 | explicit SkipListRep(const MemTableRep::KeyComparator& compare, | |
27 | Allocator* allocator, const SliceTransform* transform, | |
28 | const size_t lookahead) | |
29 | : MemTableRep(allocator), | |
30 | skip_list_(compare, allocator), | |
31 | cmp_(compare), | |
32 | transform_(transform), | |
33 | lookahead_(lookahead) {} | |
34 | ||
35 | KeyHandle Allocate(const size_t len, char** buf) override { | |
36 | *buf = skip_list_.AllocateKey(len); | |
37 | return static_cast<KeyHandle>(*buf); | |
38 | } | |
7c673cae FG |
39 | |
40 | // Insert key into the list. | |
41 | // REQUIRES: nothing that compares equal to key is currently in the list. | |
1e59de90 TL |
42 | void Insert(KeyHandle handle) override { |
43 | skip_list_.Insert(static_cast<char*>(handle)); | |
44 | } | |
7c673cae | 45 | |
1e59de90 TL |
46 | bool InsertKey(KeyHandle handle) override { |
47 | return skip_list_.Insert(static_cast<char*>(handle)); | |
48 | } | |
11fdf7f2 | 49 | |
1e59de90 TL |
50 | void InsertWithHint(KeyHandle handle, void** hint) override { |
51 | skip_list_.InsertWithHint(static_cast<char*>(handle), hint); | |
52 | } | |
7c673cae | 53 | |
1e59de90 TL |
54 | bool InsertKeyWithHint(KeyHandle handle, void** hint) override { |
55 | return skip_list_.InsertWithHint(static_cast<char*>(handle), hint); | |
56 | } | |
11fdf7f2 | 57 | |
1e59de90 TL |
58 | void InsertWithHintConcurrently(KeyHandle handle, void** hint) override { |
59 | skip_list_.InsertWithHintConcurrently(static_cast<char*>(handle), hint); | |
60 | } | |
f67539c2 | 61 | |
1e59de90 TL |
62 | bool InsertKeyWithHintConcurrently(KeyHandle handle, void** hint) override { |
63 | return skip_list_.InsertWithHintConcurrently(static_cast<char*>(handle), | |
64 | hint); | |
65 | } | |
f67539c2 | 66 | |
1e59de90 TL |
67 | void InsertConcurrently(KeyHandle handle) override { |
68 | skip_list_.InsertConcurrently(static_cast<char*>(handle)); | |
69 | } | |
7c673cae | 70 | |
1e59de90 TL |
71 | bool InsertKeyConcurrently(KeyHandle handle) override { |
72 | return skip_list_.InsertConcurrently(static_cast<char*>(handle)); | |
73 | } | |
11fdf7f2 | 74 | |
7c673cae | 75 | // Returns true iff an entry that compares equal to key is in the list. |
1e59de90 TL |
76 | bool Contains(const char* key) const override { |
77 | return skip_list_.Contains(key); | |
78 | } | |
79 | ||
80 | size_t ApproximateMemoryUsage() override { | |
81 | // All memory is allocated through allocator; nothing to report here | |
82 | return 0; | |
83 | } | |
84 | ||
85 | void Get(const LookupKey& k, void* callback_args, | |
86 | bool (*callback_func)(void* arg, const char* entry)) override { | |
87 | SkipListRep::Iterator iter(&skip_list_); | |
88 | Slice dummy_slice; | |
89 | for (iter.Seek(dummy_slice, k.memtable_key().data()); | |
90 | iter.Valid() && callback_func(callback_args, iter.key()); | |
91 | iter.Next()) { | |
92 | } | |
93 | } | |
7c673cae FG |
94 | |
95 | uint64_t ApproximateNumEntries(const Slice& start_ikey, | |
96 | const Slice& end_ikey) override { | |
97 | std::string tmp; | |
98 | uint64_t start_count = | |
99 | skip_list_.EstimateCount(EncodeKey(&tmp, start_ikey)); | |
100 | uint64_t end_count = skip_list_.EstimateCount(EncodeKey(&tmp, end_ikey)); | |
101 | return (end_count >= start_count) ? (end_count - start_count) : 0; | |
102 | } | |
103 | ||
1e59de90 TL |
104 | void UniqueRandomSample(const uint64_t num_entries, |
105 | const uint64_t target_sample_size, | |
106 | std::unordered_set<const char*>* entries) override { | |
107 | entries->clear(); | |
108 | // Avoid divide-by-0. | |
109 | assert(target_sample_size > 0); | |
110 | assert(num_entries > 0); | |
111 | // NOTE: the size of entries is not enforced to be exactly | |
112 | // target_sample_size at the end of this function, it might be slightly | |
113 | // greater or smaller. | |
114 | SkipListRep::Iterator iter(&skip_list_); | |
115 | // There are two methods to create the subset of samples (size m) | |
116 | // from the table containing N elements: | |
117 | // 1-Iterate linearly through the N memtable entries. For each entry i, | |
118 | // add it to the sample set with a probability | |
119 | // (target_sample_size - entries.size() ) / (N-i). | |
120 | // | |
121 | // 2-Pick m random elements without repetition. | |
122 | // We pick Option 2 when m<sqrt(N) and | |
123 | // Option 1 when m > sqrt(N). | |
124 | if (target_sample_size > | |
125 | static_cast<uint64_t>(std::sqrt(1.0 * num_entries))) { | |
126 | Random* rnd = Random::GetTLSInstance(); | |
127 | iter.SeekToFirst(); | |
128 | uint64_t counter = 0, num_samples_left = target_sample_size; | |
129 | for (; iter.Valid() && (num_samples_left > 0); iter.Next(), counter++) { | |
130 | // Add entry to sample set with probability | |
131 | // num_samples_left/(num_entries - counter). | |
132 | if (rnd->Next() % (num_entries - counter) < num_samples_left) { | |
133 | entries->insert(iter.key()); | |
134 | num_samples_left--; | |
135 | } | |
136 | } | |
137 | } else { | |
138 | // Option 2: pick m random elements with no duplicates. | |
139 | // If Option 2 is picked, then target_sample_size<sqrt(N) | |
140 | // Using a set spares the need to check for duplicates. | |
141 | for (uint64_t i = 0; i < target_sample_size; i++) { | |
142 | // We give it 5 attempts to find a non-duplicate | |
143 | // With 5 attempts, the chances of returning `entries` set | |
144 | // of size target_sample_size is: | |
145 | // PROD_{i=1}^{target_sample_size-1} [1-(i/N)^5] | |
146 | // which is monotonically increasing with N in the worse case | |
147 | // of target_sample_size=sqrt(N), and is always >99.9% for N>4. | |
148 | // At worst, for the final pick , when m=sqrt(N) there is | |
149 | // a probability of p= 1/sqrt(N) chances to find a duplicate. | |
150 | for (uint64_t j = 0; j < 5; j++) { | |
151 | iter.RandomSeek(); | |
152 | // unordered_set::insert returns pair<iterator, bool>. | |
153 | // The second element is true if an insert successfully happened. | |
154 | // If element is already in the set, this bool will be false, and | |
155 | // true otherwise. | |
156 | if ((entries->insert(iter.key())).second) { | |
157 | break; | |
158 | } | |
159 | } | |
160 | } | |
161 | } | |
162 | } | |
163 | ||
494da23a | 164 | ~SkipListRep() override {} |
7c673cae FG |
165 | |
166 | // Iteration over the contents of a skip list | |
167 | class Iterator : public MemTableRep::Iterator { | |
168 | InlineSkipList<const MemTableRep::KeyComparator&>::Iterator iter_; | |
169 | ||
170 | public: | |
171 | // Initialize an iterator over the specified list. | |
172 | // The returned iterator is not valid. | |
173 | explicit Iterator( | |
174 | const InlineSkipList<const MemTableRep::KeyComparator&>* list) | |
175 | : iter_(list) {} | |
176 | ||
494da23a | 177 | ~Iterator() override {} |
7c673cae FG |
178 | |
179 | // Returns true iff the iterator is positioned at a valid node. | |
494da23a | 180 | bool Valid() const override { return iter_.Valid(); } |
7c673cae FG |
181 | |
182 | // Returns the key at the current position. | |
183 | // REQUIRES: Valid() | |
494da23a | 184 | const char* key() const override { return iter_.key(); } |
7c673cae FG |
185 | |
186 | // Advances to the next position. | |
187 | // REQUIRES: Valid() | |
494da23a | 188 | void Next() override { iter_.Next(); } |
7c673cae FG |
189 | |
190 | // Advances to the previous position. | |
191 | // REQUIRES: Valid() | |
494da23a | 192 | void Prev() override { iter_.Prev(); } |
7c673cae FG |
193 | |
194 | // Advance to the first entry with a key >= target | |
494da23a | 195 | void Seek(const Slice& user_key, const char* memtable_key) override { |
7c673cae FG |
196 | if (memtable_key != nullptr) { |
197 | iter_.Seek(memtable_key); | |
198 | } else { | |
199 | iter_.Seek(EncodeKey(&tmp_, user_key)); | |
200 | } | |
201 | } | |
202 | ||
203 | // Retreat to the last entry with a key <= target | |
494da23a | 204 | void SeekForPrev(const Slice& user_key, const char* memtable_key) override { |
7c673cae FG |
205 | if (memtable_key != nullptr) { |
206 | iter_.SeekForPrev(memtable_key); | |
207 | } else { | |
208 | iter_.SeekForPrev(EncodeKey(&tmp_, user_key)); | |
209 | } | |
210 | } | |
211 | ||
1e59de90 TL |
212 | void RandomSeek() override { iter_.RandomSeek(); } |
213 | ||
7c673cae FG |
214 | // Position at the first entry in list. |
215 | // Final state of iterator is Valid() iff list is not empty. | |
494da23a | 216 | void SeekToFirst() override { iter_.SeekToFirst(); } |
7c673cae FG |
217 | |
218 | // Position at the last entry in list. | |
219 | // Final state of iterator is Valid() iff list is not empty. | |
494da23a TL |
220 | void SeekToLast() override { iter_.SeekToLast(); } |
221 | ||
7c673cae | 222 | protected: |
1e59de90 | 223 | std::string tmp_; // For passing to EncodeKey |
7c673cae FG |
224 | }; |
225 | ||
226 | // Iterator over the contents of a skip list which also keeps track of the | |
227 | // previously visited node. In Seek(), it examines a few nodes after it | |
228 | // first, falling back to O(log n) search from the head of the list only if | |
229 | // the target key hasn't been found. | |
230 | class LookaheadIterator : public MemTableRep::Iterator { | |
231 | public: | |
1e59de90 TL |
232 | explicit LookaheadIterator(const SkipListRep& rep) |
233 | : rep_(rep), iter_(&rep_.skip_list_), prev_(iter_) {} | |
7c673cae | 234 | |
494da23a | 235 | ~LookaheadIterator() override {} |
7c673cae | 236 | |
494da23a | 237 | bool Valid() const override { return iter_.Valid(); } |
7c673cae | 238 | |
494da23a | 239 | const char* key() const override { |
7c673cae FG |
240 | assert(Valid()); |
241 | return iter_.key(); | |
242 | } | |
243 | ||
494da23a | 244 | void Next() override { |
7c673cae FG |
245 | assert(Valid()); |
246 | ||
247 | bool advance_prev = true; | |
248 | if (prev_.Valid()) { | |
249 | auto k1 = rep_.UserKey(prev_.key()); | |
250 | auto k2 = rep_.UserKey(iter_.key()); | |
251 | ||
252 | if (k1.compare(k2) == 0) { | |
253 | // same user key, don't move prev_ | |
254 | advance_prev = false; | |
255 | } else if (rep_.transform_) { | |
256 | // only advance prev_ if it has the same prefix as iter_ | |
257 | auto t1 = rep_.transform_->Transform(k1); | |
258 | auto t2 = rep_.transform_->Transform(k2); | |
259 | advance_prev = t1.compare(t2) == 0; | |
260 | } | |
261 | } | |
262 | ||
263 | if (advance_prev) { | |
264 | prev_ = iter_; | |
265 | } | |
266 | iter_.Next(); | |
267 | } | |
268 | ||
494da23a | 269 | void Prev() override { |
7c673cae FG |
270 | assert(Valid()); |
271 | iter_.Prev(); | |
272 | prev_ = iter_; | |
273 | } | |
274 | ||
494da23a | 275 | void Seek(const Slice& internal_key, const char* memtable_key) override { |
1e59de90 TL |
276 | const char* encoded_key = (memtable_key != nullptr) |
277 | ? memtable_key | |
278 | : EncodeKey(&tmp_, internal_key); | |
7c673cae FG |
279 | |
280 | if (prev_.Valid() && rep_.cmp_(encoded_key, prev_.key()) >= 0) { | |
281 | // prev_.key() is smaller or equal to our target key; do a quick | |
282 | // linear search (at most lookahead_ steps) starting from prev_ | |
283 | iter_ = prev_; | |
284 | ||
285 | size_t cur = 0; | |
286 | while (cur++ <= rep_.lookahead_ && iter_.Valid()) { | |
287 | if (rep_.cmp_(encoded_key, iter_.key()) <= 0) { | |
288 | return; | |
289 | } | |
290 | Next(); | |
291 | } | |
292 | } | |
293 | ||
294 | iter_.Seek(encoded_key); | |
295 | prev_ = iter_; | |
296 | } | |
297 | ||
494da23a TL |
298 | void SeekForPrev(const Slice& internal_key, |
299 | const char* memtable_key) override { | |
7c673cae FG |
300 | const char* encoded_key = (memtable_key != nullptr) |
301 | ? memtable_key | |
302 | : EncodeKey(&tmp_, internal_key); | |
303 | iter_.SeekForPrev(encoded_key); | |
304 | prev_ = iter_; | |
305 | } | |
306 | ||
494da23a | 307 | void SeekToFirst() override { |
7c673cae FG |
308 | iter_.SeekToFirst(); |
309 | prev_ = iter_; | |
310 | } | |
311 | ||
494da23a | 312 | void SeekToLast() override { |
7c673cae FG |
313 | iter_.SeekToLast(); |
314 | prev_ = iter_; | |
315 | } | |
316 | ||
317 | protected: | |
1e59de90 | 318 | std::string tmp_; // For passing to EncodeKey |
7c673cae FG |
319 | |
320 | private: | |
321 | const SkipListRep& rep_; | |
322 | InlineSkipList<const MemTableRep::KeyComparator&>::Iterator iter_; | |
323 | InlineSkipList<const MemTableRep::KeyComparator&>::Iterator prev_; | |
324 | }; | |
325 | ||
494da23a | 326 | MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override { |
7c673cae | 327 | if (lookahead_ > 0) { |
1e59de90 TL |
328 | void* mem = |
329 | arena ? arena->AllocateAligned(sizeof(SkipListRep::LookaheadIterator)) | |
330 | : | |
331 | operator new(sizeof(SkipListRep::LookaheadIterator)); | |
7c673cae FG |
332 | return new (mem) SkipListRep::LookaheadIterator(*this); |
333 | } else { | |
1e59de90 TL |
334 | void* mem = arena ? arena->AllocateAligned(sizeof(SkipListRep::Iterator)) |
335 | : | |
336 | operator new(sizeof(SkipListRep::Iterator)); | |
7c673cae FG |
337 | return new (mem) SkipListRep::Iterator(&skip_list_); |
338 | } | |
339 | } | |
340 | }; | |
1e59de90 TL |
341 | } // namespace |
342 | ||
343 | static std::unordered_map<std::string, OptionTypeInfo> skiplist_factory_info = { | |
344 | #ifndef ROCKSDB_LITE | |
345 | {"lookahead", | |
346 | {0, OptionType::kSizeT, OptionVerificationType::kNormal, | |
347 | OptionTypeFlags::kDontSerialize /*Since it is part of the ID*/}}, | |
348 | #endif | |
349 | }; | |
350 | ||
351 | SkipListFactory::SkipListFactory(size_t lookahead) : lookahead_(lookahead) { | |
352 | RegisterOptions("SkipListFactoryOptions", &lookahead_, | |
353 | &skiplist_factory_info); | |
354 | } | |
355 | ||
356 | std::string SkipListFactory::GetId() const { | |
357 | std::string id = Name(); | |
358 | if (lookahead_ > 0) { | |
359 | id.append(":").append(std::to_string(lookahead_)); | |
360 | } | |
361 | return id; | |
7c673cae FG |
362 | } |
363 | ||
364 | MemTableRep* SkipListFactory::CreateMemTableRep( | |
11fdf7f2 TL |
365 | const MemTableRep::KeyComparator& compare, Allocator* allocator, |
366 | const SliceTransform* transform, Logger* /*logger*/) { | |
7c673cae FG |
367 | return new SkipListRep(compare, allocator, transform, lookahead_); |
368 | } | |
369 | ||
f67539c2 | 370 | } // namespace ROCKSDB_NAMESPACE |