]>
git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/memtable/skiplistrep.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
8 #include "db/memtable.h"
9 #include "memory/arena.h"
10 #include "memtable/inlineskiplist.h"
11 #include "rocksdb/memtablerep.h"
12 #include "rocksdb/utilities/options_type.h"
13 #include "util/string_util.h"
15 namespace ROCKSDB_NAMESPACE
{
17 class SkipListRep
: public MemTableRep
{
18 InlineSkipList
<const MemTableRep::KeyComparator
&> skip_list_
;
19 const MemTableRep::KeyComparator
& cmp_
;
20 const SliceTransform
* transform_
;
21 const size_t lookahead_
;
23 friend class LookaheadIterator
;
26 explicit SkipListRep(const MemTableRep::KeyComparator
& compare
,
27 Allocator
* allocator
, const SliceTransform
* transform
,
28 const size_t lookahead
)
29 : MemTableRep(allocator
),
30 skip_list_(compare
, allocator
),
32 transform_(transform
),
33 lookahead_(lookahead
) {}
35 KeyHandle
Allocate(const size_t len
, char** buf
) override
{
36 *buf
= skip_list_
.AllocateKey(len
);
37 return static_cast<KeyHandle
>(*buf
);
40 // Insert key into the list.
41 // REQUIRES: nothing that compares equal to key is currently in the list.
42 void Insert(KeyHandle handle
) override
{
43 skip_list_
.Insert(static_cast<char*>(handle
));
46 bool InsertKey(KeyHandle handle
) override
{
47 return skip_list_
.Insert(static_cast<char*>(handle
));
50 void InsertWithHint(KeyHandle handle
, void** hint
) override
{
51 skip_list_
.InsertWithHint(static_cast<char*>(handle
), hint
);
54 bool InsertKeyWithHint(KeyHandle handle
, void** hint
) override
{
55 return skip_list_
.InsertWithHint(static_cast<char*>(handle
), hint
);
58 void InsertWithHintConcurrently(KeyHandle handle
, void** hint
) override
{
59 skip_list_
.InsertWithHintConcurrently(static_cast<char*>(handle
), hint
);
62 bool InsertKeyWithHintConcurrently(KeyHandle handle
, void** hint
) override
{
63 return skip_list_
.InsertWithHintConcurrently(static_cast<char*>(handle
),
67 void InsertConcurrently(KeyHandle handle
) override
{
68 skip_list_
.InsertConcurrently(static_cast<char*>(handle
));
71 bool InsertKeyConcurrently(KeyHandle handle
) override
{
72 return skip_list_
.InsertConcurrently(static_cast<char*>(handle
));
75 // Returns true iff an entry that compares equal to key is in the list.
76 bool Contains(const char* key
) const override
{
77 return skip_list_
.Contains(key
);
80 size_t ApproximateMemoryUsage() override
{
81 // All memory is allocated through allocator; nothing to report here
85 void Get(const LookupKey
& k
, void* callback_args
,
86 bool (*callback_func
)(void* arg
, const char* entry
)) override
{
87 SkipListRep::Iterator
iter(&skip_list_
);
89 for (iter
.Seek(dummy_slice
, k
.memtable_key().data());
90 iter
.Valid() && callback_func(callback_args
, iter
.key());
95 uint64_t ApproximateNumEntries(const Slice
& start_ikey
,
96 const Slice
& end_ikey
) override
{
98 uint64_t start_count
=
99 skip_list_
.EstimateCount(EncodeKey(&tmp
, start_ikey
));
100 uint64_t end_count
= skip_list_
.EstimateCount(EncodeKey(&tmp
, end_ikey
));
101 return (end_count
>= start_count
) ? (end_count
- start_count
) : 0;
104 void UniqueRandomSample(const uint64_t num_entries
,
105 const uint64_t target_sample_size
,
106 std::unordered_set
<const char*>* entries
) override
{
108 // Avoid divide-by-0.
109 assert(target_sample_size
> 0);
110 assert(num_entries
> 0);
111 // NOTE: the size of entries is not enforced to be exactly
112 // target_sample_size at the end of this function, it might be slightly
113 // greater or smaller.
114 SkipListRep::Iterator
iter(&skip_list_
);
115 // There are two methods to create the subset of samples (size m)
116 // from the table containing N elements:
117 // 1-Iterate linearly through the N memtable entries. For each entry i,
118 // add it to the sample set with a probability
119 // (target_sample_size - entries.size() ) / (N-i).
121 // 2-Pick m random elements without repetition.
122 // We pick Option 2 when m<sqrt(N) and
123 // Option 1 when m > sqrt(N).
124 if (target_sample_size
>
125 static_cast<uint64_t>(std::sqrt(1.0 * num_entries
))) {
126 Random
* rnd
= Random::GetTLSInstance();
128 uint64_t counter
= 0, num_samples_left
= target_sample_size
;
129 for (; iter
.Valid() && (num_samples_left
> 0); iter
.Next(), counter
++) {
130 // Add entry to sample set with probability
131 // num_samples_left/(num_entries - counter).
132 if (rnd
->Next() % (num_entries
- counter
) < num_samples_left
) {
133 entries
->insert(iter
.key());
138 // Option 2: pick m random elements with no duplicates.
139 // If Option 2 is picked, then target_sample_size<sqrt(N)
140 // Using a set spares the need to check for duplicates.
141 for (uint64_t i
= 0; i
< target_sample_size
; i
++) {
142 // We give it 5 attempts to find a non-duplicate
143 // With 5 attempts, the chances of returning `entries` set
144 // of size target_sample_size is:
145 // PROD_{i=1}^{target_sample_size-1} [1-(i/N)^5]
146 // which is monotonically increasing with N in the worse case
147 // of target_sample_size=sqrt(N), and is always >99.9% for N>4.
148 // At worst, for the final pick , when m=sqrt(N) there is
149 // a probability of p= 1/sqrt(N) chances to find a duplicate.
150 for (uint64_t j
= 0; j
< 5; j
++) {
152 // unordered_set::insert returns pair<iterator, bool>.
153 // The second element is true if an insert successfully happened.
154 // If element is already in the set, this bool will be false, and
156 if ((entries
->insert(iter
.key())).second
) {
164 ~SkipListRep() override
{}
166 // Iteration over the contents of a skip list
167 class Iterator
: public MemTableRep::Iterator
{
168 InlineSkipList
<const MemTableRep::KeyComparator
&>::Iterator iter_
;
171 // Initialize an iterator over the specified list.
172 // The returned iterator is not valid.
174 const InlineSkipList
<const MemTableRep::KeyComparator
&>* list
)
177 ~Iterator() override
{}
179 // Returns true iff the iterator is positioned at a valid node.
180 bool Valid() const override
{ return iter_
.Valid(); }
182 // Returns the key at the current position.
184 const char* key() const override
{ return iter_
.key(); }
186 // Advances to the next position.
188 void Next() override
{ iter_
.Next(); }
190 // Advances to the previous position.
192 void Prev() override
{ iter_
.Prev(); }
194 // Advance to the first entry with a key >= target
195 void Seek(const Slice
& user_key
, const char* memtable_key
) override
{
196 if (memtable_key
!= nullptr) {
197 iter_
.Seek(memtable_key
);
199 iter_
.Seek(EncodeKey(&tmp_
, user_key
));
203 // Retreat to the last entry with a key <= target
204 void SeekForPrev(const Slice
& user_key
, const char* memtable_key
) override
{
205 if (memtable_key
!= nullptr) {
206 iter_
.SeekForPrev(memtable_key
);
208 iter_
.SeekForPrev(EncodeKey(&tmp_
, user_key
));
212 void RandomSeek() override
{ iter_
.RandomSeek(); }
214 // Position at the first entry in list.
215 // Final state of iterator is Valid() iff list is not empty.
216 void SeekToFirst() override
{ iter_
.SeekToFirst(); }
218 // Position at the last entry in list.
219 // Final state of iterator is Valid() iff list is not empty.
220 void SeekToLast() override
{ iter_
.SeekToLast(); }
223 std::string tmp_
; // For passing to EncodeKey
226 // Iterator over the contents of a skip list which also keeps track of the
227 // previously visited node. In Seek(), it examines a few nodes after it
228 // first, falling back to O(log n) search from the head of the list only if
229 // the target key hasn't been found.
230 class LookaheadIterator
: public MemTableRep::Iterator
{
232 explicit LookaheadIterator(const SkipListRep
& rep
)
233 : rep_(rep
), iter_(&rep_
.skip_list_
), prev_(iter_
) {}
235 ~LookaheadIterator() override
{}
237 bool Valid() const override
{ return iter_
.Valid(); }
239 const char* key() const override
{
244 void Next() override
{
247 bool advance_prev
= true;
249 auto k1
= rep_
.UserKey(prev_
.key());
250 auto k2
= rep_
.UserKey(iter_
.key());
252 if (k1
.compare(k2
) == 0) {
253 // same user key, don't move prev_
254 advance_prev
= false;
255 } else if (rep_
.transform_
) {
256 // only advance prev_ if it has the same prefix as iter_
257 auto t1
= rep_
.transform_
->Transform(k1
);
258 auto t2
= rep_
.transform_
->Transform(k2
);
259 advance_prev
= t1
.compare(t2
) == 0;
269 void Prev() override
{
275 void Seek(const Slice
& internal_key
, const char* memtable_key
) override
{
276 const char* encoded_key
= (memtable_key
!= nullptr)
278 : EncodeKey(&tmp_
, internal_key
);
280 if (prev_
.Valid() && rep_
.cmp_(encoded_key
, prev_
.key()) >= 0) {
281 // prev_.key() is smaller or equal to our target key; do a quick
282 // linear search (at most lookahead_ steps) starting from prev_
286 while (cur
++ <= rep_
.lookahead_
&& iter_
.Valid()) {
287 if (rep_
.cmp_(encoded_key
, iter_
.key()) <= 0) {
294 iter_
.Seek(encoded_key
);
298 void SeekForPrev(const Slice
& internal_key
,
299 const char* memtable_key
) override
{
300 const char* encoded_key
= (memtable_key
!= nullptr)
302 : EncodeKey(&tmp_
, internal_key
);
303 iter_
.SeekForPrev(encoded_key
);
307 void SeekToFirst() override
{
312 void SeekToLast() override
{
318 std::string tmp_
; // For passing to EncodeKey
321 const SkipListRep
& rep_
;
322 InlineSkipList
<const MemTableRep::KeyComparator
&>::Iterator iter_
;
323 InlineSkipList
<const MemTableRep::KeyComparator
&>::Iterator prev_
;
326 MemTableRep::Iterator
* GetIterator(Arena
* arena
= nullptr) override
{
327 if (lookahead_
> 0) {
329 arena
? arena
->AllocateAligned(sizeof(SkipListRep::LookaheadIterator
))
331 operator new(sizeof(SkipListRep::LookaheadIterator
));
332 return new (mem
) SkipListRep::LookaheadIterator(*this);
334 void* mem
= arena
? arena
->AllocateAligned(sizeof(SkipListRep::Iterator
))
336 operator new(sizeof(SkipListRep::Iterator
));
337 return new (mem
) SkipListRep::Iterator(&skip_list_
);
343 static std::unordered_map
<std::string
, OptionTypeInfo
> skiplist_factory_info
= {
346 {0, OptionType::kSizeT
, OptionVerificationType::kNormal
,
347 OptionTypeFlags::kDontSerialize
/*Since it is part of the ID*/}},
351 SkipListFactory::SkipListFactory(size_t lookahead
) : lookahead_(lookahead
) {
352 RegisterOptions("SkipListFactoryOptions", &lookahead_
,
353 &skiplist_factory_info
);
356 std::string
SkipListFactory::GetId() const {
357 std::string id
= Name();
358 if (lookahead_
> 0) {
359 id
.append(":").append(std::to_string(lookahead_
));
364 MemTableRep
* SkipListFactory::CreateMemTableRep(
365 const MemTableRep::KeyComparator
& compare
, Allocator
* allocator
,
366 const SliceTransform
* transform
, Logger
* /*logger*/) {
367 return new SkipListRep(compare
, allocator
, transform
, lookahead_
);
370 } // namespace ROCKSDB_NAMESPACE