1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
11 #include "table/cuckoo_table_reader.h"
18 #include "rocksdb/iterator.h"
19 #include "rocksdb/table.h"
20 #include "table/internal_iterator.h"
21 #include "table/meta_blocks.h"
22 #include "table/cuckoo_table_factory.h"
23 #include "table/get_context.h"
24 #include "util/arena.h"
25 #include "util/coding.h"
29 const uint64_t CACHE_LINE_MASK
= ~((uint64_t)CACHE_LINE_SIZE
- 1);
30 const uint32_t kInvalidIndex
= std::numeric_limits
<uint32_t>::max();
33 extern const uint64_t kCuckooTableMagicNumber
;
35 CuckooTableReader::CuckooTableReader(
36 const ImmutableCFOptions
& ioptions
,
37 std::unique_ptr
<RandomAccessFileReader
>&& file
, uint64_t file_size
,
38 const Comparator
* comparator
,
39 uint64_t (*get_slice_hash
)(const Slice
&, uint32_t, uint64_t))
40 : file_(std::move(file
)),
41 is_last_level_(false),
42 identity_as_first_hash_(false),
43 use_module_hash_(false),
50 cuckoo_block_size_(0),
51 cuckoo_block_bytes_minus_one_(0),
54 get_slice_hash_(get_slice_hash
) {
55 if (!ioptions
.allow_mmap_reads
) {
56 status_
= Status::InvalidArgument("File is not mmaped");
58 TableProperties
* props
= nullptr;
59 status_
= ReadTableProperties(file_
.get(), file_size
, kCuckooTableMagicNumber
,
60 ioptions
, &props
, true /* compression_type_missing */);
64 table_props_
.reset(props
);
65 auto& user_props
= props
->user_collected_properties
;
66 auto hash_funs
= user_props
.find(CuckooTablePropertyNames::kNumHashFunc
);
67 if (hash_funs
== user_props
.end()) {
68 status_
= Status::Corruption("Number of hash functions not found");
71 num_hash_func_
= *reinterpret_cast<const uint32_t*>(hash_funs
->second
.data());
72 auto unused_key
= user_props
.find(CuckooTablePropertyNames::kEmptyKey
);
73 if (unused_key
== user_props
.end()) {
74 status_
= Status::Corruption("Empty bucket value not found");
77 unused_key_
= unused_key
->second
;
79 key_length_
= static_cast<uint32_t>(props
->fixed_key_len
);
80 auto user_key_len
= user_props
.find(CuckooTablePropertyNames::kUserKeyLength
);
81 if (user_key_len
== user_props
.end()) {
82 status_
= Status::Corruption("User key length not found");
85 user_key_length_
= *reinterpret_cast<const uint32_t*>(
86 user_key_len
->second
.data());
88 auto value_length
= user_props
.find(CuckooTablePropertyNames::kValueLength
);
89 if (value_length
== user_props
.end()) {
90 status_
= Status::Corruption("Value length not found");
93 value_length_
= *reinterpret_cast<const uint32_t*>(
94 value_length
->second
.data());
95 bucket_length_
= key_length_
+ value_length_
;
97 auto hash_table_size
= user_props
.find(
98 CuckooTablePropertyNames::kHashTableSize
);
99 if (hash_table_size
== user_props
.end()) {
100 status_
= Status::Corruption("Hash table size not found");
103 table_size_
= *reinterpret_cast<const uint64_t*>(
104 hash_table_size
->second
.data());
106 auto is_last_level
= user_props
.find(CuckooTablePropertyNames::kIsLastLevel
);
107 if (is_last_level
== user_props
.end()) {
108 status_
= Status::Corruption("Is last level not found");
111 is_last_level_
= *reinterpret_cast<const bool*>(is_last_level
->second
.data());
113 auto identity_as_first_hash
= user_props
.find(
114 CuckooTablePropertyNames::kIdentityAsFirstHash
);
115 if (identity_as_first_hash
== user_props
.end()) {
116 status_
= Status::Corruption("identity as first hash not found");
119 identity_as_first_hash_
= *reinterpret_cast<const bool*>(
120 identity_as_first_hash
->second
.data());
122 auto use_module_hash
= user_props
.find(
123 CuckooTablePropertyNames::kUseModuleHash
);
124 if (use_module_hash
== user_props
.end()) {
125 status_
= Status::Corruption("hash type is not found");
128 use_module_hash_
= *reinterpret_cast<const bool*>(
129 use_module_hash
->second
.data());
130 auto cuckoo_block_size
= user_props
.find(
131 CuckooTablePropertyNames::kCuckooBlockSize
);
132 if (cuckoo_block_size
== user_props
.end()) {
133 status_
= Status::Corruption("Cuckoo block size not found");
136 cuckoo_block_size_
= *reinterpret_cast<const uint32_t*>(
137 cuckoo_block_size
->second
.data());
138 cuckoo_block_bytes_minus_one_
= cuckoo_block_size_
* bucket_length_
- 1;
139 status_
= file_
->Read(0, static_cast<size_t>(file_size
), &file_data_
, nullptr);
142 Status
CuckooTableReader::Get(const ReadOptions
& /*readOptions*/,
143 const Slice
& key
, GetContext
* get_context
,
144 const SliceTransform
* /* prefix_extractor */,
145 bool /*skip_filters*/) {
146 assert(key
.size() == key_length_
+ (is_last_level_
? 8 : 0));
147 Slice user_key
= ExtractUserKey(key
);
148 for (uint32_t hash_cnt
= 0; hash_cnt
< num_hash_func_
; ++hash_cnt
) {
149 uint64_t offset
= bucket_length_
* CuckooHash(
150 user_key
, hash_cnt
, use_module_hash_
, table_size_
,
151 identity_as_first_hash_
, get_slice_hash_
);
152 const char* bucket
= &file_data_
.data()[offset
];
153 for (uint32_t block_idx
= 0; block_idx
< cuckoo_block_size_
;
154 ++block_idx
, bucket
+= bucket_length_
) {
155 if (ucomp_
->Equal(Slice(unused_key_
.data(), user_key
.size()),
156 Slice(bucket
, user_key
.size()))) {
159 // Here, we compare only the user key part as we support only one entry
160 // per user key and we don't support snapshot.
161 if (ucomp_
->Equal(user_key
, Slice(bucket
, user_key
.size()))) {
162 Slice
value(bucket
+ key_length_
, value_length_
);
163 if (is_last_level_
) {
164 // Sequence number is not stored at the last level, so we will use
165 // kMaxSequenceNumber since it is unknown. This could cause some
166 // transactions to fail to lock a key due to known sequence number.
167 // However, it is expected for anyone to use a CuckooTable in a
169 get_context
->SaveValue(value
, kMaxSequenceNumber
);
171 Slice
full_key(bucket
, key_length_
);
172 ParsedInternalKey found_ikey
;
173 ParseInternalKey(full_key
, &found_ikey
);
174 bool dont_care
__attribute__((__unused__
));
175 get_context
->SaveValue(found_ikey
, value
, &dont_care
);
177 // We don't support merge operations. So, we return here.
185 void CuckooTableReader::Prepare(const Slice
& key
) {
186 // Prefetch the first Cuckoo Block.
187 Slice user_key
= ExtractUserKey(key
);
188 uint64_t addr
= reinterpret_cast<uint64_t>(file_data_
.data()) +
189 bucket_length_
* CuckooHash(user_key
, 0, use_module_hash_
, table_size_
,
190 identity_as_first_hash_
, nullptr);
191 uint64_t end_addr
= addr
+ cuckoo_block_bytes_minus_one_
;
192 for (addr
&= CACHE_LINE_MASK
; addr
< end_addr
; addr
+= CACHE_LINE_SIZE
) {
193 PREFETCH(reinterpret_cast<const char*>(addr
), 0, 3);
197 class CuckooTableIterator
: public InternalIterator
{
199 explicit CuckooTableIterator(CuckooTableReader
* reader
);
200 ~CuckooTableIterator() override
{}
201 bool Valid() const override
;
202 void SeekToFirst() override
;
203 void SeekToLast() override
;
204 void Seek(const Slice
& target
) override
;
205 void SeekForPrev(const Slice
& target
) override
;
206 void Next() override
;
207 void Prev() override
;
208 Slice
key() const override
;
209 Slice
value() const override
;
210 Status
status() const override
{ return Status::OK(); }
214 struct BucketComparator
{
215 BucketComparator(const Slice
& file_data
, const Comparator
* ucomp
,
216 uint32_t bucket_len
, uint32_t user_key_len
,
217 const Slice
& target
= Slice())
218 : file_data_(file_data
),
220 bucket_len_(bucket_len
),
221 user_key_len_(user_key_len
),
223 bool operator()(const uint32_t first
, const uint32_t second
) const {
224 const char* first_bucket
=
225 (first
== kInvalidIndex
) ? target_
.data() :
226 &file_data_
.data()[first
* bucket_len_
];
227 const char* second_bucket
=
228 (second
== kInvalidIndex
) ? target_
.data() :
229 &file_data_
.data()[second
* bucket_len_
];
230 return ucomp_
->Compare(Slice(first_bucket
, user_key_len_
),
231 Slice(second_bucket
, user_key_len_
)) < 0;
234 const Slice file_data_
;
235 const Comparator
* ucomp_
;
236 const uint32_t bucket_len_
;
237 const uint32_t user_key_len_
;
241 const BucketComparator bucket_comparator_
;
242 void PrepareKVAtCurrIdx();
243 CuckooTableReader
* reader_
;
245 // Contains a map of keys to bucket_id sorted in key order.
246 std::vector
<uint32_t> sorted_bucket_ids_
;
247 // We assume that the number of items can be stored in uint32 (4 Billion).
248 uint32_t curr_key_idx_
;
251 // No copying allowed
252 CuckooTableIterator(const CuckooTableIterator
&) = delete;
253 void operator=(const Iterator
&) = delete;
256 CuckooTableIterator::CuckooTableIterator(CuckooTableReader
* reader
)
257 : bucket_comparator_(reader
->file_data_
, reader
->ucomp_
,
258 reader
->bucket_length_
, reader
->user_key_length_
),
261 curr_key_idx_(kInvalidIndex
) {
262 sorted_bucket_ids_
.clear();
267 void CuckooTableIterator::InitIfNeeded() {
271 sorted_bucket_ids_
.reserve(static_cast<size_t>(reader_
->GetTableProperties()->num_entries
));
272 uint64_t num_buckets
= reader_
->table_size_
+ reader_
->cuckoo_block_size_
- 1;
273 assert(num_buckets
< kInvalidIndex
);
274 const char* bucket
= reader_
->file_data_
.data();
275 for (uint32_t bucket_id
= 0; bucket_id
< num_buckets
; ++bucket_id
) {
276 if (Slice(bucket
, reader_
->key_length_
) != Slice(reader_
->unused_key_
)) {
277 sorted_bucket_ids_
.push_back(bucket_id
);
279 bucket
+= reader_
->bucket_length_
;
281 assert(sorted_bucket_ids_
.size() ==
282 reader_
->GetTableProperties()->num_entries
);
283 std::sort(sorted_bucket_ids_
.begin(), sorted_bucket_ids_
.end(),
285 curr_key_idx_
= kInvalidIndex
;
289 void CuckooTableIterator::SeekToFirst() {
292 PrepareKVAtCurrIdx();
295 void CuckooTableIterator::SeekToLast() {
297 curr_key_idx_
= static_cast<uint32_t>(sorted_bucket_ids_
.size()) - 1;
298 PrepareKVAtCurrIdx();
301 void CuckooTableIterator::Seek(const Slice
& target
) {
303 const BucketComparator
seek_comparator(
304 reader_
->file_data_
, reader_
->ucomp_
,
305 reader_
->bucket_length_
, reader_
->user_key_length_
,
306 ExtractUserKey(target
));
307 auto seek_it
= std::lower_bound(sorted_bucket_ids_
.begin(),
308 sorted_bucket_ids_
.end(),
312 static_cast<uint32_t>(std::distance(sorted_bucket_ids_
.begin(), seek_it
));
313 PrepareKVAtCurrIdx();
316 void CuckooTableIterator::SeekForPrev(const Slice
& /*target*/) {
321 bool CuckooTableIterator::Valid() const {
322 return curr_key_idx_
< sorted_bucket_ids_
.size();
325 void CuckooTableIterator::PrepareKVAtCurrIdx() {
331 uint32_t id
= sorted_bucket_ids_
[curr_key_idx_
];
332 const char* offset
= reader_
->file_data_
.data() +
333 id
* reader_
->bucket_length_
;
334 if (reader_
->is_last_level_
) {
335 // Always return internal key.
336 curr_key_
.SetInternalKey(Slice(offset
, reader_
->user_key_length_
),
339 curr_key_
.SetInternalKey(Slice(offset
, reader_
->key_length_
));
341 curr_value_
= Slice(offset
+ reader_
->key_length_
, reader_
->value_length_
);
344 void CuckooTableIterator::Next() {
351 PrepareKVAtCurrIdx();
354 void CuckooTableIterator::Prev() {
355 if (curr_key_idx_
== 0) {
356 curr_key_idx_
= static_cast<uint32_t>(sorted_bucket_ids_
.size());
364 PrepareKVAtCurrIdx();
367 Slice
CuckooTableIterator::key() const {
369 return curr_key_
.GetInternalKey();
372 Slice
CuckooTableIterator::value() const {
377 InternalIterator
* CuckooTableReader::NewIterator(
378 const ReadOptions
& /*read_options*/,
379 const SliceTransform
* /* prefix_extractor */, Arena
* arena
,
380 bool /*skip_filters*/, bool /*for_compaction*/) {
381 if (!status().ok()) {
382 return NewErrorInternalIterator
<Slice
>(
383 Status::Corruption("CuckooTableReader status is not okay."), arena
);
385 CuckooTableIterator
* iter
;
386 if (arena
== nullptr) {
387 iter
= new CuckooTableIterator(this);
389 auto iter_mem
= arena
->AllocateAligned(sizeof(CuckooTableIterator
));
390 iter
= new (iter_mem
) CuckooTableIterator(this);
395 size_t CuckooTableReader::ApproximateMemoryUsage() const { return 0; }
397 } // namespace rocksdb