1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
11 fprintf(stderr
, "Please install gflags to run this test... Skipping...\n");
16 #ifndef __STDC_FORMAT_MACROS
17 #define __STDC_FORMAT_MACROS
25 #include "table/cuckoo_table_builder.h"
26 #include "table/cuckoo_table_factory.h"
27 #include "table/cuckoo_table_reader.h"
28 #include "table/get_context.h"
29 #include "table/meta_blocks.h"
30 #include "util/arena.h"
31 #include "util/gflags_compat.h"
32 #include "util/random.h"
33 #include "util/string_util.h"
34 #include "util/testharness.h"
35 #include "util/testutil.h"
37 using GFLAGS_NAMESPACE::ParseCommandLineFlags
;
38 using GFLAGS_NAMESPACE::SetUsageMessage
;
40 DEFINE_string(file_dir
, "", "Directory where the files will be created"
41 " for benchmark. Added for using tmpfs.");
42 DEFINE_bool(enable_perf
, false, "Run Benchmark Tests too.");
43 DEFINE_bool(write
, false,
44 "Should write new values to file in performance tests?");
45 DEFINE_bool(identity_as_first_hash
, true, "use identity as first hash");
50 const uint32_t kNumHashFunc
= 10;
51 // Methods, variables related to Hash functions.
52 std::unordered_map
<std::string
, std::vector
<uint64_t>> hash_map
;
54 void AddHashLookups(const std::string
& s
, uint64_t bucket_id
,
55 uint32_t num_hash_fun
) {
56 std::vector
<uint64_t> v
;
57 for (uint32_t i
= 0; i
< num_hash_fun
; i
++) {
58 v
.push_back(bucket_id
+ i
);
63 uint64_t GetSliceHash(const Slice
& s
, uint32_t index
,
64 uint64_t /*max_num_buckets*/) {
65 return hash_map
[s
.ToString()][index
];
69 class CuckooReaderTest
: public testing::Test
{
71 using testing::Test::SetUp
;
74 options
.allow_mmap_reads
= true;
76 env_options
= EnvOptions(options
);
83 keys
.resize(num_items
);
85 user_keys
.resize(num_items
);
87 values
.resize(num_items
);
90 std::string
NumToStr(int64_t i
) {
91 return std::string(reinterpret_cast<char*>(&i
), sizeof(i
));
94 void CreateCuckooFileAndCheckReader(
95 const Comparator
* ucomp
= BytewiseComparator()) {
96 std::unique_ptr
<WritableFile
> writable_file
;
97 ASSERT_OK(env
->NewWritableFile(fname
, &writable_file
, env_options
));
98 std::unique_ptr
<WritableFileWriter
> file_writer(
99 new WritableFileWriter(std::move(writable_file
), fname
, env_options
));
101 CuckooTableBuilder
builder(
102 file_writer
.get(), 0.9, kNumHashFunc
, 100, ucomp
, 2, false, false,
103 GetSliceHash
, 0 /* column_family_id */, kDefaultColumnFamilyName
);
104 ASSERT_OK(builder
.status());
105 for (uint32_t key_idx
= 0; key_idx
< num_items
; ++key_idx
) {
106 builder
.Add(Slice(keys
[key_idx
]), Slice(values
[key_idx
]));
107 ASSERT_OK(builder
.status());
108 ASSERT_EQ(builder
.NumEntries(), key_idx
+ 1);
110 ASSERT_OK(builder
.Finish());
111 ASSERT_EQ(num_items
, builder
.NumEntries());
112 file_size
= builder
.FileSize();
113 ASSERT_OK(file_writer
->Close());
116 std::unique_ptr
<RandomAccessFile
> read_file
;
117 ASSERT_OK(env
->NewRandomAccessFile(fname
, &read_file
, env_options
));
118 std::unique_ptr
<RandomAccessFileReader
> file_reader(
119 new RandomAccessFileReader(std::move(read_file
), fname
));
120 const ImmutableCFOptions
ioptions(options
);
121 CuckooTableReader
reader(ioptions
, std::move(file_reader
), file_size
, ucomp
,
123 ASSERT_OK(reader
.status());
124 // Assume no merge/deletion
125 for (uint32_t i
= 0; i
< num_items
; ++i
) {
127 GetContext
get_context(ucomp
, nullptr, nullptr, nullptr,
128 GetContext::kNotFound
, Slice(user_keys
[i
]), &value
,
129 nullptr, nullptr, nullptr, nullptr);
131 reader
.Get(ReadOptions(), Slice(keys
[i
]), &get_context
, nullptr));
132 ASSERT_STREQ(values
[i
].c_str(), value
.data());
135 void UpdateKeys(bool with_zero_seqno
) {
136 for (uint32_t i
= 0; i
< num_items
; i
++) {
137 ParsedInternalKey
ikey(user_keys
[i
],
138 with_zero_seqno
? 0 : i
+ 1000, kTypeValue
);
140 AppendInternalKey(&keys
[i
], ikey
);
144 void CheckIterator(const Comparator
* ucomp
= BytewiseComparator()) {
145 std::unique_ptr
<RandomAccessFile
> read_file
;
146 ASSERT_OK(env
->NewRandomAccessFile(fname
, &read_file
, env_options
));
147 std::unique_ptr
<RandomAccessFileReader
> file_reader(
148 new RandomAccessFileReader(std::move(read_file
), fname
));
149 const ImmutableCFOptions
ioptions(options
);
150 CuckooTableReader
reader(ioptions
, std::move(file_reader
), file_size
, ucomp
,
152 ASSERT_OK(reader
.status());
153 InternalIterator
* it
=
154 reader
.NewIterator(ReadOptions(), nullptr, nullptr, false);
155 ASSERT_OK(it
->status());
156 ASSERT_TRUE(!it
->Valid());
159 while (it
->Valid()) {
160 ASSERT_OK(it
->status());
161 ASSERT_TRUE(Slice(keys
[cnt
]) == it
->key());
162 ASSERT_TRUE(Slice(values
[cnt
]) == it
->value());
166 ASSERT_EQ(static_cast<uint32_t>(cnt
), num_items
);
169 cnt
= static_cast<int>(num_items
) - 1;
170 ASSERT_TRUE(it
->Valid());
171 while (it
->Valid()) {
172 ASSERT_OK(it
->status());
173 ASSERT_TRUE(Slice(keys
[cnt
]) == it
->key());
174 ASSERT_TRUE(Slice(values
[cnt
]) == it
->value());
180 cnt
= static_cast<int>(num_items
) / 2;
182 while (it
->Valid()) {
183 ASSERT_OK(it
->status());
184 ASSERT_TRUE(Slice(keys
[cnt
]) == it
->key());
185 ASSERT_TRUE(Slice(values
[cnt
]) == it
->value());
189 ASSERT_EQ(static_cast<uint32_t>(cnt
), num_items
);
193 it
= reader
.NewIterator(ReadOptions(), nullptr, &arena
);
194 ASSERT_OK(it
->status());
195 ASSERT_TRUE(!it
->Valid());
196 it
->Seek(keys
[num_items
/2]);
197 ASSERT_TRUE(it
->Valid());
198 ASSERT_OK(it
->status());
199 ASSERT_TRUE(keys
[num_items
/2] == it
->key());
200 ASSERT_TRUE(values
[num_items
/2] == it
->value());
201 ASSERT_OK(it
->status());
202 it
->~InternalIterator();
205 std::vector
<std::string
> keys
;
206 std::vector
<std::string
> user_keys
;
207 std::vector
<std::string
> values
;
213 EnvOptions env_options
;
216 TEST_F(CuckooReaderTest
, WhenKeyExists
) {
218 fname
= test::PerThreadDBPath("CuckooReader_WhenKeyExists");
219 for (uint64_t i
= 0; i
< num_items
; i
++) {
220 user_keys
[i
] = "key" + NumToStr(i
);
221 ParsedInternalKey
ikey(user_keys
[i
], i
+ 1000, kTypeValue
);
222 AppendInternalKey(&keys
[i
], ikey
);
223 values
[i
] = "value" + NumToStr(i
);
224 // Give disjoint hash values.
225 AddHashLookups(user_keys
[i
], i
, kNumHashFunc
);
227 CreateCuckooFileAndCheckReader();
230 CreateCuckooFileAndCheckReader();
231 // Test with collision. Make all hash values collide.
233 for (uint32_t i
= 0; i
< num_items
; i
++) {
234 AddHashLookups(user_keys
[i
], 0, kNumHashFunc
);
237 CreateCuckooFileAndCheckReader();
240 CreateCuckooFileAndCheckReader();
243 TEST_F(CuckooReaderTest
, WhenKeyExistsWithUint64Comparator
) {
245 fname
= test::PerThreadDBPath("CuckooReaderUint64_WhenKeyExists");
246 for (uint64_t i
= 0; i
< num_items
; i
++) {
247 user_keys
[i
].resize(8);
248 memcpy(&user_keys
[i
][0], static_cast<void*>(&i
), 8);
249 ParsedInternalKey
ikey(user_keys
[i
], i
+ 1000, kTypeValue
);
250 AppendInternalKey(&keys
[i
], ikey
);
251 values
[i
] = "value" + NumToStr(i
);
252 // Give disjoint hash values.
253 AddHashLookups(user_keys
[i
], i
, kNumHashFunc
);
255 CreateCuckooFileAndCheckReader(test::Uint64Comparator());
258 CreateCuckooFileAndCheckReader(test::Uint64Comparator());
259 // Test with collision. Make all hash values collide.
261 for (uint32_t i
= 0; i
< num_items
; i
++) {
262 AddHashLookups(user_keys
[i
], 0, kNumHashFunc
);
265 CreateCuckooFileAndCheckReader(test::Uint64Comparator());
268 CreateCuckooFileAndCheckReader(test::Uint64Comparator());
271 TEST_F(CuckooReaderTest
, CheckIterator
) {
272 SetUp(2*kNumHashFunc
);
273 fname
= test::PerThreadDBPath("CuckooReader_CheckIterator");
274 for (uint64_t i
= 0; i
< num_items
; i
++) {
275 user_keys
[i
] = "key" + NumToStr(i
);
276 ParsedInternalKey
ikey(user_keys
[i
], 1000, kTypeValue
);
277 AppendInternalKey(&keys
[i
], ikey
);
278 values
[i
] = "value" + NumToStr(i
);
279 // Give disjoint hash values, in reverse order.
280 AddHashLookups(user_keys
[i
], num_items
-i
-1, kNumHashFunc
);
282 CreateCuckooFileAndCheckReader();
286 CreateCuckooFileAndCheckReader();
290 TEST_F(CuckooReaderTest
, CheckIteratorUint64
) {
291 SetUp(2*kNumHashFunc
);
292 fname
= test::PerThreadDBPath("CuckooReader_CheckIterator");
293 for (uint64_t i
= 0; i
< num_items
; i
++) {
294 user_keys
[i
].resize(8);
295 memcpy(&user_keys
[i
][0], static_cast<void*>(&i
), 8);
296 ParsedInternalKey
ikey(user_keys
[i
], 1000, kTypeValue
);
297 AppendInternalKey(&keys
[i
], ikey
);
298 values
[i
] = "value" + NumToStr(i
);
299 // Give disjoint hash values, in reverse order.
300 AddHashLookups(user_keys
[i
], num_items
-i
-1, kNumHashFunc
);
302 CreateCuckooFileAndCheckReader(test::Uint64Comparator());
303 CheckIterator(test::Uint64Comparator());
306 CreateCuckooFileAndCheckReader(test::Uint64Comparator());
307 CheckIterator(test::Uint64Comparator());
310 TEST_F(CuckooReaderTest
, WhenKeyNotFound
) {
311 // Add keys with colliding hash values.
313 fname
= test::PerThreadDBPath("CuckooReader_WhenKeyNotFound");
314 for (uint64_t i
= 0; i
< num_items
; i
++) {
315 user_keys
[i
] = "key" + NumToStr(i
);
316 ParsedInternalKey
ikey(user_keys
[i
], i
+ 1000, kTypeValue
);
317 AppendInternalKey(&keys
[i
], ikey
);
318 values
[i
] = "value" + NumToStr(i
);
319 // Make all hash values collide.
320 AddHashLookups(user_keys
[i
], 0, kNumHashFunc
);
322 auto* ucmp
= BytewiseComparator();
323 CreateCuckooFileAndCheckReader();
324 std::unique_ptr
<RandomAccessFile
> read_file
;
325 ASSERT_OK(env
->NewRandomAccessFile(fname
, &read_file
, env_options
));
326 std::unique_ptr
<RandomAccessFileReader
> file_reader(
327 new RandomAccessFileReader(std::move(read_file
), fname
));
328 const ImmutableCFOptions
ioptions(options
);
329 CuckooTableReader
reader(ioptions
, std::move(file_reader
), file_size
, ucmp
,
331 ASSERT_OK(reader
.status());
332 // Search for a key with colliding hash values.
333 std::string not_found_user_key
= "key" + NumToStr(num_items
);
334 std::string not_found_key
;
335 AddHashLookups(not_found_user_key
, 0, kNumHashFunc
);
336 ParsedInternalKey
ikey(not_found_user_key
, 1000, kTypeValue
);
337 AppendInternalKey(¬_found_key
, ikey
);
339 GetContext
get_context(ucmp
, nullptr, nullptr, nullptr, GetContext::kNotFound
,
340 Slice(not_found_key
), &value
, nullptr, nullptr,
343 reader
.Get(ReadOptions(), Slice(not_found_key
), &get_context
, nullptr));
344 ASSERT_TRUE(value
.empty());
345 ASSERT_OK(reader
.status());
346 // Search for a key with an independent hash value.
347 std::string not_found_user_key2
= "key" + NumToStr(num_items
+ 1);
348 AddHashLookups(not_found_user_key2
, kNumHashFunc
, kNumHashFunc
);
349 ParsedInternalKey
ikey2(not_found_user_key2
, 1000, kTypeValue
);
350 std::string not_found_key2
;
351 AppendInternalKey(¬_found_key2
, ikey2
);
353 GetContext
get_context2(ucmp
, nullptr, nullptr, nullptr,
354 GetContext::kNotFound
, Slice(not_found_key2
), &value
,
355 nullptr, nullptr, nullptr, nullptr);
357 reader
.Get(ReadOptions(), Slice(not_found_key2
), &get_context2
, nullptr));
358 ASSERT_TRUE(value
.empty());
359 ASSERT_OK(reader
.status());
361 // Test read when key is unused key.
362 std::string unused_key
=
363 reader
.GetTableProperties()->user_collected_properties
.at(
364 CuckooTablePropertyNames::kEmptyKey
);
365 // Add hash values that map to empty buckets.
366 AddHashLookups(ExtractUserKey(unused_key
).ToString(),
367 kNumHashFunc
, kNumHashFunc
);
369 GetContext
get_context3(ucmp
, nullptr, nullptr, nullptr,
370 GetContext::kNotFound
, Slice(unused_key
), &value
,
371 nullptr, nullptr, nullptr, nullptr);
373 reader
.Get(ReadOptions(), Slice(unused_key
), &get_context3
, nullptr));
374 ASSERT_TRUE(value
.empty());
375 ASSERT_OK(reader
.status());
380 void GetKeys(uint64_t num
, std::vector
<std::string
>* keys
) {
383 k
.SetInternalKey("", 0, kTypeValue
);
384 std::string internal_key_suffix
= k
.GetInternalKey().ToString();
385 ASSERT_EQ(static_cast<size_t>(8), internal_key_suffix
.size());
386 for (uint64_t key_idx
= 0; key_idx
< num
; ++key_idx
) {
387 uint64_t value
= 2 * key_idx
;
388 std::string
new_key(reinterpret_cast<char*>(&value
), sizeof(value
));
389 new_key
+= internal_key_suffix
;
390 keys
->push_back(new_key
);
394 std::string
GetFileName(uint64_t num
) {
395 if (FLAGS_file_dir
.empty()) {
396 FLAGS_file_dir
= test::TmpDir();
398 return test::PerThreadDBPath(FLAGS_file_dir
, "cuckoo_read_benchmark") +
399 ToString(num
/ 1000000) + "Mkeys";
402 // Create last level file as we are interested in measuring performance of
403 // last level file only.
404 void WriteFile(const std::vector
<std::string
>& keys
,
405 const uint64_t num
, double hash_ratio
) {
407 options
.allow_mmap_reads
= true;
408 Env
* env
= options
.env
;
409 EnvOptions env_options
= EnvOptions(options
);
410 std::string fname
= GetFileName(num
);
412 std::unique_ptr
<WritableFile
> writable_file
;
413 ASSERT_OK(env
->NewWritableFile(fname
, &writable_file
, env_options
));
414 std::unique_ptr
<WritableFileWriter
> file_writer(
415 new WritableFileWriter(std::move(writable_file
), fname
, env_options
));
416 CuckooTableBuilder
builder(
417 file_writer
.get(), hash_ratio
, 64, 1000, test::Uint64Comparator(), 5,
418 false, FLAGS_identity_as_first_hash
, nullptr, 0 /* column_family_id */,
419 kDefaultColumnFamilyName
);
420 ASSERT_OK(builder
.status());
421 for (uint64_t key_idx
= 0; key_idx
< num
; ++key_idx
) {
422 // Value is just a part of key.
423 builder
.Add(Slice(keys
[key_idx
]), Slice(&keys
[key_idx
][0], 4));
424 ASSERT_EQ(builder
.NumEntries(), key_idx
+ 1);
425 ASSERT_OK(builder
.status());
427 ASSERT_OK(builder
.Finish());
428 ASSERT_EQ(num
, builder
.NumEntries());
429 ASSERT_OK(file_writer
->Close());
432 env
->GetFileSize(fname
, &file_size
);
433 std::unique_ptr
<RandomAccessFile
> read_file
;
434 ASSERT_OK(env
->NewRandomAccessFile(fname
, &read_file
, env_options
));
435 std::unique_ptr
<RandomAccessFileReader
> file_reader(
436 new RandomAccessFileReader(std::move(read_file
), fname
));
438 const ImmutableCFOptions
ioptions(options
);
439 CuckooTableReader
reader(ioptions
, std::move(file_reader
), file_size
,
440 test::Uint64Comparator(), nullptr);
441 ASSERT_OK(reader
.status());
442 ReadOptions r_options
;
444 // Assume only the fast path is triggered
445 GetContext
get_context(nullptr, nullptr, nullptr, nullptr,
446 GetContext::kNotFound
, Slice(), &value
, nullptr,
447 nullptr, nullptr, nullptr);
448 for (uint64_t i
= 0; i
< num
; ++i
) {
451 ASSERT_OK(reader
.Get(r_options
, Slice(keys
[i
]), &get_context
, nullptr));
452 ASSERT_TRUE(Slice(keys
[i
]) == Slice(&keys
[i
][0], 4));
456 void ReadKeys(uint64_t num
, uint32_t batch_size
) {
458 options
.allow_mmap_reads
= true;
459 Env
* env
= options
.env
;
460 EnvOptions env_options
= EnvOptions(options
);
461 std::string fname
= GetFileName(num
);
464 env
->GetFileSize(fname
, &file_size
);
465 std::unique_ptr
<RandomAccessFile
> read_file
;
466 ASSERT_OK(env
->NewRandomAccessFile(fname
, &read_file
, env_options
));
467 std::unique_ptr
<RandomAccessFileReader
> file_reader(
468 new RandomAccessFileReader(std::move(read_file
), fname
));
470 const ImmutableCFOptions
ioptions(options
);
471 CuckooTableReader
reader(ioptions
, std::move(file_reader
), file_size
,
472 test::Uint64Comparator(), nullptr);
473 ASSERT_OK(reader
.status());
474 const UserCollectedProperties user_props
=
475 reader
.GetTableProperties()->user_collected_properties
;
476 const uint32_t num_hash_fun
= *reinterpret_cast<const uint32_t*>(
477 user_props
.at(CuckooTablePropertyNames::kNumHashFunc
).data());
478 const uint64_t table_size
= *reinterpret_cast<const uint64_t*>(
479 user_props
.at(CuckooTablePropertyNames::kHashTableSize
).data());
480 fprintf(stderr
, "With %" PRIu64
" items, utilization is %.2f%%, number of"
481 " hash functions: %u.\n", num
, num
* 100.0 / (table_size
), num_hash_fun
);
482 ReadOptions r_options
;
484 std::vector
<uint64_t> keys
;
486 for (uint64_t i
= 0; i
< num
; ++i
) {
487 keys
.push_back(2 * i
);
489 std::random_shuffle(keys
.begin(), keys
.end());
492 // Assume only the fast path is triggered
493 GetContext
get_context(nullptr, nullptr, nullptr, nullptr,
494 GetContext::kNotFound
, Slice(), &value
, nullptr,
495 nullptr, nullptr, nullptr);
496 uint64_t start_time
= env
->NowMicros();
497 if (batch_size
> 0) {
498 for (uint64_t i
= 0; i
< num
; i
+= batch_size
) {
499 for (uint64_t j
= i
; j
< i
+batch_size
&& j
< num
; ++j
) {
500 reader
.Prepare(Slice(reinterpret_cast<char*>(&keys
[j
]), 16));
502 for (uint64_t j
= i
; j
< i
+batch_size
&& j
< num
; ++j
) {
503 reader
.Get(r_options
, Slice(reinterpret_cast<char*>(&keys
[j
]), 16),
504 &get_context
, nullptr);
508 for (uint64_t i
= 0; i
< num
; i
++) {
509 reader
.Get(r_options
, Slice(reinterpret_cast<char*>(&keys
[i
]), 16),
510 &get_context
, nullptr);
513 float time_per_op
= (env
->NowMicros() - start_time
) * 1.0f
/ num
;
515 "Time taken per op is %.3fus (%.1f Mqps) with batch size of %u\n",
516 time_per_op
, 1.0 / time_per_op
, batch_size
);
520 TEST_F(CuckooReaderTest
, TestReadPerformance
) {
521 if (!FLAGS_enable_perf
) {
524 double hash_ratio
= 0.95;
525 // These numbers are chosen to have a hash utilization % close to
526 // 0.9, 0.75, 0.6 and 0.5 respectively.
527 // They all create 128 M buckets.
528 std::vector
<uint64_t> nums
= {120*1024*1024, 100*1024*1024, 80*1024*1024,
532 "WARNING: Not compiled with DNDEBUG. Performance tests may be slow.\n");
534 for (uint64_t num
: nums
) {
536 Env::Default()->FileExists(GetFileName(num
)).IsNotFound()) {
537 std::vector
<std::string
> all_keys
;
538 GetKeys(num
, &all_keys
);
539 WriteFile(all_keys
, num
, hash_ratio
);
546 fprintf(stderr
, "\n");
549 } // namespace rocksdb
551 int main(int argc
, char** argv
) {
552 if (rocksdb::port::kLittleEndian
) {
553 ::testing::InitGoogleTest(&argc
, argv
);
554 ParseCommandLineFlags(&argc
, &argv
, true);
555 return RUN_ALL_TESTS();
558 fprintf(stderr
, "SKIPPED as Cuckoo table doesn't support Big Endian\n");
568 int main(int /*argc*/, char** /*argv*/) {
569 fprintf(stderr
, "SKIPPED as Cuckoo table is not supported in ROCKSDB_LITE\n");
573 #endif // ROCKSDB_LITE