1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
7 #include "table/block_based/block.h"
14 #include <unordered_set>
18 #include "db/dbformat.h"
19 #include "db/memtable.h"
20 #include "db/write_batch_internal.h"
21 #include "rocksdb/db.h"
22 #include "rocksdb/env.h"
23 #include "rocksdb/iterator.h"
24 #include "rocksdb/slice_transform.h"
25 #include "rocksdb/table.h"
26 #include "table/block_based/block_based_table_reader.h"
27 #include "table/block_based/block_builder.h"
28 #include "table/format.h"
29 #include "test_util/testharness.h"
30 #include "test_util/testutil.h"
31 #include "util/random.h"
33 namespace ROCKSDB_NAMESPACE
{
35 std::string
GenerateInternalKey(int primary_key
, int secondary_key
,
36 int padding_size
, Random
*rnd
) {
39 snprintf(buf
, sizeof(buf
), "%6d%4d", primary_key
, secondary_key
);
42 k
+= rnd
->RandomString(padding_size
);
44 AppendInternalKeyFooter(&k
, 0 /* seqno */, kTypeValue
);
49 // Generate random key value pairs.
50 // The generated key will be sorted. You can tune the parameters to generated
51 // different kinds of test key/value pairs for different scenario.
52 void GenerateRandomKVs(std::vector
<std::string
> *keys
,
53 std::vector
<std::string
> *values
, const int from
,
54 const int len
, const int step
= 1,
55 const int padding_size
= 0,
56 const int keys_share_prefix
= 1) {
59 // generate different prefix
60 for (int i
= from
; i
< from
+ len
; i
+= step
) {
61 // generating keys that shares the prefix
62 for (int j
= 0; j
< keys_share_prefix
; ++j
) {
63 // `DataBlockIter` assumes it reads only internal keys.
64 keys
->emplace_back(GenerateInternalKey(i
, j
, padding_size
, &rnd
));
67 values
->emplace_back(rnd
.RandomString(100));
// Empty gtest fixture for the data-block tests below.
class BlockTest : public testing::Test {};
75 TEST_F(BlockTest
, SimpleTest
) {
77 Options options
= Options();
79 std::vector
<std::string
> keys
;
80 std::vector
<std::string
> values
;
81 BlockBuilder
builder(16);
82 int num_records
= 100000;
84 GenerateRandomKVs(&keys
, &values
, 0, num_records
);
85 // add a bunch of records to a block
86 for (int i
= 0; i
< num_records
; i
++) {
87 builder
.Add(keys
[i
], values
[i
]);
90 // read serialized contents of the block
91 Slice rawblock
= builder
.Finish();
93 // create block reader
94 BlockContents contents
;
95 contents
.data
= rawblock
;
96 Block
reader(std::move(contents
));
98 // read contents of block sequentially
100 InternalIterator
*iter
=
101 reader
.NewDataIterator(options
.comparator
, kDisableGlobalSequenceNumber
);
102 for (iter
->SeekToFirst(); iter
->Valid(); count
++, iter
->Next()) {
103 // read kv from block
104 Slice k
= iter
->key();
105 Slice v
= iter
->value();
107 // compare with lookaside array
108 ASSERT_EQ(k
.ToString().compare(keys
[count
]), 0);
109 ASSERT_EQ(v
.ToString().compare(values
[count
]), 0);
113 // read block contents randomly
115 reader
.NewDataIterator(options
.comparator
, kDisableGlobalSequenceNumber
);
116 for (int i
= 0; i
< num_records
; i
++) {
117 // find a random key in the lookaside array
118 int index
= rnd
.Uniform(num_records
);
119 Slice
k(keys
[index
]);
121 // search in block for this key
123 ASSERT_TRUE(iter
->Valid());
124 Slice v
= iter
->value();
125 ASSERT_EQ(v
.ToString().compare(values
[index
]), 0);
130 // return the block contents
131 BlockContents
GetBlockContents(std::unique_ptr
<BlockBuilder
> *builder
,
132 const std::vector
<std::string
> &keys
,
133 const std::vector
<std::string
> &values
,
134 const int /*prefix_group_size*/ = 1) {
135 builder
->reset(new BlockBuilder(1 /* restart interval */));
137 // Add only half of the keys
138 for (size_t i
= 0; i
< keys
.size(); ++i
) {
139 (*builder
)->Add(keys
[i
], values
[i
]);
141 Slice rawblock
= (*builder
)->Finish();
143 BlockContents contents
;
144 contents
.data
= rawblock
;
149 void CheckBlockContents(BlockContents contents
, const int max_key
,
150 const std::vector
<std::string
> &keys
,
151 const std::vector
<std::string
> &values
) {
152 const size_t prefix_size
= 6;
153 // create block reader
154 BlockContents
contents_ref(contents
.data
);
155 Block
reader1(std::move(contents
));
156 Block
reader2(std::move(contents_ref
));
158 std::unique_ptr
<const SliceTransform
> prefix_extractor(
159 NewFixedPrefixTransform(prefix_size
));
161 std::unique_ptr
<InternalIterator
> regular_iter(reader2
.NewDataIterator(
162 BytewiseComparator(), kDisableGlobalSequenceNumber
));
164 // Seek existent keys
165 for (size_t i
= 0; i
< keys
.size(); i
++) {
166 regular_iter
->Seek(keys
[i
]);
167 ASSERT_OK(regular_iter
->status());
168 ASSERT_TRUE(regular_iter
->Valid());
170 Slice v
= regular_iter
->value();
171 ASSERT_EQ(v
.ToString().compare(values
[i
]), 0);
174 // Seek non-existent keys.
175 // For hash index, if no key with a given prefix is not found, iterator will
176 // simply be set as invalid; whereas the binary search based iterator will
177 // return the one that is closest.
178 for (int i
= 1; i
< max_key
- 1; i
+= 2) {
179 // `DataBlockIter` assumes its APIs receive only internal keys.
180 auto key
= GenerateInternalKey(i
, 0, 0, nullptr);
181 regular_iter
->Seek(key
);
182 ASSERT_TRUE(regular_iter
->Valid());
186 // In this test case, no two key share same prefix.
187 TEST_F(BlockTest
, SimpleIndexHash
) {
188 const int kMaxKey
= 100000;
189 std::vector
<std::string
> keys
;
190 std::vector
<std::string
> values
;
191 GenerateRandomKVs(&keys
, &values
, 0 /* first key id */,
192 kMaxKey
/* last key id */, 2 /* step */,
193 8 /* padding size (8 bytes randomly generated suffix) */);
195 std::unique_ptr
<BlockBuilder
> builder
;
196 auto contents
= GetBlockContents(&builder
, keys
, values
);
198 CheckBlockContents(std::move(contents
), kMaxKey
, keys
, values
);
201 TEST_F(BlockTest
, IndexHashWithSharedPrefix
) {
202 const int kMaxKey
= 100000;
203 // for each prefix, there will be 5 keys starts with it.
204 const int kPrefixGroup
= 5;
205 std::vector
<std::string
> keys
;
206 std::vector
<std::string
> values
;
207 // Generate keys with same prefix.
208 GenerateRandomKVs(&keys
, &values
, 0, // first key id
209 kMaxKey
, // last key id
214 std::unique_ptr
<BlockBuilder
> builder
;
215 auto contents
= GetBlockContents(&builder
, keys
, values
, kPrefixGroup
);
217 CheckBlockContents(std::move(contents
), kMaxKey
, keys
, values
);
// A slow and accurate version of BlockReadAmpBitmap that simply stores
// all the marked ranges in a set. Ranges are stored as (end, start) pairs
// so that lower_bound(offset) finds the first range whose end >= offset.
class BlockReadAmpBitmapSlowAndAccurate {
 public:
  // Record that bytes [start_offset, end_offset] were read.
  void Mark(size_t start_offset, size_t end_offset) {
    assert(end_offset >= start_offset);
    marked_ranges_.emplace(end_offset, start_offset);
  }

  // Invalidate the cached iterator; call before starting a new
  // incremental IsPinMarked() scan.
  void ResetCheckSequence() { iter_valid_ = false; }

  // Return true if any byte in this range was Marked.
  // This does linear search from the previous position. When calling
  // multiple times, `offset` needs to be incremental to get correct results.
  // Call ResetCheckSequence() to reset it.
  bool IsPinMarked(size_t offset) {
    if (iter_valid_) {
      // Has existing iterator, try linear search from the iterator for up
      // to 64 steps before falling back to a binary search.
      for (int i = 0; i < 64; i++) {
        if (offset < iter_->second) {
          return false;
        }
        if (offset <= iter_->first) {
          return true;
        }

        iter_++;
        if (iter_ == marked_ranges_.end()) {
          iter_valid_ = false;
          return false;
        }
      }
    }
    // Initial call or have linear searched too many times.
    // Do binary search.
    iter_ = marked_ranges_.lower_bound(
        std::make_pair(offset, static_cast<size_t>(0)));
    if (iter_ == marked_ranges_.end()) {
      iter_valid_ = false;
      return false;
    }
    iter_valid_ = true;
    return offset <= iter_->first && offset >= iter_->second;
  }

 private:
  // Ranges as (end_offset, start_offset), ordered by end.
  std::set<std::pair<size_t, size_t>> marked_ranges_;
  // Cached position of the last IsPinMarked() lookup.
  std::set<std::pair<size_t, size_t>>::iterator iter_;
  bool iter_valid_ = false;
};
272 TEST_F(BlockTest
, BlockReadAmpBitmap
) {
273 uint32_t pin_offset
= 0;
274 SyncPoint::GetInstance()->SetCallBack(
275 "BlockReadAmpBitmap:rnd", [&pin_offset
](void *arg
) {
276 pin_offset
= *(static_cast<uint32_t *>(arg
));
278 SyncPoint::GetInstance()->EnableProcessing();
279 std::vector
<size_t> block_sizes
= {
289 1024 * 1024 * 4, // 5 MB
293 const size_t kBytesPerBit
= 64;
296 for (size_t block_size
: block_sizes
) {
297 std::shared_ptr
<Statistics
> stats
= ROCKSDB_NAMESPACE::CreateDBStatistics();
298 BlockReadAmpBitmap
read_amp_bitmap(block_size
, kBytesPerBit
, stats
.get());
299 BlockReadAmpBitmapSlowAndAccurate read_amp_slow_and_accurate
;
301 size_t needed_bits
= (block_size
/ kBytesPerBit
);
302 if (block_size
% kBytesPerBit
!= 0) {
306 ASSERT_EQ(stats
->getTickerCount(READ_AMP_TOTAL_READ_BYTES
), block_size
);
308 // Generate some random entries
309 std::vector
<size_t> random_entry_offsets
;
310 for (int i
= 0; i
< 1000; i
++) {
311 random_entry_offsets
.push_back(rnd
.Next() % block_size
);
313 std::sort(random_entry_offsets
.begin(), random_entry_offsets
.end());
315 std::unique(random_entry_offsets
.begin(), random_entry_offsets
.end());
316 random_entry_offsets
.resize(
317 std::distance(random_entry_offsets
.begin(), it
));
319 std::vector
<std::pair
<size_t, size_t>> random_entries
;
320 for (size_t i
= 0; i
< random_entry_offsets
.size(); i
++) {
321 size_t entry_start
= random_entry_offsets
[i
];
323 if (i
+ 1 < random_entry_offsets
.size()) {
324 entry_end
= random_entry_offsets
[i
+ 1] - 1;
326 entry_end
= block_size
- 1;
328 random_entries
.emplace_back(entry_start
, entry_end
);
331 for (size_t i
= 0; i
< random_entries
.size(); i
++) {
332 read_amp_slow_and_accurate
.ResetCheckSequence();
333 auto ¤t_entry
= random_entries
[rnd
.Next() % random_entries
.size()];
335 read_amp_bitmap
.Mark(static_cast<uint32_t>(current_entry
.first
),
336 static_cast<uint32_t>(current_entry
.second
));
337 read_amp_slow_and_accurate
.Mark(current_entry
.first
,
338 current_entry
.second
);
340 size_t total_bits
= 0;
341 for (size_t bit_idx
= 0; bit_idx
< needed_bits
; bit_idx
++) {
342 total_bits
+= read_amp_slow_and_accurate
.IsPinMarked(
343 bit_idx
* kBytesPerBit
+ pin_offset
);
345 size_t expected_estimate_useful
= total_bits
* kBytesPerBit
;
346 size_t got_estimate_useful
=
347 stats
->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES
);
348 ASSERT_EQ(expected_estimate_useful
, got_estimate_useful
);
351 SyncPoint::GetInstance()->DisableProcessing();
352 SyncPoint::GetInstance()->ClearAllCallBacks();
355 TEST_F(BlockTest
, BlockWithReadAmpBitmap
) {
357 Options options
= Options();
359 std::vector
<std::string
> keys
;
360 std::vector
<std::string
> values
;
361 BlockBuilder
builder(16);
362 int num_records
= 10000;
364 GenerateRandomKVs(&keys
, &values
, 0, num_records
, 1);
365 // add a bunch of records to a block
366 for (int i
= 0; i
< num_records
; i
++) {
367 builder
.Add(keys
[i
], values
[i
]);
370 Slice rawblock
= builder
.Finish();
371 const size_t kBytesPerBit
= 8;
373 // Read the block sequentially using Next()
375 std::shared_ptr
<Statistics
> stats
= ROCKSDB_NAMESPACE::CreateDBStatistics();
377 // create block reader
378 BlockContents contents
;
379 contents
.data
= rawblock
;
380 Block
reader(std::move(contents
), kBytesPerBit
, stats
.get());
382 // read contents of block sequentially
383 size_t read_bytes
= 0;
384 DataBlockIter
*iter
= reader
.NewDataIterator(
385 options
.comparator
, kDisableGlobalSequenceNumber
, nullptr, stats
.get());
386 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
388 read_bytes
+= iter
->TEST_CurrentEntrySize();
390 double semi_acc_read_amp
=
391 static_cast<double>(read_bytes
) / rawblock
.size();
392 double read_amp
= static_cast<double>(stats
->getTickerCount(
393 READ_AMP_ESTIMATE_USEFUL_BYTES
)) /
394 stats
->getTickerCount(READ_AMP_TOTAL_READ_BYTES
);
396 // Error in read amplification will be less than 1% if we are reading
398 double error_pct
= fabs(semi_acc_read_amp
- read_amp
) * 100;
399 EXPECT_LT(error_pct
, 1);
405 // Read the block sequentially using Seek()
407 std::shared_ptr
<Statistics
> stats
= ROCKSDB_NAMESPACE::CreateDBStatistics();
409 // create block reader
410 BlockContents contents
;
411 contents
.data
= rawblock
;
412 Block
reader(std::move(contents
), kBytesPerBit
, stats
.get());
414 size_t read_bytes
= 0;
415 DataBlockIter
*iter
= reader
.NewDataIterator(
416 options
.comparator
, kDisableGlobalSequenceNumber
, nullptr, stats
.get());
417 for (int i
= 0; i
< num_records
; i
++) {
420 // search in block for this key
423 read_bytes
+= iter
->TEST_CurrentEntrySize();
425 double semi_acc_read_amp
=
426 static_cast<double>(read_bytes
) / rawblock
.size();
427 double read_amp
= static_cast<double>(stats
->getTickerCount(
428 READ_AMP_ESTIMATE_USEFUL_BYTES
)) /
429 stats
->getTickerCount(READ_AMP_TOTAL_READ_BYTES
);
431 // Error in read amplification will be less than 1% if we are reading
433 double error_pct
= fabs(semi_acc_read_amp
- read_amp
) * 100;
434 EXPECT_LT(error_pct
, 1);
439 // Read the block randomly
441 std::shared_ptr
<Statistics
> stats
= ROCKSDB_NAMESPACE::CreateDBStatistics();
443 // create block reader
444 BlockContents contents
;
445 contents
.data
= rawblock
;
446 Block
reader(std::move(contents
), kBytesPerBit
, stats
.get());
448 size_t read_bytes
= 0;
449 DataBlockIter
*iter
= reader
.NewDataIterator(
450 options
.comparator
, kDisableGlobalSequenceNumber
, nullptr, stats
.get());
451 std::unordered_set
<int> read_keys
;
452 for (int i
= 0; i
< num_records
; i
++) {
453 int index
= rnd
.Uniform(num_records
);
454 Slice
k(keys
[index
]);
458 if (read_keys
.find(index
) == read_keys
.end()) {
459 read_keys
.insert(index
);
460 read_bytes
+= iter
->TEST_CurrentEntrySize();
463 double semi_acc_read_amp
=
464 static_cast<double>(read_bytes
) / rawblock
.size();
465 double read_amp
= static_cast<double>(stats
->getTickerCount(
466 READ_AMP_ESTIMATE_USEFUL_BYTES
)) /
467 stats
->getTickerCount(READ_AMP_TOTAL_READ_BYTES
);
469 double error_pct
= fabs(semi_acc_read_amp
- read_amp
) * 100;
470 // Error in read amplification will be less than 2% if we are reading
472 EXPECT_LT(error_pct
, 2);
478 TEST_F(BlockTest
, ReadAmpBitmapPow2
) {
479 std::shared_ptr
<Statistics
> stats
= ROCKSDB_NAMESPACE::CreateDBStatistics();
480 ASSERT_EQ(BlockReadAmpBitmap(100, 1, stats
.get()).GetBytesPerBit(), 1u);
481 ASSERT_EQ(BlockReadAmpBitmap(100, 2, stats
.get()).GetBytesPerBit(), 2u);
482 ASSERT_EQ(BlockReadAmpBitmap(100, 4, stats
.get()).GetBytesPerBit(), 4u);
483 ASSERT_EQ(BlockReadAmpBitmap(100, 8, stats
.get()).GetBytesPerBit(), 8u);
484 ASSERT_EQ(BlockReadAmpBitmap(100, 16, stats
.get()).GetBytesPerBit(), 16u);
485 ASSERT_EQ(BlockReadAmpBitmap(100, 32, stats
.get()).GetBytesPerBit(), 32u);
487 ASSERT_EQ(BlockReadAmpBitmap(100, 3, stats
.get()).GetBytesPerBit(), 2u);
488 ASSERT_EQ(BlockReadAmpBitmap(100, 7, stats
.get()).GetBytesPerBit(), 4u);
489 ASSERT_EQ(BlockReadAmpBitmap(100, 11, stats
.get()).GetBytesPerBit(), 8u);
490 ASSERT_EQ(BlockReadAmpBitmap(100, 17, stats
.get()).GetBytesPerBit(), 16u);
491 ASSERT_EQ(BlockReadAmpBitmap(100, 33, stats
.get()).GetBytesPerBit(), 32u);
492 ASSERT_EQ(BlockReadAmpBitmap(100, 35, stats
.get()).GetBytesPerBit(), 32u);
496 : public testing::Test
,
497 public testing::WithParamInterface
<std::tuple
<bool, bool>> {
499 IndexBlockTest() = default;
501 bool useValueDeltaEncoding() const { return std::get
<0>(GetParam()); }
502 bool includeFirstKey() const { return std::get
<1>(GetParam()); }
505 // Similar to GenerateRandomKVs but for index block contents.
506 void GenerateRandomIndexEntries(std::vector
<std::string
> *separators
,
507 std::vector
<BlockHandle
> *block_handles
,
508 std::vector
<std::string
> *first_keys
,
512 // For each of `len` blocks, we need to generate a first and last key.
513 // Let's generate n*2 random keys, sort them, group into consecutive pairs.
514 std::set
<std::string
> keys
;
515 while ((int)keys
.size() < len
* 2) {
516 // Keys need to be at least 8 bytes long to look like internal keys.
517 keys
.insert(test::RandomKey(&rnd
, 12));
521 for (auto it
= keys
.begin(); it
!= keys
.end();) {
522 first_keys
->emplace_back(*it
++);
523 separators
->emplace_back(*it
++);
524 uint64_t size
= rnd
.Uniform(1024 * 16);
525 BlockHandle
handle(offset
, size
);
526 offset
+= size
+ BlockBasedTable::kBlockTrailerSize
;
527 block_handles
->emplace_back(handle
);
531 TEST_P(IndexBlockTest
, IndexValueEncodingTest
) {
533 Options options
= Options();
535 std::vector
<std::string
> separators
;
536 std::vector
<BlockHandle
> block_handles
;
537 std::vector
<std::string
> first_keys
;
538 const bool kUseDeltaEncoding
= true;
539 BlockBuilder
builder(16, kUseDeltaEncoding
, useValueDeltaEncoding());
540 int num_records
= 100;
542 GenerateRandomIndexEntries(&separators
, &block_handles
, &first_keys
,
544 BlockHandle last_encoded_handle
;
545 for (int i
= 0; i
< num_records
; i
++) {
546 IndexValue
entry(block_handles
[i
], first_keys
[i
]);
547 std::string encoded_entry
;
548 std::string delta_encoded_entry
;
549 entry
.EncodeTo(&encoded_entry
, includeFirstKey(), nullptr);
550 if (useValueDeltaEncoding() && i
> 0) {
551 entry
.EncodeTo(&delta_encoded_entry
, includeFirstKey(),
552 &last_encoded_handle
);
554 last_encoded_handle
= entry
.handle
;
555 const Slice
delta_encoded_entry_slice(delta_encoded_entry
);
556 builder
.Add(separators
[i
], encoded_entry
, &delta_encoded_entry_slice
);
559 // read serialized contents of the block
560 Slice rawblock
= builder
.Finish();
562 // create block reader
563 BlockContents contents
;
564 contents
.data
= rawblock
;
565 Block
reader(std::move(contents
));
567 const bool kTotalOrderSeek
= true;
568 const bool kIncludesSeq
= true;
569 const bool kValueIsFull
= !useValueDeltaEncoding();
570 IndexBlockIter
*kNullIter
= nullptr;
571 Statistics
*kNullStats
= nullptr;
572 // read contents of block sequentially
573 InternalIteratorBase
<IndexValue
> *iter
= reader
.NewIndexIterator(
574 options
.comparator
, kDisableGlobalSequenceNumber
, kNullIter
, kNullStats
,
575 kTotalOrderSeek
, includeFirstKey(), kIncludesSeq
, kValueIsFull
);
577 for (int index
= 0; index
< num_records
; ++index
) {
578 ASSERT_TRUE(iter
->Valid());
580 Slice k
= iter
->key();
581 IndexValue v
= iter
->value();
583 EXPECT_EQ(separators
[index
], k
.ToString());
584 EXPECT_EQ(block_handles
[index
].offset(), v
.handle
.offset());
585 EXPECT_EQ(block_handles
[index
].size(), v
.handle
.size());
586 EXPECT_EQ(includeFirstKey() ? first_keys
[index
] : "",
587 v
.first_internal_key
.ToString());
593 // read block contents randomly
594 iter
= reader
.NewIndexIterator(
595 options
.comparator
, kDisableGlobalSequenceNumber
, kNullIter
, kNullStats
,
596 kTotalOrderSeek
, includeFirstKey(), kIncludesSeq
, kValueIsFull
);
597 for (int i
= 0; i
< num_records
* 2; i
++) {
598 // find a random key in the lookaside array
599 int index
= rnd
.Uniform(num_records
);
600 Slice
k(separators
[index
]);
602 // search in block for this key
604 ASSERT_TRUE(iter
->Valid());
605 IndexValue v
= iter
->value();
606 EXPECT_EQ(separators
[index
], iter
->key().ToString());
607 EXPECT_EQ(block_handles
[index
].offset(), v
.handle
.offset());
608 EXPECT_EQ(block_handles
[index
].size(), v
.handle
.size());
609 EXPECT_EQ(includeFirstKey() ? first_keys
[index
] : "",
610 v
.first_internal_key
.ToString());
// Instantiate IndexBlockTest over all four combinations of
// (useValueDeltaEncoding, includeFirstKey).
INSTANTIATE_TEST_CASE_P(P, IndexBlockTest,
                        ::testing::Values(std::make_tuple(false, false),
                                          std::make_tuple(false, true),
                                          std::make_tuple(true, false),
                                          std::make_tuple(true, true)));
621 } // namespace ROCKSDB_NAMESPACE
623 int main(int argc
, char **argv
) {
624 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
625 ::testing::InitGoogleTest(&argc
, argv
);
626 return RUN_ALL_TESTS();