+++ /dev/null
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-//
-#include <stdio.h>
-#include <algorithm>
-#include <set>
-#include <string>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include "db/dbformat.h"
-#include "db/memtable.h"
-#include "db/write_batch_internal.h"
-#include "rocksdb/db.h"
-#include "rocksdb/env.h"
-#include "rocksdb/iterator.h"
-#include "rocksdb/slice_transform.h"
-#include "rocksdb/table.h"
-#include "table/block.h"
-#include "table/block_builder.h"
-#include "table/format.h"
-#include "util/random.h"
-#include "util/testharness.h"
-#include "util/testutil.h"
-
-namespace rocksdb {
-
-static std::string RandomString(Random *rnd, int len) {
- std::string r;
- test::RandomString(rnd, len, &r);
- return r;
-}
-std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
- Random *rnd) {
- char buf[50];
- char *p = &buf[0];
- snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key);
- std::string k(p);
- if (padding_size) {
- k += RandomString(rnd, padding_size);
- }
-
- return k;
-}
-
-// Generate random key value pairs.
-// The generated key will be sorted. You can tune the parameters to generated
-// different kinds of test key/value pairs for different scenario.
-void GenerateRandomKVs(std::vector<std::string> *keys,
- std::vector<std::string> *values, const int from,
- const int len, const int step = 1,
- const int padding_size = 0,
- const int keys_share_prefix = 1) {
- Random rnd(302);
-
- // generate different prefix
- for (int i = from; i < from + len; i += step) {
- // generating keys that shares the prefix
- for (int j = 0; j < keys_share_prefix; ++j) {
- keys->emplace_back(GenerateKey(i, j, padding_size, &rnd));
-
- // 100 bytes values
- values->emplace_back(RandomString(&rnd, 100));
- }
- }
-}
-
-// Same as GenerateRandomKVs but the values are BlockHandle
-void GenerateRandomKBHs(std::vector<std::string> *keys,
- std::vector<BlockHandle> *values, const int from,
- const int len, const int step = 1,
- const int padding_size = 0,
- const int keys_share_prefix = 1) {
- Random rnd(302);
- uint64_t offset = 0;
-
- // generate different prefix
- for (int i = from; i < from + len; i += step) {
- // generate keys that shares the prefix
- for (int j = 0; j < keys_share_prefix; ++j) {
- keys->emplace_back(GenerateKey(i, j, padding_size, &rnd));
-
- uint64_t size = rnd.Uniform(1024 * 16);
- BlockHandle handle(offset, size);
- offset += size + kBlockTrailerSize;
- values->emplace_back(handle);
- }
- }
-}
-
-class BlockTest : public testing::Test {};
-
-// block test
-TEST_F(BlockTest, SimpleTest) {
- Random rnd(301);
- Options options = Options();
- std::unique_ptr<InternalKeyComparator> ic;
- ic.reset(new test::PlainInternalKeyComparator(options.comparator));
-
- std::vector<std::string> keys;
- std::vector<std::string> values;
- BlockBuilder builder(16);
- int num_records = 100000;
-
- GenerateRandomKVs(&keys, &values, 0, num_records);
- // add a bunch of records to a block
- for (int i = 0; i < num_records; i++) {
- builder.Add(keys[i], values[i]);
- }
-
- // read serialized contents of the block
- Slice rawblock = builder.Finish();
-
- // create block reader
- BlockContents contents;
- contents.data = rawblock;
- Block reader(std::move(contents), kDisableGlobalSequenceNumber);
-
- // read contents of block sequentially
- int count = 0;
- InternalIterator *iter =
- reader.NewIterator<DataBlockIter>(options.comparator, options.comparator);
- for (iter->SeekToFirst(); iter->Valid(); count++, iter->Next()) {
- // read kv from block
- Slice k = iter->key();
- Slice v = iter->value();
-
- // compare with lookaside array
- ASSERT_EQ(k.ToString().compare(keys[count]), 0);
- ASSERT_EQ(v.ToString().compare(values[count]), 0);
- }
- delete iter;
-
- // read block contents randomly
- iter =
- reader.NewIterator<DataBlockIter>(options.comparator, options.comparator);
- for (int i = 0; i < num_records; i++) {
- // find a random key in the lookaside array
- int index = rnd.Uniform(num_records);
- Slice k(keys[index]);
-
- // search in block for this key
- iter->Seek(k);
- ASSERT_TRUE(iter->Valid());
- Slice v = iter->value();
- ASSERT_EQ(v.ToString().compare(values[index]), 0);
- }
- delete iter;
-}
-
-TEST_F(BlockTest, ValueDeltaEncodingTest) {
- Random rnd(301);
- Options options = Options();
- std::unique_ptr<InternalKeyComparator> ic;
- ic.reset(new test::PlainInternalKeyComparator(options.comparator));
-
- std::vector<std::string> keys;
- std::vector<BlockHandle> values;
- const bool kUseDeltaEncoding = true;
- const bool kUseValueDeltaEncoding = true;
- BlockBuilder builder(16, kUseDeltaEncoding, kUseValueDeltaEncoding);
- int num_records = 100;
-
- GenerateRandomKBHs(&keys, &values, 0, num_records);
- // add a bunch of records to a block
- BlockHandle last_encoded_handle;
- for (int i = 0; i < num_records; i++) {
- auto block_handle = values[i];
- std::string handle_encoding;
- block_handle.EncodeTo(&handle_encoding);
- std::string handle_delta_encoding;
- PutVarsignedint64(&handle_delta_encoding,
- block_handle.size() - last_encoded_handle.size());
- last_encoded_handle = block_handle;
- const Slice handle_delta_encoding_slice(handle_delta_encoding);
- builder.Add(keys[i], handle_encoding, &handle_delta_encoding_slice);
- }
-
- // read serialized contents of the block
- Slice rawblock = builder.Finish();
-
- // create block reader
- BlockContents contents;
- contents.data = rawblock;
- Block reader(std::move(contents), kDisableGlobalSequenceNumber);
-
- const bool kTotalOrderSeek = true;
- const bool kIncludesSeq = true;
- const bool kValueIsFull = !kUseValueDeltaEncoding;
- IndexBlockIter *kNullIter = nullptr;
- Statistics *kNullStats = nullptr;
- // read contents of block sequentially
- int count = 0;
- InternalIteratorBase<BlockHandle> *iter = reader.NewIterator<IndexBlockIter>(
- options.comparator, options.comparator, kNullIter, kNullStats,
- kTotalOrderSeek, kIncludesSeq, kValueIsFull);
- for (iter->SeekToFirst(); iter->Valid(); count++, iter->Next()) {
- // read kv from block
- Slice k = iter->key();
- BlockHandle handle = iter->value();
-
- // compare with lookaside array
- ASSERT_EQ(k.ToString().compare(keys[count]), 0);
-
- ASSERT_EQ(values[count].offset(), handle.offset());
- ASSERT_EQ(values[count].size(), handle.size());
- }
- delete iter;
-
- // read block contents randomly
- iter = reader.NewIterator<IndexBlockIter>(
- options.comparator, options.comparator, kNullIter, kNullStats,
- kTotalOrderSeek, kIncludesSeq, kValueIsFull);
- for (int i = 0; i < num_records; i++) {
- // find a random key in the lookaside array
- int index = rnd.Uniform(num_records);
- Slice k(keys[index]);
-
- // search in block for this key
- iter->Seek(k);
- ASSERT_TRUE(iter->Valid());
- BlockHandle handle = iter->value();
- ASSERT_EQ(values[index].offset(), handle.offset());
- ASSERT_EQ(values[index].size(), handle.size());
- }
- delete iter;
-}
-// return the block contents
-BlockContents GetBlockContents(std::unique_ptr<BlockBuilder> *builder,
- const std::vector<std::string> &keys,
- const std::vector<std::string> &values,
- const int /*prefix_group_size*/ = 1) {
- builder->reset(new BlockBuilder(1 /* restart interval */));
-
- // Add only half of the keys
- for (size_t i = 0; i < keys.size(); ++i) {
- (*builder)->Add(keys[i], values[i]);
- }
- Slice rawblock = (*builder)->Finish();
-
- BlockContents contents;
- contents.data = rawblock;
-
- return contents;
-}
-
-void CheckBlockContents(BlockContents contents, const int max_key,
- const std::vector<std::string> &keys,
- const std::vector<std::string> &values) {
- const size_t prefix_size = 6;
- // create block reader
- BlockContents contents_ref(contents.data);
- Block reader1(std::move(contents), kDisableGlobalSequenceNumber);
- Block reader2(std::move(contents_ref), kDisableGlobalSequenceNumber);
-
- std::unique_ptr<const SliceTransform> prefix_extractor(
- NewFixedPrefixTransform(prefix_size));
-
- std::unique_ptr<InternalIterator> regular_iter(
- reader2.NewIterator<DataBlockIter>(BytewiseComparator(),
- BytewiseComparator()));
-
- // Seek existent keys
- for (size_t i = 0; i < keys.size(); i++) {
- regular_iter->Seek(keys[i]);
- ASSERT_OK(regular_iter->status());
- ASSERT_TRUE(regular_iter->Valid());
-
- Slice v = regular_iter->value();
- ASSERT_EQ(v.ToString().compare(values[i]), 0);
- }
-
- // Seek non-existent keys.
- // For hash index, if no key with a given prefix is not found, iterator will
- // simply be set as invalid; whereas the binary search based iterator will
- // return the one that is closest.
- for (int i = 1; i < max_key - 1; i += 2) {
- auto key = GenerateKey(i, 0, 0, nullptr);
- regular_iter->Seek(key);
- ASSERT_TRUE(regular_iter->Valid());
- }
-}
-
-// In this test case, no two key share same prefix.
-TEST_F(BlockTest, SimpleIndexHash) {
- const int kMaxKey = 100000;
- std::vector<std::string> keys;
- std::vector<std::string> values;
- GenerateRandomKVs(&keys, &values, 0 /* first key id */,
- kMaxKey /* last key id */, 2 /* step */,
- 8 /* padding size (8 bytes randomly generated suffix) */);
-
- std::unique_ptr<BlockBuilder> builder;
- auto contents = GetBlockContents(&builder, keys, values);
-
- CheckBlockContents(std::move(contents), kMaxKey, keys, values);
-}
-
-TEST_F(BlockTest, IndexHashWithSharedPrefix) {
- const int kMaxKey = 100000;
- // for each prefix, there will be 5 keys starts with it.
- const int kPrefixGroup = 5;
- std::vector<std::string> keys;
- std::vector<std::string> values;
- // Generate keys with same prefix.
- GenerateRandomKVs(&keys, &values, 0, // first key id
- kMaxKey, // last key id
- 2, // step
- 10, // padding size,
- kPrefixGroup);
-
- std::unique_ptr<BlockBuilder> builder;
- auto contents = GetBlockContents(&builder, keys, values, kPrefixGroup);
-
- CheckBlockContents(std::move(contents), kMaxKey, keys, values);
-}
-
-// A slow and accurate version of BlockReadAmpBitmap that simply store
-// all the marked ranges in a set.
-class BlockReadAmpBitmapSlowAndAccurate {
- public:
- void Mark(size_t start_offset, size_t end_offset) {
- assert(end_offset >= start_offset);
- marked_ranges_.emplace(end_offset, start_offset);
- }
-
- void ResetCheckSequence() { iter_valid_ = false; }
-
- // Return true if any byte in this range was Marked
- // This does linear search from the previous position. When calling
- // multiple times, `offset` needs to be incremental to get correct results.
- // Call ResetCheckSequence() to reset it.
- bool IsPinMarked(size_t offset) {
- if (iter_valid_) {
- // Has existing iterator, try linear search from
- // the iterator.
- for (int i = 0; i < 64; i++) {
- if (offset < iter_->second) {
- return false;
- }
- if (offset <= iter_->first) {
- return true;
- }
-
- iter_++;
- if (iter_ == marked_ranges_.end()) {
- iter_valid_ = false;
- return false;
- }
- }
- }
- // Initial call or have linear searched too many times.
- // Do binary search.
- iter_ = marked_ranges_.lower_bound(
- std::make_pair(offset, static_cast<size_t>(0)));
- if (iter_ == marked_ranges_.end()) {
- iter_valid_ = false;
- return false;
- }
- iter_valid_ = true;
- return offset <= iter_->first && offset >= iter_->second;
- }
-
- private:
- std::set<std::pair<size_t, size_t>> marked_ranges_;
- std::set<std::pair<size_t, size_t>>::iterator iter_;
- bool iter_valid_ = false;
-};
-
-TEST_F(BlockTest, BlockReadAmpBitmap) {
- uint32_t pin_offset = 0;
- SyncPoint::GetInstance()->SetCallBack(
- "BlockReadAmpBitmap:rnd", [&pin_offset](void *arg) {
- pin_offset = *(static_cast<uint32_t *>(arg));
- });
- SyncPoint::GetInstance()->EnableProcessing();
- std::vector<size_t> block_sizes = {
- 1, // 1 byte
- 32, // 32 bytes
- 61, // 61 bytes
- 64, // 64 bytes
- 512, // 0.5 KB
- 1024, // 1 KB
- 1024 * 4, // 4 KB
- 1024 * 10, // 10 KB
- 1024 * 50, // 50 KB
- 1024 * 1024 * 4, // 5 MB
- 777,
- 124653,
- };
- const size_t kBytesPerBit = 64;
-
- Random rnd(301);
- for (size_t block_size : block_sizes) {
- std::shared_ptr<Statistics> stats = rocksdb::CreateDBStatistics();
- BlockReadAmpBitmap read_amp_bitmap(block_size, kBytesPerBit, stats.get());
- BlockReadAmpBitmapSlowAndAccurate read_amp_slow_and_accurate;
-
- size_t needed_bits = (block_size / kBytesPerBit);
- if (block_size % kBytesPerBit != 0) {
- needed_bits++;
- }
-
- ASSERT_EQ(stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES), block_size);
-
- // Generate some random entries
- std::vector<size_t> random_entry_offsets;
- for (int i = 0; i < 1000; i++) {
- random_entry_offsets.push_back(rnd.Next() % block_size);
- }
- std::sort(random_entry_offsets.begin(), random_entry_offsets.end());
- auto it =
- std::unique(random_entry_offsets.begin(), random_entry_offsets.end());
- random_entry_offsets.resize(
- std::distance(random_entry_offsets.begin(), it));
-
- std::vector<std::pair<size_t, size_t>> random_entries;
- for (size_t i = 0; i < random_entry_offsets.size(); i++) {
- size_t entry_start = random_entry_offsets[i];
- size_t entry_end;
- if (i + 1 < random_entry_offsets.size()) {
- entry_end = random_entry_offsets[i + 1] - 1;
- } else {
- entry_end = block_size - 1;
- }
- random_entries.emplace_back(entry_start, entry_end);
- }
-
- for (size_t i = 0; i < random_entries.size(); i++) {
- read_amp_slow_and_accurate.ResetCheckSequence();
- auto ¤t_entry = random_entries[rnd.Next() % random_entries.size()];
-
- read_amp_bitmap.Mark(static_cast<uint32_t>(current_entry.first),
- static_cast<uint32_t>(current_entry.second));
- read_amp_slow_and_accurate.Mark(current_entry.first,
- current_entry.second);
-
- size_t total_bits = 0;
- for (size_t bit_idx = 0; bit_idx < needed_bits; bit_idx++) {
- total_bits += read_amp_slow_and_accurate.IsPinMarked(
- bit_idx * kBytesPerBit + pin_offset);
- }
- size_t expected_estimate_useful = total_bits * kBytesPerBit;
- size_t got_estimate_useful =
- stats->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES);
- ASSERT_EQ(expected_estimate_useful, got_estimate_useful);
- }
- }
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
-}
-
-TEST_F(BlockTest, BlockWithReadAmpBitmap) {
- Random rnd(301);
- Options options = Options();
- std::unique_ptr<InternalKeyComparator> ic;
- ic.reset(new test::PlainInternalKeyComparator(options.comparator));
-
- std::vector<std::string> keys;
- std::vector<std::string> values;
- BlockBuilder builder(16);
- int num_records = 10000;
-
- GenerateRandomKVs(&keys, &values, 0, num_records, 1);
- // add a bunch of records to a block
- for (int i = 0; i < num_records; i++) {
- builder.Add(keys[i], values[i]);
- }
-
- Slice rawblock = builder.Finish();
- const size_t kBytesPerBit = 8;
-
- // Read the block sequentially using Next()
- {
- std::shared_ptr<Statistics> stats = rocksdb::CreateDBStatistics();
-
- // create block reader
- BlockContents contents;
- contents.data = rawblock;
- Block reader(std::move(contents), kDisableGlobalSequenceNumber,
- kBytesPerBit, stats.get());
-
- // read contents of block sequentially
- size_t read_bytes = 0;
- DataBlockIter *iter =
- static_cast<DataBlockIter *>(reader.NewIterator<DataBlockIter>(
- options.comparator, options.comparator, nullptr, stats.get()));
- for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
- iter->value();
- read_bytes += iter->TEST_CurrentEntrySize();
-
- double semi_acc_read_amp =
- static_cast<double>(read_bytes) / rawblock.size();
- double read_amp = static_cast<double>(stats->getTickerCount(
- READ_AMP_ESTIMATE_USEFUL_BYTES)) /
- stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
-
- // Error in read amplification will be less than 1% if we are reading
- // sequentially
- double error_pct = fabs(semi_acc_read_amp - read_amp) * 100;
- EXPECT_LT(error_pct, 1);
- }
-
- delete iter;
- }
-
- // Read the block sequentially using Seek()
- {
- std::shared_ptr<Statistics> stats = rocksdb::CreateDBStatistics();
-
- // create block reader
- BlockContents contents;
- contents.data = rawblock;
- Block reader(std::move(contents), kDisableGlobalSequenceNumber,
- kBytesPerBit, stats.get());
-
- size_t read_bytes = 0;
- DataBlockIter *iter =
- static_cast<DataBlockIter *>(reader.NewIterator<DataBlockIter>(
- options.comparator, options.comparator, nullptr, stats.get()));
- for (int i = 0; i < num_records; i++) {
- Slice k(keys[i]);
-
- // search in block for this key
- iter->Seek(k);
- iter->value();
- read_bytes += iter->TEST_CurrentEntrySize();
-
- double semi_acc_read_amp =
- static_cast<double>(read_bytes) / rawblock.size();
- double read_amp = static_cast<double>(stats->getTickerCount(
- READ_AMP_ESTIMATE_USEFUL_BYTES)) /
- stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
-
- // Error in read amplification will be less than 1% if we are reading
- // sequentially
- double error_pct = fabs(semi_acc_read_amp - read_amp) * 100;
- EXPECT_LT(error_pct, 1);
- }
- delete iter;
- }
-
- // Read the block randomly
- {
- std::shared_ptr<Statistics> stats = rocksdb::CreateDBStatistics();
-
- // create block reader
- BlockContents contents;
- contents.data = rawblock;
- Block reader(std::move(contents), kDisableGlobalSequenceNumber,
- kBytesPerBit, stats.get());
-
- size_t read_bytes = 0;
- DataBlockIter *iter =
- static_cast<DataBlockIter *>(reader.NewIterator<DataBlockIter>(
- options.comparator, options.comparator, nullptr, stats.get()));
- std::unordered_set<int> read_keys;
- for (int i = 0; i < num_records; i++) {
- int index = rnd.Uniform(num_records);
- Slice k(keys[index]);
-
- iter->Seek(k);
- iter->value();
- if (read_keys.find(index) == read_keys.end()) {
- read_keys.insert(index);
- read_bytes += iter->TEST_CurrentEntrySize();
- }
-
- double semi_acc_read_amp =
- static_cast<double>(read_bytes) / rawblock.size();
- double read_amp = static_cast<double>(stats->getTickerCount(
- READ_AMP_ESTIMATE_USEFUL_BYTES)) /
- stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
-
- double error_pct = fabs(semi_acc_read_amp - read_amp) * 100;
- // Error in read amplification will be less than 2% if we are reading
- // randomly
- EXPECT_LT(error_pct, 2);
- }
- delete iter;
- }
-}
-
-TEST_F(BlockTest, ReadAmpBitmapPow2) {
- std::shared_ptr<Statistics> stats = rocksdb::CreateDBStatistics();
- ASSERT_EQ(BlockReadAmpBitmap(100, 1, stats.get()).GetBytesPerBit(), 1);
- ASSERT_EQ(BlockReadAmpBitmap(100, 2, stats.get()).GetBytesPerBit(), 2);
- ASSERT_EQ(BlockReadAmpBitmap(100, 4, stats.get()).GetBytesPerBit(), 4);
- ASSERT_EQ(BlockReadAmpBitmap(100, 8, stats.get()).GetBytesPerBit(), 8);
- ASSERT_EQ(BlockReadAmpBitmap(100, 16, stats.get()).GetBytesPerBit(), 16);
- ASSERT_EQ(BlockReadAmpBitmap(100, 32, stats.get()).GetBytesPerBit(), 32);
-
- ASSERT_EQ(BlockReadAmpBitmap(100, 3, stats.get()).GetBytesPerBit(), 2);
- ASSERT_EQ(BlockReadAmpBitmap(100, 7, stats.get()).GetBytesPerBit(), 4);
- ASSERT_EQ(BlockReadAmpBitmap(100, 11, stats.get()).GetBytesPerBit(), 8);
- ASSERT_EQ(BlockReadAmpBitmap(100, 17, stats.get()).GetBytesPerBit(), 16);
- ASSERT_EQ(BlockReadAmpBitmap(100, 33, stats.get()).GetBytesPerBit(), 32);
- ASSERT_EQ(BlockReadAmpBitmap(100, 35, stats.get()).GetBytesPerBit(), 32);
-}
-
-} // namespace rocksdb
-
-int main(int argc, char **argv) {
- ::testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}