1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include <stdio.h>
11
12 #include <algorithm>
13 #include <iostream>
14 #include <map>
15 #include <memory>
16 #include <string>
17 #include <vector>
18
19 #include "cache/lru_cache.h"
20 #include "db/dbformat.h"
21 #include "db/memtable.h"
22 #include "db/write_batch_internal.h"
23 #include "memtable/stl_wrappers.h"
24 #include "monitoring/statistics.h"
25 #include "port/port.h"
26 #include "rocksdb/cache.h"
27 #include "rocksdb/db.h"
28 #include "rocksdb/env.h"
29 #include "rocksdb/iterator.h"
30 #include "rocksdb/memtablerep.h"
31 #include "rocksdb/perf_context.h"
32 #include "rocksdb/slice_transform.h"
33 #include "rocksdb/statistics.h"
34 #include "rocksdb/write_buffer_manager.h"
35 #include "table/block.h"
36 #include "table/block_based_table_builder.h"
37 #include "table/block_based_table_factory.h"
38 #include "table/block_based_table_reader.h"
39 #include "table/block_builder.h"
40 #include "table/block_fetcher.h"
41 #include "table/format.h"
42 #include "table/get_context.h"
43 #include "table/internal_iterator.h"
44 #include "table/meta_blocks.h"
45 #include "table/plain_table_factory.h"
46 #include "table/scoped_arena_iterator.h"
47 #include "table/sst_file_writer_collectors.h"
48 #include "util/compression.h"
49 #include "util/random.h"
50 #include "util/string_util.h"
51 #include "util/sync_point.h"
52 #include "util/testharness.h"
53 #include "util/testutil.h"
54 #include "utilities/merge_operators.h"
55
56 namespace rocksdb {
57
58 extern const uint64_t kLegacyBlockBasedTableMagicNumber;
59 extern const uint64_t kLegacyPlainTableMagicNumber;
60 extern const uint64_t kBlockBasedTableMagicNumber;
61 extern const uint64_t kPlainTableMagicNumber;
62
63 namespace {
64
65 // DummyPropertiesCollector used to test BlockBasedTableProperties
66 class DummyPropertiesCollector : public TablePropertiesCollector {
67 public:
68 const char* Name() const { return ""; }
69
70 Status Finish(UserCollectedProperties* /*properties*/) {
71 return Status::OK();
72 }
73
74 Status Add(const Slice& /*user_key*/, const Slice& /*value*/) {
75 return Status::OK();
76 }
77
78 virtual UserCollectedProperties GetReadableProperties() const {
79 return UserCollectedProperties{};
80 }
81 };
82
83 class DummyPropertiesCollectorFactory1
84 : public TablePropertiesCollectorFactory {
85 public:
86 virtual TablePropertiesCollector* CreateTablePropertiesCollector(
87 TablePropertiesCollectorFactory::Context /*context*/) {
88 return new DummyPropertiesCollector();
89 }
90 const char* Name() const { return "DummyPropertiesCollector1"; }
91 };
92
93 class DummyPropertiesCollectorFactory2
94 : public TablePropertiesCollectorFactory {
95 public:
96 virtual TablePropertiesCollector* CreateTablePropertiesCollector(
97 TablePropertiesCollectorFactory::Context /*context*/) {
98 return new DummyPropertiesCollector();
99 }
100 const char* Name() const { return "DummyPropertiesCollector2"; }
101 };
102
103 // Return reverse of "key".
104 // Used to test non-lexicographic comparators.
105 std::string Reverse(const Slice& key) {
106 auto rev = key.ToString();
107 std::reverse(rev.begin(), rev.end());
108 return rev;
109 }
110
111 class ReverseKeyComparator : public Comparator {
112 public:
113 virtual const char* Name() const override {
114 return "rocksdb.ReverseBytewiseComparator";
115 }
116
117 virtual int Compare(const Slice& a, const Slice& b) const override {
118 return BytewiseComparator()->Compare(Reverse(a), Reverse(b));
119 }
120
121 virtual void FindShortestSeparator(std::string* start,
122 const Slice& limit) const override {
123 std::string s = Reverse(*start);
124 std::string l = Reverse(limit);
125 BytewiseComparator()->FindShortestSeparator(&s, l);
126 *start = Reverse(s);
127 }
128
129 virtual void FindShortSuccessor(std::string* key) const override {
130 std::string s = Reverse(*key);
131 BytewiseComparator()->FindShortSuccessor(&s);
132 *key = Reverse(s);
133 }
134 };
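// Illustrative aside (not part of the original file): because keys are
// compared by their reversed bytes, the resulting order is genuinely
// non-lexicographic. For example:
//
//   ReverseKeyComparator cmp;
//   // Compare(Reverse("ba"), Reverse("ab")) == Compare("ab", "ba") < 0
//   assert(cmp.Compare(Slice("ba"), Slice("ab")) < 0);                    // reversed order
//   assert(BytewiseComparator()->Compare(Slice("ba"), Slice("ab")) > 0);  // lexicographic order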
135
136 ReverseKeyComparator reverse_key_comparator;
137
138 void Increment(const Comparator* cmp, std::string* key) {
139 if (cmp == BytewiseComparator()) {
140 key->push_back('\0');
141 } else {
142 assert(cmp == &reverse_key_comparator);
143 std::string rev = Reverse(*key);
144 rev.push_back('\0');
145 *key = Reverse(rev);
146 }
147 }
148
149 } // namespace
150
151 // Helper class for tests to unify the interface between
152 // BlockBuilder/TableBuilder and Block/Table.
153 class Constructor {
154 public:
155 explicit Constructor(const Comparator* cmp)
156 : data_(stl_wrappers::LessOfComparator(cmp)) {}
157 virtual ~Constructor() { }
158
159 void Add(const std::string& key, const Slice& value) {
160 data_[key] = value.ToString();
161 }
162
163 // Finish constructing the data structure with all the keys that have
164 // been added so far. Returns the keys in sorted order in "*keys"
165 // and stores the key/value pairs in "*kvmap"
166 void Finish(const Options& options, const ImmutableCFOptions& ioptions,
167 const MutableCFOptions& moptions,
168 const BlockBasedTableOptions& table_options,
169 const InternalKeyComparator& internal_comparator,
170 std::vector<std::string>* keys, stl_wrappers::KVMap* kvmap) {
171 last_internal_key_ = &internal_comparator;
172 *kvmap = data_;
173 keys->clear();
174 for (const auto& kv : data_) {
175 keys->push_back(kv.first);
176 }
177 data_.clear();
178 Status s = FinishImpl(options, ioptions, moptions, table_options,
179 internal_comparator, *kvmap);
180 ASSERT_TRUE(s.ok()) << s.ToString();
181 }
182
183 // Construct the data structure from the data in "data"
184 virtual Status FinishImpl(const Options& options,
185 const ImmutableCFOptions& ioptions,
186 const MutableCFOptions& moptions,
187 const BlockBasedTableOptions& table_options,
188 const InternalKeyComparator& internal_comparator,
189 const stl_wrappers::KVMap& data) = 0;
190
191 virtual InternalIterator* NewIterator(
192 const SliceTransform* prefix_extractor = nullptr) const = 0;
193
194 virtual const stl_wrappers::KVMap& data() { return data_; }
195
196 virtual bool IsArenaMode() const { return false; }
197
198 virtual DB* db() const { return nullptr; } // Overridden in DBConstructor
199
200 virtual bool AnywayDeleteIterator() const { return false; }
201
202 protected:
203 const InternalKeyComparator* last_internal_key_;
204
205 private:
206 stl_wrappers::KVMap data_;
207 };
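// Usage sketch for the Constructor contract above (illustrative only; any of
// the concrete subclasses below, e.g. BlockConstructor, can stand in):
//
//   BlockConstructor c(BytewiseComparator());
//   c.Add("k1", "v1");
//   c.Add("k2", "v2");
//   std::vector<std::string> keys;
//   stl_wrappers::KVMap kvmap;
//   c.Finish(options, ioptions, moptions, table_options, internal_comparator,
//            &keys, &kvmap);                      // keys come back sorted
//   std::unique_ptr<InternalIterator> iter(c.NewIterator());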
208
209 class BlockConstructor: public Constructor {
210 public:
211 explicit BlockConstructor(const Comparator* cmp)
212 : Constructor(cmp),
213 comparator_(cmp),
214 block_(nullptr) { }
215 ~BlockConstructor() {
216 delete block_;
217 }
218 virtual Status FinishImpl(
219 const Options& /*options*/, const ImmutableCFOptions& /*ioptions*/,
220 const MutableCFOptions& /*moptions*/,
221 const BlockBasedTableOptions& table_options,
222 const InternalKeyComparator& /*internal_comparator*/,
223 const stl_wrappers::KVMap& kv_map) override {
224 delete block_;
225 block_ = nullptr;
226 BlockBuilder builder(table_options.block_restart_interval);
227
228 for (const auto kv : kv_map) {
229 builder.Add(kv.first, kv.second);
230 }
231 // Open the block
232 data_ = builder.Finish().ToString();
233 BlockContents contents;
234 contents.data = data_;
235 contents.cachable = false;
236 block_ = new Block(std::move(contents), kDisableGlobalSequenceNumber);
237 return Status::OK();
238 }
239 virtual InternalIterator* NewIterator(
240 const SliceTransform* /*prefix_extractor*/) const override {
241 return block_->NewIterator<DataBlockIter>(comparator_, comparator_);
242 }
243
244 private:
245 const Comparator* comparator_;
246 std::string data_;
247 Block* block_;
248
249 BlockConstructor();
250 };
251
252 // A helper class that converts internal format keys into user keys
253 class KeyConvertingIterator : public InternalIterator {
254 public:
255 explicit KeyConvertingIterator(InternalIterator* iter,
256 bool arena_mode = false)
257 : iter_(iter), arena_mode_(arena_mode) {}
258 virtual ~KeyConvertingIterator() {
259 if (arena_mode_) {
260 iter_->~InternalIterator();
261 } else {
262 delete iter_;
263 }
264 }
265 virtual bool Valid() const override { return iter_->Valid() && status_.ok(); }
266 virtual void Seek(const Slice& target) override {
267 ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
268 std::string encoded;
269 AppendInternalKey(&encoded, ikey);
270 iter_->Seek(encoded);
271 }
272 virtual void SeekForPrev(const Slice& target) override {
273 ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
274 std::string encoded;
275 AppendInternalKey(&encoded, ikey);
276 iter_->SeekForPrev(encoded);
277 }
278 virtual void SeekToFirst() override { iter_->SeekToFirst(); }
279 virtual void SeekToLast() override { iter_->SeekToLast(); }
280 virtual void Next() override { iter_->Next(); }
281 virtual void Prev() override { iter_->Prev(); }
282
283 virtual Slice key() const override {
284 assert(Valid());
285 ParsedInternalKey parsed_key;
286 if (!ParseInternalKey(iter_->key(), &parsed_key)) {
287 status_ = Status::Corruption("malformed internal key");
288 return Slice("corrupted key");
289 }
290 return parsed_key.user_key;
291 }
292
293 virtual Slice value() const override { return iter_->value(); }
294 virtual Status status() const override {
295 return status_.ok() ? iter_->status() : status_;
296 }
297
298 private:
299 mutable Status status_;
300 InternalIterator* iter_;
301 bool arena_mode_;
302
303 // No copying allowed
304 KeyConvertingIterator(const KeyConvertingIterator&);
305 void operator=(const KeyConvertingIterator&);
306 };
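// Background for the conversion above (illustrative sketch): an internal key
// is the user key followed by an 8-byte footer that packs the sequence number
// and value type, so dropping the footer recovers the user key.
//
//   ParsedInternalKey ikey("some_user_key", kMaxSequenceNumber, kTypeValue);
//   std::string encoded;
//   AppendInternalKey(&encoded, ikey);   // encoded = user key + 8-byte footer
//   ParsedInternalKey parsed;
//   assert(ParseInternalKey(encoded, &parsed));
//   assert(parsed.user_key == Slice("some_user_key"));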
307
308 class TableConstructor: public Constructor {
309 public:
310 explicit TableConstructor(const Comparator* cmp,
311 bool convert_to_internal_key = false,
312 int level = -1)
313 : Constructor(cmp),
314 convert_to_internal_key_(convert_to_internal_key),
315 level_(level) {}
316 ~TableConstructor() { Reset(); }
317
318 virtual Status FinishImpl(const Options& options,
319 const ImmutableCFOptions& ioptions,
320 const MutableCFOptions& moptions,
321 const BlockBasedTableOptions& /*table_options*/,
322 const InternalKeyComparator& internal_comparator,
323 const stl_wrappers::KVMap& kv_map) override {
324 Reset();
325 soptions.use_mmap_reads = ioptions.allow_mmap_reads;
326 file_writer_.reset(test::GetWritableFileWriter(new test::StringSink(),
327 "" /* don't care */));
328 unique_ptr<TableBuilder> builder;
329 std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
330 int_tbl_prop_collector_factories;
331 std::string column_family_name;
332 builder.reset(ioptions.table_factory->NewTableBuilder(
333 TableBuilderOptions(
334 ioptions, moptions, internal_comparator,
335 &int_tbl_prop_collector_factories, options.compression,
336 CompressionOptions(), nullptr /* compression_dict */,
337 false /* skip_filters */, column_family_name, level_),
338 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
339 file_writer_.get()));
340
341 for (const auto kv : kv_map) {
342 if (convert_to_internal_key_) {
343 ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
344 std::string encoded;
345 AppendInternalKey(&encoded, ikey);
346 builder->Add(encoded, kv.second);
347 } else {
348 builder->Add(kv.first, kv.second);
349 }
350 EXPECT_TRUE(builder->status().ok());
351 }
352 Status s = builder->Finish();
353 file_writer_->Flush();
354 EXPECT_TRUE(s.ok()) << s.ToString();
355
356 EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize());
357
358 // Open the table
359 uniq_id_ = cur_uniq_id_++;
360 file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
361 TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
362 const bool kSkipFilters = true;
363 const bool kImmortal = true;
364 return ioptions.table_factory->NewTableReader(
365 TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
366 internal_comparator, !kSkipFilters, !kImmortal,
367 level_),
368 std::move(file_reader_), TEST_GetSink()->contents().size(),
369 &table_reader_);
370 }
371
372 virtual InternalIterator* NewIterator(
373 const SliceTransform* prefix_extractor) const override {
374 ReadOptions ro;
375 InternalIterator* iter = table_reader_->NewIterator(ro, prefix_extractor);
376 if (convert_to_internal_key_) {
377 return new KeyConvertingIterator(iter);
378 } else {
379 return iter;
380 }
381 }
382
383 uint64_t ApproximateOffsetOf(const Slice& key) const {
384 if (convert_to_internal_key_) {
385 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
386 const Slice skey = ikey.Encode();
387 return table_reader_->ApproximateOffsetOf(skey);
388 }
389 return table_reader_->ApproximateOffsetOf(key);
390 }
391
392 virtual Status Reopen(const ImmutableCFOptions& ioptions,
393 const MutableCFOptions& moptions) {
394 file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
395 TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
396 return ioptions.table_factory->NewTableReader(
397 TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
398 *last_internal_key_),
399 std::move(file_reader_), TEST_GetSink()->contents().size(),
400 &table_reader_);
401 }
402
403 virtual TableReader* GetTableReader() { return table_reader_.get(); }
404
405 virtual bool AnywayDeleteIterator() const override {
406 return convert_to_internal_key_;
407 }
408
409 void ResetTableReader() { table_reader_.reset(); }
410
411 bool ConvertToInternalKey() { return convert_to_internal_key_; }
412
413 test::StringSink* TEST_GetSink() {
414 return static_cast<test::StringSink*>(file_writer_->writable_file());
415 }
416
417 private:
418 void Reset() {
419 uniq_id_ = 0;
420 table_reader_.reset();
421 file_writer_.reset();
422 file_reader_.reset();
423 }
424
425 uint64_t uniq_id_;
426 unique_ptr<WritableFileWriter> file_writer_;
427 unique_ptr<RandomAccessFileReader> file_reader_;
428 unique_ptr<TableReader> table_reader_;
429 bool convert_to_internal_key_;
430 int level_;
431
432 TableConstructor();
433
434 static uint64_t cur_uniq_id_;
435 EnvOptions soptions;
436 };
437 uint64_t TableConstructor::cur_uniq_id_ = 1;
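// How the tests below typically drive TableConstructor (sketch): the SST is
// written to an in-memory test::StringSink and reopened through a
// test::StringSource, so no real file system is involved.
//
//   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key */);
//   c.Add("a1", "val1");
//   std::vector<std::string> keys;
//   stl_wrappers::KVMap kvmap;
//   c.Finish(options, ioptions, moptions, table_options, internal_comparator,
//            &keys, &kvmap);
//   TableReader* reader = c.GetTableReader();
//   uint64_t offset = c.ApproximateOffsetOf("a1");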
438
439 class MemTableConstructor: public Constructor {
440 public:
441 explicit MemTableConstructor(const Comparator* cmp, WriteBufferManager* wb)
442 : Constructor(cmp),
443 internal_comparator_(cmp),
444 write_buffer_manager_(wb),
445 table_factory_(new SkipListFactory) {
446 options_.memtable_factory = table_factory_;
447 ImmutableCFOptions ioptions(options_);
448 memtable_ =
449 new MemTable(internal_comparator_, ioptions, MutableCFOptions(options_),
450 wb, kMaxSequenceNumber, 0 /* column_family_id */);
451 memtable_->Ref();
452 }
453 ~MemTableConstructor() {
454 delete memtable_->Unref();
455 }
456 virtual Status FinishImpl(
457 const Options&, const ImmutableCFOptions& ioptions,
458 const MutableCFOptions& /*moptions*/,
459 const BlockBasedTableOptions& /*table_options*/,
460 const InternalKeyComparator& /*internal_comparator*/,
461 const stl_wrappers::KVMap& kv_map) override {
462 delete memtable_->Unref();
463 ImmutableCFOptions mem_ioptions(ioptions);
464 memtable_ = new MemTable(internal_comparator_, mem_ioptions,
465 MutableCFOptions(options_), write_buffer_manager_,
466 kMaxSequenceNumber, 0 /* column_family_id */);
467 memtable_->Ref();
468 int seq = 1;
469 for (const auto kv : kv_map) {
470 memtable_->Add(seq, kTypeValue, kv.first, kv.second);
471 seq++;
472 }
473 return Status::OK();
474 }
475 virtual InternalIterator* NewIterator(
476 const SliceTransform* /*prefix_extractor*/) const override {
477 return new KeyConvertingIterator(
478 memtable_->NewIterator(ReadOptions(), &arena_), true);
479 }
480
481 virtual bool AnywayDeleteIterator() const override { return true; }
482
483 virtual bool IsArenaMode() const override { return true; }
484
485 private:
486 mutable Arena arena_;
487 InternalKeyComparator internal_comparator_;
488 Options options_;
489 WriteBufferManager* write_buffer_manager_;
490 MemTable* memtable_;
491 std::shared_ptr<SkipListFactory> table_factory_;
492 };
493
494 class InternalIteratorFromIterator : public InternalIterator {
495 public:
496 explicit InternalIteratorFromIterator(Iterator* it) : it_(it) {}
497 virtual bool Valid() const override { return it_->Valid(); }
498 virtual void Seek(const Slice& target) override { it_->Seek(target); }
499 virtual void SeekForPrev(const Slice& target) override {
500 it_->SeekForPrev(target);
501 }
502 virtual void SeekToFirst() override { it_->SeekToFirst(); }
503 virtual void SeekToLast() override { it_->SeekToLast(); }
504 virtual void Next() override { it_->Next(); }
505 virtual void Prev() override { it_->Prev(); }
506 Slice key() const override { return it_->key(); }
507 Slice value() const override { return it_->value(); }
508 virtual Status status() const override { return it_->status(); }
509
510 private:
511 unique_ptr<Iterator> it_;
512 };
513
514 class DBConstructor: public Constructor {
515 public:
516 explicit DBConstructor(const Comparator* cmp)
517 : Constructor(cmp),
518 comparator_(cmp) {
519 db_ = nullptr;
520 NewDB();
521 }
522 ~DBConstructor() {
523 delete db_;
524 }
525 virtual Status FinishImpl(
526 const Options& /*options*/, const ImmutableCFOptions& /*ioptions*/,
527 const MutableCFOptions& /*moptions*/,
528 const BlockBasedTableOptions& /*table_options*/,
529 const InternalKeyComparator& /*internal_comparator*/,
530 const stl_wrappers::KVMap& kv_map) override {
531 delete db_;
532 db_ = nullptr;
533 NewDB();
534 for (const auto kv : kv_map) {
535 WriteBatch batch;
536 batch.Put(kv.first, kv.second);
537 EXPECT_TRUE(db_->Write(WriteOptions(), &batch).ok());
538 }
539 return Status::OK();
540 }
541
542 virtual InternalIterator* NewIterator(
543 const SliceTransform* /*prefix_extractor*/) const override {
544 return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions()));
545 }
546
547 virtual DB* db() const override { return db_; }
548
549 private:
550 void NewDB() {
551 std::string name = test::PerThreadDBPath("table_testdb");
552
553 Options options;
554 options.comparator = comparator_;
555 Status status = DestroyDB(name, options);
556 ASSERT_TRUE(status.ok()) << status.ToString();
557
558 options.create_if_missing = true;
559 options.error_if_exists = true;
560 options.write_buffer_size = 10000; // Something small to force merging
561 status = DB::Open(options, name, &db_);
562 ASSERT_TRUE(status.ok()) << status.ToString();
563 }
564
565 const Comparator* comparator_;
566 DB* db_;
567 };
568
569 enum TestType {
570 BLOCK_BASED_TABLE_TEST,
571 #ifndef ROCKSDB_LITE
572 PLAIN_TABLE_SEMI_FIXED_PREFIX,
573 PLAIN_TABLE_FULL_STR_PREFIX,
574 PLAIN_TABLE_TOTAL_ORDER,
575 #endif // !ROCKSDB_LITE
576 BLOCK_TEST,
577 MEMTABLE_TEST,
578 DB_TEST
579 };
580
581 struct TestArgs {
582 TestType type;
583 bool reverse_compare;
584 int restart_interval;
585 CompressionType compression;
586 uint32_t format_version;
587 bool use_mmap;
588 };
589
590 static std::vector<TestArgs> GenerateArgList() {
591 std::vector<TestArgs> test_args;
592 std::vector<TestType> test_types = {
593 BLOCK_BASED_TABLE_TEST,
594 #ifndef ROCKSDB_LITE
595 PLAIN_TABLE_SEMI_FIXED_PREFIX,
596 PLAIN_TABLE_FULL_STR_PREFIX,
597 PLAIN_TABLE_TOTAL_ORDER,
598 #endif // !ROCKSDB_LITE
599 BLOCK_TEST,
600 MEMTABLE_TEST, DB_TEST};
601 std::vector<bool> reverse_compare_types = {false, true};
602 std::vector<int> restart_intervals = {16, 1, 1024};
603
604 // Only add compression if it is supported
605 std::vector<std::pair<CompressionType, bool>> compression_types;
606 compression_types.emplace_back(kNoCompression, false);
607 if (Snappy_Supported()) {
608 compression_types.emplace_back(kSnappyCompression, false);
609 }
610 if (Zlib_Supported()) {
611 compression_types.emplace_back(kZlibCompression, false);
612 compression_types.emplace_back(kZlibCompression, true);
613 }
614 if (BZip2_Supported()) {
615 compression_types.emplace_back(kBZip2Compression, false);
616 compression_types.emplace_back(kBZip2Compression, true);
617 }
618 if (LZ4_Supported()) {
619 compression_types.emplace_back(kLZ4Compression, false);
620 compression_types.emplace_back(kLZ4Compression, true);
621 compression_types.emplace_back(kLZ4HCCompression, false);
622 compression_types.emplace_back(kLZ4HCCompression, true);
623 }
624 if (XPRESS_Supported()) {
625 compression_types.emplace_back(kXpressCompression, false);
626 compression_types.emplace_back(kXpressCompression, true);
627 }
628 if (ZSTD_Supported()) {
629 compression_types.emplace_back(kZSTD, false);
630 compression_types.emplace_back(kZSTD, true);
631 }
632
633 for (auto test_type : test_types) {
634 for (auto reverse_compare : reverse_compare_types) {
635 #ifndef ROCKSDB_LITE
636 if (test_type == PLAIN_TABLE_SEMI_FIXED_PREFIX ||
637 test_type == PLAIN_TABLE_FULL_STR_PREFIX ||
638 test_type == PLAIN_TABLE_TOTAL_ORDER) {
639 // Plain table doesn't use restart index or compression.
640 TestArgs one_arg;
641 one_arg.type = test_type;
642 one_arg.reverse_compare = reverse_compare;
643 one_arg.restart_interval = restart_intervals[0];
644 one_arg.compression = compression_types[0].first;
645 one_arg.use_mmap = true;
646 test_args.push_back(one_arg);
647 one_arg.use_mmap = false;
648 test_args.push_back(one_arg);
649 continue;
650 }
651 #endif // !ROCKSDB_LITE
652
653 for (auto restart_interval : restart_intervals) {
654 for (auto compression_type : compression_types) {
655 TestArgs one_arg;
656 one_arg.type = test_type;
657 one_arg.reverse_compare = reverse_compare;
658 one_arg.restart_interval = restart_interval;
659 one_arg.compression = compression_type.first;
660 one_arg.format_version = compression_type.second ? 2 : 1;
661 one_arg.use_mmap = false;
662 test_args.push_back(one_arg);
663 }
664 }
665 }
666 }
667 return test_args;
668 }
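// Worked example of the matrix produced above (illustrative; assumes only
// Snappy is compiled in, so compression_types holds {kNoCompression,
// kSnappyCompression}, each with format_version 1):
//
//   BLOCK_BASED_TABLE_TEST   : 2 reverse * 3 restart_intervals * 2 compression = 12 args
//   PLAIN_TABLE_* (each)     : 2 reverse * 2 mmap settings                     =  4 args
//   BLOCK/MEMTABLE/DB_TEST   : 12 args each, same as the block-based case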
669
670 // In order to make all tests run for the plain table format, including
671 // those operating on empty keys, create a new prefix transformer which
672 // returns the fixed-length prefix if the slice is not shorter than the prefix
673 // length, and the full slice if it is shorter.
674 class FixedOrLessPrefixTransform : public SliceTransform {
675 private:
676 const size_t prefix_len_;
677
678 public:
679 explicit FixedOrLessPrefixTransform(size_t prefix_len) :
680 prefix_len_(prefix_len) {
681 }
682
683 virtual const char* Name() const override { return "rocksdb.FixedPrefix"; }
684
685 virtual Slice Transform(const Slice& src) const override {
686 assert(InDomain(src));
687 if (src.size() < prefix_len_) {
688 return src;
689 }
690 return Slice(src.data(), prefix_len_);
691 }
692
693 virtual bool InDomain(const Slice& /*src*/) const override { return true; }
694
695 virtual bool InRange(const Slice& dst) const override {
696 return (dst.size() <= prefix_len_);
697 }
698 virtual bool FullLengthEnabled(size_t* /*len*/) const override {
699 return false;
700 }
701 };
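// Illustrative behaviour with prefix_len_ == 2, as used by the harness via
// FixedOrLessPrefixTransform(2): short keys, including the empty key, stay in
// the domain and map to themselves.
//
//   FixedOrLessPrefixTransform t(2);
//   assert(t.Transform(Slice("abc")) == Slice("ab"));
//   assert(t.Transform(Slice("a")) == Slice("a"));
//   assert(t.Transform(Slice("")) == Slice(""));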
702
703 class HarnessTest : public testing::Test {
704 public:
705 HarnessTest()
706 : ioptions_(options_),
707 moptions_(options_),
708 constructor_(nullptr),
709 write_buffer_(options_.db_write_buffer_size) {}
710
711 void Init(const TestArgs& args) {
712 delete constructor_;
713 constructor_ = nullptr;
714 options_ = Options();
715 options_.compression = args.compression;
716 // Use shorter block size for tests to exercise block boundary
717 // conditions more.
718 if (args.reverse_compare) {
719 options_.comparator = &reverse_key_comparator;
720 }
721
722 internal_comparator_.reset(
723 new test::PlainInternalKeyComparator(options_.comparator));
724
725 support_prev_ = true;
726 only_support_prefix_seek_ = false;
727 options_.allow_mmap_reads = args.use_mmap;
728 switch (args.type) {
729 case BLOCK_BASED_TABLE_TEST:
730 table_options_.flush_block_policy_factory.reset(
731 new FlushBlockBySizePolicyFactory());
732 table_options_.block_size = 256;
733 table_options_.block_restart_interval = args.restart_interval;
734 table_options_.index_block_restart_interval = args.restart_interval;
735 table_options_.format_version = args.format_version;
736 options_.table_factory.reset(
737 new BlockBasedTableFactory(table_options_));
738 constructor_ = new TableConstructor(
739 options_.comparator, true /* convert_to_internal_key_ */);
740 internal_comparator_.reset(
741 new InternalKeyComparator(options_.comparator));
742 break;
743 // Plain table is not supported in ROCKSDB_LITE
744 #ifndef ROCKSDB_LITE
745 case PLAIN_TABLE_SEMI_FIXED_PREFIX:
746 support_prev_ = false;
747 only_support_prefix_seek_ = true;
748 options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2));
749 options_.table_factory.reset(NewPlainTableFactory());
750 constructor_ = new TableConstructor(
751 options_.comparator, true /* convert_to_internal_key_ */);
752 internal_comparator_.reset(
753 new InternalKeyComparator(options_.comparator));
754 break;
755 case PLAIN_TABLE_FULL_STR_PREFIX:
756 support_prev_ = false;
757 only_support_prefix_seek_ = true;
758 options_.prefix_extractor.reset(NewNoopTransform());
759 options_.table_factory.reset(NewPlainTableFactory());
760 constructor_ = new TableConstructor(
761 options_.comparator, true /* convert_to_internal_key_ */);
762 internal_comparator_.reset(
763 new InternalKeyComparator(options_.comparator));
764 break;
765 case PLAIN_TABLE_TOTAL_ORDER:
766 support_prev_ = false;
767 only_support_prefix_seek_ = false;
768 options_.prefix_extractor = nullptr;
769
770 {
771 PlainTableOptions plain_table_options;
772 plain_table_options.user_key_len = kPlainTableVariableLength;
773 plain_table_options.bloom_bits_per_key = 0;
774 plain_table_options.hash_table_ratio = 0;
775
776 options_.table_factory.reset(
777 NewPlainTableFactory(plain_table_options));
778 }
779 constructor_ = new TableConstructor(
780 options_.comparator, true /* convert_to_internal_key_ */);
781 internal_comparator_.reset(
782 new InternalKeyComparator(options_.comparator));
783 break;
784 #endif // !ROCKSDB_LITE
785 case BLOCK_TEST:
786 table_options_.block_size = 256;
787 options_.table_factory.reset(
788 new BlockBasedTableFactory(table_options_));
789 constructor_ = new BlockConstructor(options_.comparator);
790 break;
791 case MEMTABLE_TEST:
792 table_options_.block_size = 256;
793 options_.table_factory.reset(
794 new BlockBasedTableFactory(table_options_));
795 constructor_ = new MemTableConstructor(options_.comparator,
796 &write_buffer_);
797 break;
798 case DB_TEST:
799 table_options_.block_size = 256;
800 options_.table_factory.reset(
801 new BlockBasedTableFactory(table_options_));
802 constructor_ = new DBConstructor(options_.comparator);
803 break;
804 }
805 ioptions_ = ImmutableCFOptions(options_);
806 moptions_ = MutableCFOptions(options_);
807 }
808
809 ~HarnessTest() { delete constructor_; }
810
811 void Add(const std::string& key, const std::string& value) {
812 constructor_->Add(key, value);
813 }
814
815 void Test(Random* rnd) {
816 std::vector<std::string> keys;
817 stl_wrappers::KVMap data;
818 constructor_->Finish(options_, ioptions_, moptions_, table_options_,
819 *internal_comparator_, &keys, &data);
820
821 TestForwardScan(keys, data);
822 if (support_prev_) {
823 TestBackwardScan(keys, data);
824 }
825 TestRandomAccess(rnd, keys, data);
826 }
827
828 void TestForwardScan(const std::vector<std::string>& /*keys*/,
829 const stl_wrappers::KVMap& data) {
830 InternalIterator* iter = constructor_->NewIterator();
831 ASSERT_TRUE(!iter->Valid());
832 iter->SeekToFirst();
833 for (stl_wrappers::KVMap::const_iterator model_iter = data.begin();
834 model_iter != data.end(); ++model_iter) {
835 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
836 iter->Next();
837 }
838 ASSERT_TRUE(!iter->Valid());
839 if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
840 iter->~InternalIterator();
841 } else {
842 delete iter;
843 }
844 }
845
846 void TestBackwardScan(const std::vector<std::string>& /*keys*/,
847 const stl_wrappers::KVMap& data) {
848 InternalIterator* iter = constructor_->NewIterator();
849 ASSERT_TRUE(!iter->Valid());
850 iter->SeekToLast();
851 for (stl_wrappers::KVMap::const_reverse_iterator model_iter = data.rbegin();
852 model_iter != data.rend(); ++model_iter) {
853 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
854 iter->Prev();
855 }
856 ASSERT_TRUE(!iter->Valid());
857 if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
858 iter->~InternalIterator();
859 } else {
860 delete iter;
861 }
862 }
863
864 void TestRandomAccess(Random* rnd, const std::vector<std::string>& keys,
865 const stl_wrappers::KVMap& data) {
866 static const bool kVerbose = false;
867 InternalIterator* iter = constructor_->NewIterator();
868 ASSERT_TRUE(!iter->Valid());
869 stl_wrappers::KVMap::const_iterator model_iter = data.begin();
870 if (kVerbose) fprintf(stderr, "---\n");
871 for (int i = 0; i < 200; i++) {
872 const int toss = rnd->Uniform(support_prev_ ? 5 : 3);
873 switch (toss) {
874 case 0: {
875 if (iter->Valid()) {
876 if (kVerbose) fprintf(stderr, "Next\n");
877 iter->Next();
878 ++model_iter;
879 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
880 }
881 break;
882 }
883
884 case 1: {
885 if (kVerbose) fprintf(stderr, "SeekToFirst\n");
886 iter->SeekToFirst();
887 model_iter = data.begin();
888 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
889 break;
890 }
891
892 case 2: {
893 std::string key = PickRandomKey(rnd, keys);
894 model_iter = data.lower_bound(key);
895 if (kVerbose) fprintf(stderr, "Seek '%s'\n",
896 EscapeString(key).c_str());
897 iter->Seek(Slice(key));
898 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
899 break;
900 }
901
902 case 3: {
903 if (iter->Valid()) {
904 if (kVerbose) fprintf(stderr, "Prev\n");
905 iter->Prev();
906 if (model_iter == data.begin()) {
907 model_iter = data.end(); // Wrap around to invalid value
908 } else {
909 --model_iter;
910 }
911 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
912 }
913 break;
914 }
915
916 case 4: {
917 if (kVerbose) fprintf(stderr, "SeekToLast\n");
918 iter->SeekToLast();
919 if (keys.empty()) {
920 model_iter = data.end();
921 } else {
922 std::string last = data.rbegin()->first;
923 model_iter = data.lower_bound(last);
924 }
925 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
926 break;
927 }
928 }
929 }
930 if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
931 iter->~InternalIterator();
932 } else {
933 delete iter;
934 }
935 }
936
937 std::string ToString(const stl_wrappers::KVMap& data,
938 const stl_wrappers::KVMap::const_iterator& it) {
939 if (it == data.end()) {
940 return "END";
941 } else {
942 return "'" + it->first + "->" + it->second + "'";
943 }
944 }
945
946 std::string ToString(const stl_wrappers::KVMap& data,
947 const stl_wrappers::KVMap::const_reverse_iterator& it) {
948 if (it == data.rend()) {
949 return "END";
950 } else {
951 return "'" + it->first + "->" + it->second + "'";
952 }
953 }
954
955 std::string ToString(const InternalIterator* it) {
956 if (!it->Valid()) {
957 return "END";
958 } else {
959 return "'" + it->key().ToString() + "->" + it->value().ToString() + "'";
960 }
961 }
962
963 std::string PickRandomKey(Random* rnd, const std::vector<std::string>& keys) {
964 if (keys.empty()) {
965 return "foo";
966 } else {
967 const int index = rnd->Uniform(static_cast<int>(keys.size()));
968 std::string result = keys[index];
969 switch (rnd->Uniform(support_prev_ ? 3 : 1)) {
970 case 0:
971 // Return an existing key
972 break;
973 case 1: {
974 // Attempt to return something smaller than an existing key
975 if (result.size() > 0 && result[result.size() - 1] > '\0'
976 && (!only_support_prefix_seek_
977 || options_.prefix_extractor->Transform(result).size()
978 < result.size())) {
979 result[result.size() - 1]--;
980 }
981 break;
982 }
983 case 2: {
984 // Return something larger than an existing key
985 Increment(options_.comparator, &result);
986 break;
987 }
988 }
989 return result;
990 }
991 }
992
993 // Returns nullptr if not running against a DB
994 DB* db() const { return constructor_->db(); }
995
996 void RandomizedHarnessTest(size_t part, size_t total) {
997 std::vector<TestArgs> args = GenerateArgList();
998 assert(part);
999 assert(part <= total);
1000 for (size_t i = 0; i < args.size(); i++) {
1001 if ((i % total) + 1 != part) {
1002 continue;
1003 }
1004 Init(args[i]);
1005 Random rnd(test::RandomSeed() + 5);
1006 for (int num_entries = 0; num_entries < 2000;
1007 num_entries += (num_entries < 50 ? 1 : 200)) {
1008 for (int e = 0; e < num_entries; e++) {
1009 std::string v;
1010 Add(test::RandomKey(&rnd, rnd.Skewed(4)),
1011 test::RandomString(&rnd, rnd.Skewed(5), &v).ToString());
1012 }
1013 Test(&rnd);
1014 }
1015 }
1016 }
1017
1018 private:
1019 Options options_ = Options();
1020 ImmutableCFOptions ioptions_;
1021 MutableCFOptions moptions_;
1022 BlockBasedTableOptions table_options_ = BlockBasedTableOptions();
1023 Constructor* constructor_;
1024 WriteBufferManager write_buffer_;
1025 bool support_prev_;
1026 bool only_support_prefix_seek_;
1027 shared_ptr<InternalKeyComparator> internal_comparator_;
1028 };
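// Sketch of how the harness is driven from a TEST_F(HarnessTest, ...) body;
// this mirrors RandomizedHarnessTest above:
//
//   Random rnd(test::RandomSeed());
//   for (const TestArgs& args : GenerateArgList()) {
//     Init(args);
//     Add("some_key", "some_value");   // any number of entries
//     Test(&rnd);                      // forward, backward and random-access scans
//   }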
1029
1030 static bool Between(uint64_t val, uint64_t low, uint64_t high) {
1031 bool result = (val >= low) && (val <= high);
1032 if (!result) {
1033 fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
1034 (unsigned long long)(val),
1035 (unsigned long long)(low),
1036 (unsigned long long)(high));
1037 }
1038 return result;
1039 }
1040
1041 // Tests against all kinds of tables
1042 class TableTest : public testing::Test {
1043 public:
1044 const InternalKeyComparator& GetPlainInternalComparator(
1045 const Comparator* comp) {
1046 if (!plain_internal_comparator) {
1047 plain_internal_comparator.reset(
1048 new test::PlainInternalKeyComparator(comp));
1049 }
1050 return *plain_internal_comparator;
1051 }
1052 void IndexTest(BlockBasedTableOptions table_options);
1053
1054 private:
1055 std::unique_ptr<InternalKeyComparator> plain_internal_comparator;
1056 };
1057
1058 class GeneralTableTest : public TableTest {};
1059 class BlockBasedTableTest
1060 : public TableTest,
1061 virtual public ::testing::WithParamInterface<uint32_t> {
1062 public:
1063 BlockBasedTableTest() : format_(GetParam()) {}
1064
1065 BlockBasedTableOptions GetBlockBasedTableOptions() {
1066 BlockBasedTableOptions options;
1067 options.format_version = format_;
1068 return options;
1069 }
1070
1071 protected:
1072 uint64_t IndexUncompressedHelper(bool indexCompress);
1073
1074 private:
1075 uint32_t format_;
1076 };
1077 class PlainTableTest : public TableTest {};
1078 class TablePropertyTest : public testing::Test {};
1079 class BBTTailPrefetchTest : public TableTest {};
1080
1081 INSTANTIATE_TEST_CASE_P(FormatDef, BlockBasedTableTest,
1082 testing::Values(test::kDefaultFormatVersion));
1083 INSTANTIATE_TEST_CASE_P(FormatLatest, BlockBasedTableTest,
1084 testing::Values(test::kLatestFormatVersion));
1085
1086 // This test serves as a living tutorial for prefix scans of user-collected
1087 // properties.
1088 TEST_F(TablePropertyTest, PrefixScanTest) {
1089 UserCollectedProperties props{{"num.111.1", "1"},
1090 {"num.111.2", "2"},
1091 {"num.111.3", "3"},
1092 {"num.333.1", "1"},
1093 {"num.333.2", "2"},
1094 {"num.333.3", "3"},
1095 {"num.555.1", "1"},
1096 {"num.555.2", "2"},
1097 {"num.555.3", "3"}, };
1098
1099 // prefixes that exist
1100 for (const std::string& prefix : {"num.111", "num.333", "num.555"}) {
1101 int num = 0;
1102 for (auto pos = props.lower_bound(prefix);
1103 pos != props.end() &&
1104 pos->first.compare(0, prefix.size(), prefix) == 0;
1105 ++pos) {
1106 ++num;
1107 auto key = prefix + "." + ToString(num);
1108 ASSERT_EQ(key, pos->first);
1109 ASSERT_EQ(ToString(num), pos->second);
1110 }
1111 ASSERT_EQ(3, num);
1112 }
1113
1114 // prefixes that don't exist
1115 for (const std::string& prefix :
1116 {"num.000", "num.222", "num.444", "num.666"}) {
1117 auto pos = props.lower_bound(prefix);
1118 ASSERT_TRUE(pos == props.end() ||
1119 pos->first.compare(0, prefix.size(), prefix) != 0);
1120 }
1121 }
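// The loop above is the general pattern for range-scanning a std::map-like
// container by key prefix; a generic sketch (not part of the original test):
//
//   template <typename Map, typename Fn>
//   void ForEachWithPrefix(const Map& m, const std::string& prefix, Fn fn) {
//     for (auto pos = m.lower_bound(prefix);
//          pos != m.end() &&
//          pos->first.compare(0, prefix.size(), prefix) == 0;
//          ++pos) {
//       fn(pos->first, pos->second);
//     }
//   }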
1122
1123 // This test includes all the basic checks except those for index size and block
1124 // size, which are covered in separate unit tests.
1125 TEST_P(BlockBasedTableTest, BasicBlockBasedTableProperties) {
1126 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1127
1128 c.Add("a1", "val1");
1129 c.Add("b2", "val2");
1130 c.Add("c3", "val3");
1131 c.Add("d4", "val4");
1132 c.Add("e5", "val5");
1133 c.Add("f6", "val6");
1134 c.Add("g7", "val7");
1135 c.Add("h8", "val8");
1136 c.Add("j9", "val9");
1137   uint64_t diff_internal_user_bytes = 9 * 8;  // 8-byte seq/type footer per key, 9 k-v pairs in total
1138
1139 std::vector<std::string> keys;
1140 stl_wrappers::KVMap kvmap;
1141 Options options;
1142 options.compression = kNoCompression;
1143 options.statistics = CreateDBStatistics();
1144 options.statistics->stats_level_ = StatsLevel::kAll;
1145 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1146 table_options.block_restart_interval = 1;
1147 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1148
1149 ImmutableCFOptions ioptions(options);
1150 MutableCFOptions moptions(options);
1151 ioptions.statistics = options.statistics.get();
1152 c.Finish(options, ioptions, moptions, table_options,
1153 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1154 ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_NOT_COMPRESSED), 0);
1155
1156 auto& props = *c.GetTableReader()->GetTableProperties();
1157 ASSERT_EQ(kvmap.size(), props.num_entries);
1158
1159 auto raw_key_size = kvmap.size() * 2ul;
1160 auto raw_value_size = kvmap.size() * 4ul;
1161
1162 ASSERT_EQ(raw_key_size + diff_internal_user_bytes, props.raw_key_size);
1163 ASSERT_EQ(raw_value_size, props.raw_value_size);
1164 ASSERT_EQ(1ul, props.num_data_blocks);
1165 ASSERT_EQ("", props.filter_policy_name); // no filter policy is used
1166
1167 // Verify data size.
1168 BlockBuilder block_builder(1);
1169 for (const auto& item : kvmap) {
1170 block_builder.Add(item.first, item.second);
1171 }
1172 Slice content = block_builder.Finish();
1173 ASSERT_EQ(content.size() + kBlockTrailerSize + diff_internal_user_bytes,
1174 props.data_size);
1175 c.ResetTableReader();
1176 }
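// Worked numbers behind the assertions above: each of the 9 user keys is
// 2 bytes and each value is 4 bytes, and every internal key carries an extra
// 8-byte sequence/type footer, so
//   props.raw_key_size   == 9 * 2 + 9 * 8 == 90
//   props.raw_value_size == 9 * 4         == 36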
1177
1178 #ifdef SNAPPY
1179 uint64_t BlockBasedTableTest::IndexUncompressedHelper(bool compressed) {
1180 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1181 constexpr size_t kNumKeys = 10000;
1182
1183 for (size_t k = 0; k < kNumKeys; ++k) {
1184 c.Add("key" + ToString(k), "val" + ToString(k));
1185 }
1186
1187 std::vector<std::string> keys;
1188 stl_wrappers::KVMap kvmap;
1189 Options options;
1190 options.compression = kSnappyCompression;
1191 options.statistics = CreateDBStatistics();
1192 options.statistics->stats_level_ = StatsLevel::kAll;
1193 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1194 table_options.block_restart_interval = 1;
1195 table_options.enable_index_compression = compressed;
1196 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1197
1198 ImmutableCFOptions ioptions(options);
1199 MutableCFOptions moptions(options);
1200 ioptions.statistics = options.statistics.get();
1201 c.Finish(options, ioptions, moptions, table_options,
1202 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1203 c.ResetTableReader();
1204 return options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED);
1205 }
1206 TEST_P(BlockBasedTableTest, IndexUncompressed) {
1207 uint64_t tbl1_compressed_cnt = IndexUncompressedHelper(true);
1208 uint64_t tbl2_compressed_cnt = IndexUncompressedHelper(false);
1209 // tbl1_compressed_cnt should include 1 index block
1210 EXPECT_EQ(tbl2_compressed_cnt + 1, tbl1_compressed_cnt);
1211 }
1212 #endif // SNAPPY
1213
1214 TEST_P(BlockBasedTableTest, BlockBasedTableProperties2) {
1215 TableConstructor c(&reverse_key_comparator);
1216 std::vector<std::string> keys;
1217 stl_wrappers::KVMap kvmap;
1218
1219 {
1220 Options options;
1221 options.compression = CompressionType::kNoCompression;
1222 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1223 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1224
1225 const ImmutableCFOptions ioptions(options);
1226 const MutableCFOptions moptions(options);
1227 c.Finish(options, ioptions, moptions, table_options,
1228 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1229
1230 auto& props = *c.GetTableReader()->GetTableProperties();
1231
1232 // Default comparator
1233 ASSERT_EQ("leveldb.BytewiseComparator", props.comparator_name);
1234 // No merge operator
1235 ASSERT_EQ("nullptr", props.merge_operator_name);
1236 // No prefix extractor
1237 ASSERT_EQ("nullptr", props.prefix_extractor_name);
1238 // No property collectors
1239 ASSERT_EQ("[]", props.property_collectors_names);
1240 // No filter policy is used
1241 ASSERT_EQ("", props.filter_policy_name);
1242     // Compression type matches what we set:
1243 ASSERT_EQ("NoCompression", props.compression_name);
1244 c.ResetTableReader();
1245 }
1246
1247 {
1248 Options options;
1249 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1250 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1251 options.comparator = &reverse_key_comparator;
1252 options.merge_operator = MergeOperators::CreateUInt64AddOperator();
1253 options.prefix_extractor.reset(NewNoopTransform());
1254 options.table_properties_collector_factories.emplace_back(
1255 new DummyPropertiesCollectorFactory1());
1256 options.table_properties_collector_factories.emplace_back(
1257 new DummyPropertiesCollectorFactory2());
1258
1259 const ImmutableCFOptions ioptions(options);
1260 const MutableCFOptions moptions(options);
1261 c.Finish(options, ioptions, moptions, table_options,
1262 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1263
1264 auto& props = *c.GetTableReader()->GetTableProperties();
1265
1266 ASSERT_EQ("rocksdb.ReverseBytewiseComparator", props.comparator_name);
1267 ASSERT_EQ("UInt64AddOperator", props.merge_operator_name);
1268 ASSERT_EQ("rocksdb.Noop", props.prefix_extractor_name);
1269 ASSERT_EQ("[DummyPropertiesCollector1,DummyPropertiesCollector2]",
1270 props.property_collectors_names);
1271 ASSERT_EQ("", props.filter_policy_name); // no filter policy is used
1272 c.ResetTableReader();
1273 }
1274 }
1275
1276 TEST_P(BlockBasedTableTest, RangeDelBlock) {
1277 TableConstructor c(BytewiseComparator());
1278 std::vector<std::string> keys = {"1pika", "2chu"};
1279 std::vector<std::string> vals = {"p", "c"};
1280
1281 for (int i = 0; i < 2; i++) {
1282 RangeTombstone t(keys[i], vals[i], i);
1283 std::pair<InternalKey, Slice> p = t.Serialize();
1284 c.Add(p.first.Encode().ToString(), p.second);
1285 }
1286
1287 std::vector<std::string> sorted_keys;
1288 stl_wrappers::KVMap kvmap;
1289 Options options;
1290 options.compression = kNoCompression;
1291 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1292 table_options.block_restart_interval = 1;
1293 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1294
1295 const ImmutableCFOptions ioptions(options);
1296 const MutableCFOptions moptions(options);
1297 std::unique_ptr<InternalKeyComparator> internal_cmp(
1298 new InternalKeyComparator(options.comparator));
1299 c.Finish(options, ioptions, moptions, table_options, *internal_cmp,
1300 &sorted_keys, &kvmap);
1301
1302 for (int j = 0; j < 2; ++j) {
1303 std::unique_ptr<InternalIterator> iter(
1304 c.GetTableReader()->NewRangeTombstoneIterator(ReadOptions()));
1305 if (j > 0) {
1306 // For second iteration, delete the table reader object and verify the
1307 // iterator can still access its metablock's range tombstones.
1308 c.ResetTableReader();
1309 }
1310 ASSERT_FALSE(iter->Valid());
1311 iter->SeekToFirst();
1312 ASSERT_TRUE(iter->Valid());
1313 for (int i = 0; i < 2; i++) {
1314 ASSERT_TRUE(iter->Valid());
1315 ParsedInternalKey parsed_key;
1316 ASSERT_TRUE(ParseInternalKey(iter->key(), &parsed_key));
1317 RangeTombstone t(parsed_key, iter->value());
1318 ASSERT_EQ(t.start_key_, keys[i]);
1319 ASSERT_EQ(t.end_key_, vals[i]);
1320 ASSERT_EQ(t.seq_, i);
1321 iter->Next();
1322 }
1323 ASSERT_TRUE(!iter->Valid());
1324 }
1325 }
1326
1327 TEST_P(BlockBasedTableTest, FilterPolicyNameProperties) {
1328 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1329 c.Add("a1", "val1");
1330 std::vector<std::string> keys;
1331 stl_wrappers::KVMap kvmap;
1332 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1333 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1334 Options options;
1335 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1336
1337 const ImmutableCFOptions ioptions(options);
1338 const MutableCFOptions moptions(options);
1339 c.Finish(options, ioptions, moptions, table_options,
1340 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1341 auto& props = *c.GetTableReader()->GetTableProperties();
1342 ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name);
1343 c.ResetTableReader();
1344 }
1345
1346 //
1347 // BlockBasedTableTest::PrefetchTest
1348 //
1349 void AssertKeysInCache(BlockBasedTable* table_reader,
1350 const std::vector<std::string>& keys_in_cache,
1351 const std::vector<std::string>& keys_not_in_cache,
1352 bool convert = false) {
1353 if (convert) {
1354 for (auto key : keys_in_cache) {
1355 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
1356 ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
1357 }
1358 for (auto key : keys_not_in_cache) {
1359 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
1360 ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
1361 }
1362 } else {
1363 for (auto key : keys_in_cache) {
1364 ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key));
1365 }
1366 for (auto key : keys_not_in_cache) {
1367 ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key));
1368 }
1369 }
1370 }
1371
1372 void PrefetchRange(TableConstructor* c, Options* opt,
1373 BlockBasedTableOptions* table_options, const char* key_begin,
1374 const char* key_end,
1375 const std::vector<std::string>& keys_in_cache,
1376 const std::vector<std::string>& keys_not_in_cache,
1377 const Status expected_status = Status::OK()) {
1378 // reset the cache and reopen the table
1379 table_options->block_cache = NewLRUCache(16 * 1024 * 1024, 4);
1380 opt->table_factory.reset(NewBlockBasedTableFactory(*table_options));
1381 const ImmutableCFOptions ioptions2(*opt);
1382 const MutableCFOptions moptions(*opt);
1383 ASSERT_OK(c->Reopen(ioptions2, moptions));
1384
1385 // prefetch
1386 auto* table_reader = dynamic_cast<BlockBasedTable*>(c->GetTableReader());
1387 Status s;
1388 unique_ptr<Slice> begin, end;
1389 unique_ptr<InternalKey> i_begin, i_end;
1390 if (key_begin != nullptr) {
1391 if (c->ConvertToInternalKey()) {
1392 i_begin.reset(new InternalKey(key_begin, kMaxSequenceNumber, kTypeValue));
1393 begin.reset(new Slice(i_begin->Encode()));
1394 } else {
1395 begin.reset(new Slice(key_begin));
1396 }
1397 }
1398 if (key_end != nullptr) {
1399 if (c->ConvertToInternalKey()) {
1400 i_end.reset(new InternalKey(key_end, kMaxSequenceNumber, kTypeValue));
1401 end.reset(new Slice(i_end->Encode()));
1402 } else {
1403 end.reset(new Slice(key_end));
1404 }
1405 }
1406 s = table_reader->Prefetch(begin.get(), end.get());
1407
1408 ASSERT_TRUE(s.code() == expected_status.code());
1409
1410   // assert our expectations about cache warmup
1411 AssertKeysInCache(table_reader, keys_in_cache, keys_not_in_cache,
1412 c->ConvertToInternalKey());
1413 c->ResetTableReader();
1414 }
1415
1416 TEST_P(BlockBasedTableTest, PrefetchTest) {
1417 // The purpose of this test is to test the prefetching operation built into
1418 // BlockBasedTable.
1419 Options opt;
1420 unique_ptr<InternalKeyComparator> ikc;
1421 ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
1422 opt.compression = kNoCompression;
1423 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1424 table_options.block_size = 1024;
1425 // big enough so we don't ever lose cached values.
1426 table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
1427 opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
1428
1429 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1430 c.Add("k01", "hello");
1431 c.Add("k02", "hello2");
1432 c.Add("k03", std::string(10000, 'x'));
1433 c.Add("k04", std::string(200000, 'x'));
1434 c.Add("k05", std::string(300000, 'x'));
1435 c.Add("k06", "hello3");
1436 c.Add("k07", std::string(100000, 'x'));
1437 std::vector<std::string> keys;
1438 stl_wrappers::KVMap kvmap;
1439 const ImmutableCFOptions ioptions(opt);
1440 const MutableCFOptions moptions(opt);
1441 c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
1442 c.ResetTableReader();
1443
1444 // We get the following data spread :
1445 //
1446 // Data block Index
1447 // ========================
1448 // [ k01 k02 k03 ] k03
1449 // [ k04 ] k04
1450 // [ k05 ] k05
1451 // [ k06 k07 ] k07
1452
1453
1454 // Simple
1455 PrefetchRange(&c, &opt, &table_options,
1456 /*key_range=*/"k01", "k05",
1457 /*keys_in_cache=*/{"k01", "k02", "k03", "k04", "k05"},
1458 /*keys_not_in_cache=*/{"k06", "k07"});
1459 PrefetchRange(&c, &opt, &table_options, "k01", "k01", {"k01", "k02", "k03"},
1460 {"k04", "k05", "k06", "k07"});
1461   // Boundaries that are not keys in the table
1462 PrefetchRange(&c, &opt, &table_options, "a", "z",
1463 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1464 PrefetchRange(&c, &opt, &table_options, "k00", "k00", {"k01", "k02", "k03"},
1465 {"k04", "k05", "k06", "k07"});
1466 // Edge cases
1467 PrefetchRange(&c, &opt, &table_options, "k00", "k06",
1468 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1469 PrefetchRange(&c, &opt, &table_options, "k00", "zzz",
1470 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1471 // null keys
1472 PrefetchRange(&c, &opt, &table_options, nullptr, nullptr,
1473 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1474 PrefetchRange(&c, &opt, &table_options, "k04", nullptr,
1475 {"k04", "k05", "k06", "k07"}, {"k01", "k02", "k03"});
1476 PrefetchRange(&c, &opt, &table_options, nullptr, "k05",
1477 {"k01", "k02", "k03", "k04", "k05"}, {"k06", "k07"});
1478 // invalid
1479 PrefetchRange(&c, &opt, &table_options, "k06", "k00", {}, {},
1480 Status::InvalidArgument(Slice("k06 "), Slice("k07")));
1481 c.ResetTableReader();
1482 }
1483
1484 TEST_P(BlockBasedTableTest, TotalOrderSeekOnHashIndex) {
1485 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1486 for (int i = 0; i < 4; ++i) {
1487 Options options;
1488 // Make each key/value an individual block
1489 table_options.block_size = 64;
1490 switch (i) {
1491 case 0:
1492 // Binary search index
1493 table_options.index_type = BlockBasedTableOptions::kBinarySearch;
1494 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1495 break;
1496 case 1:
1497 // Hash search index
1498 table_options.index_type = BlockBasedTableOptions::kHashSearch;
1499 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1500 options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1501 break;
1502 case 2:
1503 // Hash search index with hash_index_allow_collision
1504 table_options.index_type = BlockBasedTableOptions::kHashSearch;
1505 table_options.hash_index_allow_collision = true;
1506 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1507 options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1508 break;
1509 case 3:
1510 // Hash search index with filter policy
1511 table_options.index_type = BlockBasedTableOptions::kHashSearch;
1512 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1513 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1514 options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1515 break;
1516 case 4:
1517 default:
1518         // Two-level index search
1519 table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
1520 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1521 break;
1522 }
1523
1524 TableConstructor c(BytewiseComparator(),
1525 true /* convert_to_internal_key_ */);
1526     c.Add("aaaa1", std::string(56, 'a'));
1527     c.Add("bbaa1", std::string(56, 'a'));
1528     c.Add("cccc1", std::string(56, 'a'));
1529     c.Add("bbbb1", std::string(56, 'a'));
1530     c.Add("baaa1", std::string(56, 'a'));
1531     c.Add("abbb1", std::string(56, 'a'));
1532     c.Add("cccc2", std::string(56, 'a'));
1533 std::vector<std::string> keys;
1534 stl_wrappers::KVMap kvmap;
1535 const ImmutableCFOptions ioptions(options);
1536 const MutableCFOptions moptions(options);
1537 c.Finish(options, ioptions, moptions, table_options,
1538 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1539 auto props = c.GetTableReader()->GetTableProperties();
1540 ASSERT_EQ(7u, props->num_data_blocks);
1541 auto* reader = c.GetTableReader();
1542 ReadOptions ro;
1543 ro.total_order_seek = true;
1544 std::unique_ptr<InternalIterator> iter(
1545 reader->NewIterator(ro, moptions.prefix_extractor.get()));
1546
1547 iter->Seek(InternalKey("b", 0, kTypeValue).Encode());
1548 ASSERT_OK(iter->status());
1549 ASSERT_TRUE(iter->Valid());
1550 ASSERT_EQ("baaa1", ExtractUserKey(iter->key()).ToString());
1551 iter->Next();
1552 ASSERT_OK(iter->status());
1553 ASSERT_TRUE(iter->Valid());
1554 ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
1555
1556 iter->Seek(InternalKey("bb", 0, kTypeValue).Encode());
1557 ASSERT_OK(iter->status());
1558 ASSERT_TRUE(iter->Valid());
1559 ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
1560 iter->Next();
1561 ASSERT_OK(iter->status());
1562 ASSERT_TRUE(iter->Valid());
1563 ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
1564
1565 iter->Seek(InternalKey("bbb", 0, kTypeValue).Encode());
1566 ASSERT_OK(iter->status());
1567 ASSERT_TRUE(iter->Valid());
1568 ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
1569 iter->Next();
1570 ASSERT_OK(iter->status());
1571 ASSERT_TRUE(iter->Valid());
1572 ASSERT_EQ("cccc1", ExtractUserKey(iter->key()).ToString());
1573 }
1574 }
1575
1576 TEST_P(BlockBasedTableTest, NoopTransformSeek) {
1577 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1578 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1579
1580 Options options;
1581 options.comparator = BytewiseComparator();
1582 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1583 options.prefix_extractor.reset(NewNoopTransform());
1584
1585 TableConstructor c(options.comparator);
1586 // To tickle the PrefixMayMatch bug it is important that the
1587 // user-key is a single byte so that the index key exactly matches
1588 // the user-key.
1589 InternalKey key("a", 1, kTypeValue);
1590 c.Add(key.Encode().ToString(), "b");
1591 std::vector<std::string> keys;
1592 stl_wrappers::KVMap kvmap;
1593 const ImmutableCFOptions ioptions(options);
1594 const MutableCFOptions moptions(options);
1595 const InternalKeyComparator internal_comparator(options.comparator);
1596 c.Finish(options, ioptions, moptions, table_options, internal_comparator,
1597 &keys, &kvmap);
1598
1599 auto* reader = c.GetTableReader();
1600 for (int i = 0; i < 2; ++i) {
1601 ReadOptions ro;
1602 ro.total_order_seek = (i == 0);
1603 std::unique_ptr<InternalIterator> iter(
1604 reader->NewIterator(ro, moptions.prefix_extractor.get()));
1605
1606 iter->Seek(key.Encode());
1607 ASSERT_OK(iter->status());
1608 ASSERT_TRUE(iter->Valid());
1609 ASSERT_EQ("a", ExtractUserKey(iter->key()).ToString());
1610 }
1611 }
1612
1613 TEST_P(BlockBasedTableTest, SkipPrefixBloomFilter) {
1614 // If the DB is opened with a prefix extractor of a different name, the
1615 // prefix bloom filter is skipped when reading the file.
1616 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1617 table_options.filter_policy.reset(NewBloomFilterPolicy(2));
1618 table_options.whole_key_filtering = false;
1619
1620 Options options;
1621 options.comparator = BytewiseComparator();
1622 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1623 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
1624
1625 TableConstructor c(options.comparator);
1626 InternalKey key("abcdefghijk", 1, kTypeValue);
1627 c.Add(key.Encode().ToString(), "test");
1628 std::vector<std::string> keys;
1629 stl_wrappers::KVMap kvmap;
1630 const ImmutableCFOptions ioptions(options);
1631 const MutableCFOptions moptions(options);
1632 const InternalKeyComparator internal_comparator(options.comparator);
1633 c.Finish(options, ioptions, moptions, table_options, internal_comparator,
1634 &keys, &kvmap);
1635 // TODO(Zhongyi): update test to use MutableCFOptions
1636 options.prefix_extractor.reset(NewFixedPrefixTransform(9));
1637 const ImmutableCFOptions new_ioptions(options);
1638 const MutableCFOptions new_moptions(options);
1639 c.Reopen(new_ioptions, new_moptions);
1640 auto reader = c.GetTableReader();
1641 std::unique_ptr<InternalIterator> db_iter(
1642 reader->NewIterator(ReadOptions(), new_moptions.prefix_extractor.get()));
1643
1644 // Test point lookup
1645 // only one kv
1646 for (auto& kv : kvmap) {
1647 db_iter->Seek(kv.first);
1648 ASSERT_TRUE(db_iter->Valid());
1649 ASSERT_OK(db_iter->status());
1650 ASSERT_EQ(db_iter->key(), kv.first);
1651 ASSERT_EQ(db_iter->value(), kv.second);
1652 }
1653 }
1654
1655 static std::string RandomString(Random* rnd, int len) {
1656 std::string r;
1657 test::RandomString(rnd, len, &r);
1658 return r;
1659 }
1660
1661 void AddInternalKey(TableConstructor* c, const std::string& prefix,
1662 int /*suffix_len*/ = 800) {
1663 static Random rnd(1023);
1664 InternalKey k(prefix + RandomString(&rnd, 800), 0, kTypeValue);
1665 c->Add(k.Encode().ToString(), "v");
1666 }
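// Note: the suffix_len parameter is currently ignored; an 800-byte random
// suffix is always appended. With the 1700-byte block size set in IndexTest
// below, roughly two such keys fill one data block, which is why that test
// expects 5 data blocks for 10 keys.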
1667
1668 void TableTest::IndexTest(BlockBasedTableOptions table_options) {
1669 TableConstructor c(BytewiseComparator());
1670
1671 // keys with prefix length 3, make sure the key/value is big enough to fill
1672 // one block
1673 AddInternalKey(&c, "0015");
1674 AddInternalKey(&c, "0035");
1675
1676 AddInternalKey(&c, "0054");
1677 AddInternalKey(&c, "0055");
1678
1679 AddInternalKey(&c, "0056");
1680 AddInternalKey(&c, "0057");
1681
1682 AddInternalKey(&c, "0058");
1683 AddInternalKey(&c, "0075");
1684
1685 AddInternalKey(&c, "0076");
1686 AddInternalKey(&c, "0095");
1687
1688 std::vector<std::string> keys;
1689 stl_wrappers::KVMap kvmap;
1690 Options options;
1691 options.prefix_extractor.reset(NewFixedPrefixTransform(3));
1692 table_options.block_size = 1700;
1693 table_options.block_cache = NewLRUCache(1024, 4);
1694 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1695
1696 std::unique_ptr<InternalKeyComparator> comparator(
1697 new InternalKeyComparator(BytewiseComparator()));
1698 const ImmutableCFOptions ioptions(options);
1699 const MutableCFOptions moptions(options);
1700 c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
1701 &kvmap);
1702 auto reader = c.GetTableReader();
1703
1704 auto props = reader->GetTableProperties();
1705 ASSERT_EQ(5u, props->num_data_blocks);
1706
1707 // TODO(Zhongyi): update test to use MutableCFOptions
1708 std::unique_ptr<InternalIterator> index_iter(
1709 reader->NewIterator(ReadOptions(), moptions.prefix_extractor.get()));
1710
1711 // -- Find keys that do not exist, but share a common prefix.
1712 std::vector<std::string> prefixes = {"001", "003", "005", "007", "009"};
1713 std::vector<std::string> lower_bound = {keys[0], keys[1], keys[2],
1714 keys[7], keys[9], };
1715
1716 // find the lower bound of the prefix
1717 for (size_t i = 0; i < prefixes.size(); ++i) {
1718 index_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode());
1719 ASSERT_OK(index_iter->status());
1720 ASSERT_TRUE(index_iter->Valid());
1721
1722 // seek the first element in the block
1723 ASSERT_EQ(lower_bound[i], index_iter->key().ToString());
1724 ASSERT_EQ("v", index_iter->value().ToString());
1725 }
1726
1727 // find the upper bound of prefixes
1728 std::vector<std::string> upper_bound = {keys[1], keys[2], keys[7], keys[9], };
1729
1730 // find existing keys
1731 for (const auto& item : kvmap) {
1732 auto ukey = ExtractUserKey(item.first).ToString();
1733 index_iter->Seek(ukey);
1734
1735 // ASSERT_OK(regular_iter->status());
1736 ASSERT_OK(index_iter->status());
1737
1738 // ASSERT_TRUE(regular_iter->Valid());
1739 ASSERT_TRUE(index_iter->Valid());
1740
1741 ASSERT_EQ(item.first, index_iter->key().ToString());
1742 ASSERT_EQ(item.second, index_iter->value().ToString());
1743 }
1744
1745 for (size_t i = 0; i < prefixes.size(); ++i) {
1746 // the key is greater than any existing key with the same prefix.
1747 auto key = prefixes[i] + "9";
1748 index_iter->Seek(InternalKey(key, 0, kTypeValue).Encode());
1749
1750 ASSERT_OK(index_iter->status());
1751 if (i == prefixes.size() - 1) {
1752 // last key
1753 ASSERT_TRUE(!index_iter->Valid());
1754 } else {
1755 ASSERT_TRUE(index_iter->Valid());
1756 // seek the first element in the block
1757 ASSERT_EQ(upper_bound[i], index_iter->key().ToString());
1758 ASSERT_EQ("v", index_iter->value().ToString());
1759 }
1760 }
1761
1762 // find keys whose prefix doesn't match any of the existing prefixes.
1763 std::vector<std::string> non_exist_prefixes = {"002", "004", "006", "008"};
1764 for (const auto& prefix : non_exist_prefixes) {
1765 index_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode());
1766 // regular_iter->Seek(prefix);
1767
1768 ASSERT_OK(index_iter->status());
1769 // Seeking to a non-existing prefix should yield either an invalid iterator,
1770 // or a key whose prefix is greater than the target.
1771 if (index_iter->Valid()) {
1772 Slice ukey = ExtractUserKey(index_iter->key());
1773 Slice ukey_prefix = options.prefix_extractor->Transform(ukey);
1774 ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) < 0);
1775 }
1776 }
1777 c.ResetTableReader();
1778 }
1779
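// The three tests below exercise IndexTest with different index types: plain
// binary search, hash index over extracted prefixes, and the two-level
// partitioned index.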
1780 TEST_P(BlockBasedTableTest, BinaryIndexTest) {
1781 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1782 table_options.index_type = BlockBasedTableOptions::kBinarySearch;
1783 IndexTest(table_options);
1784 }
1785
1786 TEST_P(BlockBasedTableTest, HashIndexTest) {
1787 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1788 table_options.index_type = BlockBasedTableOptions::kHashSearch;
1789 IndexTest(table_options);
1790 }
1791
1792 TEST_P(BlockBasedTableTest, PartitionIndexTest) {
1793 const int max_index_keys = 5;
1794 const int est_max_index_key_value_size = 32;
1795 const int est_max_index_size = max_index_keys * est_max_index_key_value_size;
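// Sweep metadata_block_size from 1 byte to just past the estimated index
// size, covering everything from one index entry per partition up to a
// single partition that holds the whole index.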
1796 for (int i = 1; i <= est_max_index_size + 1; i++) {
1797 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1798 table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
1799 table_options.metadata_block_size = i;
1800 IndexTest(table_options);
1801 }
1802 }
1803
1804 // It's very hard to figure out the index block size of a table accurately.
1805 // To make sure we track the index size, we just make sure that as the key
1806 // number grows, the index block size also grows.
1807 TEST_P(BlockBasedTableTest, IndexSizeStat) {
1808 uint64_t last_index_size = 0;
1809
1810 // we need to use random keys since purely human-readable text
1811 // may compress well, resulting in an insignificant change of the index
1812 // block size.
1813 Random rnd(test::RandomSeed());
1814 std::vector<std::string> keys;
1815
1816 for (int i = 0; i < 100; ++i) {
1817 keys.push_back(RandomString(&rnd, 10000));
1818 }
1819
1820 // Each time we load one more key into the table, the table index block
1821 // size is expected to be larger than last time's.
1822 for (size_t i = 1; i < keys.size(); ++i) {
1823 TableConstructor c(BytewiseComparator(),
1824 true /* convert_to_internal_key_ */);
1825 for (size_t j = 0; j < i; ++j) {
1826 c.Add(keys[j], "val");
1827 }
1828
1829 std::vector<std::string> ks;
1830 stl_wrappers::KVMap kvmap;
1831 Options options;
1832 options.compression = kNoCompression;
1833 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1834 table_options.block_restart_interval = 1;
1835 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1836
1837 const ImmutableCFOptions ioptions(options);
1838 const MutableCFOptions moptions(options);
1839 c.Finish(options, ioptions, moptions, table_options,
1840 GetPlainInternalComparator(options.comparator), &ks, &kvmap);
1841 auto index_size = c.GetTableReader()->GetTableProperties()->index_size;
1842 ASSERT_GT(index_size, last_index_size);
1843 last_index_size = index_size;
1844 c.ResetTableReader();
1845 }
1846 }
1847
1848 TEST_P(BlockBasedTableTest, NumBlockStat) {
1849 Random rnd(test::RandomSeed());
1850 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1851 Options options;
1852 options.compression = kNoCompression;
1853 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1854 table_options.block_restart_interval = 1;
1855 table_options.block_size = 1000;
1856 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1857
1858 for (int i = 0; i < 10; ++i) {
1859 // the key/val are slightly smaller than block size, so that each block
1860 // holds roughly one key/value pair.
1861 c.Add(RandomString(&rnd, 900), "val");
1862 }
1863
1864 std::vector<std::string> ks;
1865 stl_wrappers::KVMap kvmap;
1866 const ImmutableCFOptions ioptions(options);
1867 const MutableCFOptions moptions(options);
1868 c.Finish(options, ioptions, moptions, table_options,
1869 GetPlainInternalComparator(options.comparator), &ks, &kvmap);
1870 ASSERT_EQ(kvmap.size(),
1871 c.GetTableReader()->GetTableProperties()->num_data_blocks);
1872 c.ResetTableReader();
1873 }
1874
1875 // A simple tool that takes the snapshot of block cache statistics.
1876 class BlockCachePropertiesSnapshot {
1877 public:
1878 explicit BlockCachePropertiesSnapshot(Statistics* statistics) {
1879 block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS);
1880 block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT);
1881 index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS);
1882 index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT);
1883 data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS);
1884 data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT);
1885 filter_block_cache_miss =
1886 statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS);
1887 filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT);
1888 block_cache_bytes_read = statistics->getTickerCount(BLOCK_CACHE_BYTES_READ);
1889 block_cache_bytes_write =
1890 statistics->getTickerCount(BLOCK_CACHE_BYTES_WRITE);
1891 }
1892
1893 void AssertIndexBlockStat(int64_t expected_index_block_cache_miss,
1894 int64_t expected_index_block_cache_hit) {
1895 ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
1896 ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
1897 }
1898
1899 void AssertFilterBlockStat(int64_t expected_filter_block_cache_miss,
1900 int64_t expected_filter_block_cache_hit) {
1901 ASSERT_EQ(expected_filter_block_cache_miss, filter_block_cache_miss);
1902 ASSERT_EQ(expected_filter_block_cache_hit, filter_block_cache_hit);
1903 }
1904
1905 // Check if the fetched props match the expected ones.
1906 // TODO(kailiu) Use this only when you disabled filter policy!
1907 void AssertEqual(int64_t expected_index_block_cache_miss,
1908 int64_t expected_index_block_cache_hit,
1909 int64_t expected_data_block_cache_miss,
1910 int64_t expected_data_block_cache_hit) const {
1911 ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
1912 ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
1913 ASSERT_EQ(expected_data_block_cache_miss, data_block_cache_miss);
1914 ASSERT_EQ(expected_data_block_cache_hit, data_block_cache_hit);
1915 ASSERT_EQ(expected_index_block_cache_miss + expected_data_block_cache_miss,
1916 block_cache_miss);
1917 ASSERT_EQ(expected_index_block_cache_hit + expected_data_block_cache_hit,
1918 block_cache_hit);
1919 }
1920
1921 int64_t GetCacheBytesRead() { return block_cache_bytes_read; }
1922
1923 int64_t GetCacheBytesWrite() { return block_cache_bytes_write; }
1924
1925 private:
1926 int64_t block_cache_miss = 0;
1927 int64_t block_cache_hit = 0;
1928 int64_t index_block_cache_miss = 0;
1929 int64_t index_block_cache_hit = 0;
1930 int64_t data_block_cache_miss = 0;
1931 int64_t data_block_cache_hit = 0;
1932 int64_t filter_block_cache_miss = 0;
1933 int64_t filter_block_cache_hit = 0;
1934 int64_t block_cache_bytes_read = 0;
1935 int64_t block_cache_bytes_write = 0;
1936 };
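// Typical usage in the tests below: take a fresh snapshot right after the
// operation of interest and compare against the expected ticker values, e.g.
//   BlockCachePropertiesSnapshot props(options.statistics.get());
//   props.AssertEqual(index_miss, index_hit, data_miss, data_hit);
// A new snapshot is needed after every step because the counters are captured
// once, at construction time.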
1937
1938 // Make sure that, by default, index/filter blocks are pre-loaded (meaning we
1939 // won't use the block cache to store them).
1940 TEST_P(BlockBasedTableTest, BlockCacheDisabledTest) {
1941 Options options;
1942 options.create_if_missing = true;
1943 options.statistics = CreateDBStatistics();
1944 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1945 table_options.block_cache = NewLRUCache(1024, 4);
1946 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1947 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1948 std::vector<std::string> keys;
1949 stl_wrappers::KVMap kvmap;
1950
1951 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1952 c.Add("key", "value");
1953 const ImmutableCFOptions ioptions(options);
1954 const MutableCFOptions moptions(options);
1955 c.Finish(options, ioptions, moptions, table_options,
1956 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1957
1958 // preloading filter/index blocks is enabled.
1959 auto reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
1960 ASSERT_TRUE(reader->TEST_filter_block_preloaded());
1961 ASSERT_TRUE(reader->TEST_index_reader_preloaded());
1962
1963 {
1964 // nothing happens in the beginning
1965 BlockCachePropertiesSnapshot props(options.statistics.get());
1966 props.AssertIndexBlockStat(0, 0);
1967 props.AssertFilterBlockStat(0, 0);
1968 }
1969
1970 {
1971 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
1972 GetContext::kNotFound, Slice(), nullptr, nullptr,
1973 nullptr, nullptr, nullptr);
1974 // a hack just to trigger BlockBasedTable::GetFilter.
1975 reader->Get(ReadOptions(), "non-exist-key", &get_context,
1976 moptions.prefix_extractor.get());
1977 BlockCachePropertiesSnapshot props(options.statistics.get());
1978 props.AssertIndexBlockStat(0, 0);
1979 props.AssertFilterBlockStat(0, 0);
1980 }
1981 }
1982
1983 // Due to the difficulties of the interaction between statistics, this test
1984 // only covers the case where the index block is put into the block cache.
1985 TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) {
1986 // -- Table construction
1987 Options options;
1988 options.create_if_missing = true;
1989 options.statistics = CreateDBStatistics();
1990
1991 // Enable the cache for index/filter blocks
1992 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1993 table_options.block_cache = NewLRUCache(2048, 2);
1994 table_options.cache_index_and_filter_blocks = true;
1995 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1996 std::vector<std::string> keys;
1997 stl_wrappers::KVMap kvmap;
1998
1999 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2000 c.Add("key", "value");
2001 const ImmutableCFOptions ioptions(options);
2002 const MutableCFOptions moptions(options);
2003 c.Finish(options, ioptions, moptions, table_options,
2004 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2005 // preloading filter/index blocks is prohibited.
2006 auto* reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2007 ASSERT_TRUE(!reader->TEST_filter_block_preloaded());
2008 ASSERT_TRUE(!reader->TEST_index_reader_preloaded());
2009
2010 // -- PART 1: Open with a regular block cache.
2011 // The block cache is large enough here that nothing gets evicted.
2012 unique_ptr<InternalIterator> iter;
2013
2014 int64_t last_cache_bytes_read = 0;
2015 // At first, no block will be accessed.
2016 {
2017 BlockCachePropertiesSnapshot props(options.statistics.get());
2018 // index will be added to block cache.
2019 props.AssertEqual(1, // index block miss
2020 0, 0, 0);
2021 ASSERT_EQ(props.GetCacheBytesRead(), 0);
2022 ASSERT_EQ(props.GetCacheBytesWrite(),
2023 table_options.block_cache->GetUsage());
2024 last_cache_bytes_read = props.GetCacheBytesRead();
2025 }
2026
2027 // Only index block will be accessed
2028 {
2029 iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2030 BlockCachePropertiesSnapshot props(options.statistics.get());
2031 // NOTE: to help better highlight the "delta" of each ticker, I use
2032 // <last_value> + <added_value> to indicate the increment of changed
2033 // value; other numbers remain the same.
2034 props.AssertEqual(1, 0 + 1, // index block hit
2035 0, 0);
2036 // Cache hit, bytes read from cache should increase
2037 ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
2038 ASSERT_EQ(props.GetCacheBytesWrite(),
2039 table_options.block_cache->GetUsage());
2040 last_cache_bytes_read = props.GetCacheBytesRead();
2041 }
2042
2043 // Only data block will be accessed
2044 {
2045 iter->SeekToFirst();
2046 BlockCachePropertiesSnapshot props(options.statistics.get());
2047 props.AssertEqual(1, 1, 0 + 1, // data block miss
2048 0);
2049 // Cache miss, Bytes read from cache should not change
2050 ASSERT_EQ(props.GetCacheBytesRead(), last_cache_bytes_read);
2051 ASSERT_EQ(props.GetCacheBytesWrite(),
2052 table_options.block_cache->GetUsage());
2053 last_cache_bytes_read = props.GetCacheBytesRead();
2054 }
2055
2056 // Data block will be in cache
2057 {
2058 iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2059 iter->SeekToFirst();
2060 BlockCachePropertiesSnapshot props(options.statistics.get());
2061 props.AssertEqual(1, 1 + 1, /* index block hit */
2062 1, 0 + 1 /* data block hit */);
2063 // Cache hit, bytes read from cache should increase
2064 ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
2065 ASSERT_EQ(props.GetCacheBytesWrite(),
2066 table_options.block_cache->GetUsage());
2067 }
2068 // release the iterator so that the block cache can reset correctly.
2069 iter.reset();
2070
2071 c.ResetTableReader();
2072
2073 // -- PART 2: Open with very small block cache
2074 // In this test, no block will ever get hit since the block cache is
2075 // too small to fit even one entry.
2076 table_options.block_cache = NewLRUCache(1, 4);
2077 options.statistics = CreateDBStatistics();
2078 options.table_factory.reset(new BlockBasedTableFactory(table_options));
2079 const ImmutableCFOptions ioptions2(options);
2080 const MutableCFOptions moptions2(options);
2081 c.Reopen(ioptions2, moptions2);
2082 {
2083 BlockCachePropertiesSnapshot props(options.statistics.get());
2084 props.AssertEqual(1, // index block miss
2085 0, 0, 0);
2086 // Cache miss, Bytes read from cache should not change
2087 ASSERT_EQ(props.GetCacheBytesRead(), 0);
2088 }
2089
2090 {
2091 // Both index and data block get accessed.
2092 // It first caches the index block, then the data block. But since the cache
2093 // size is only 1, the index block will be purged after the data block is inserted.
2094 iter.reset(c.NewIterator(moptions2.prefix_extractor.get()));
2095 BlockCachePropertiesSnapshot props(options.statistics.get());
2096 props.AssertEqual(1 + 1, // index block miss
2097 0, 0, // data block miss
2098 0);
2099 // Cache misses only; bytes read from cache should remain 0
2100 ASSERT_EQ(props.GetCacheBytesRead(), 0);
2101 }
2102
2103 {
2104 // SeekToFirst() accesses the data block. For the same reason, we expect a
2105 // data block cache miss.
2106 iter->SeekToFirst();
2107 BlockCachePropertiesSnapshot props(options.statistics.get());
2108 props.AssertEqual(2, 0, 0 + 1, // data block miss
2109 0);
2110 // Cache miss, Bytes read from cache should not change
2111 ASSERT_EQ(props.GetCacheBytesRead(), 0);
2112 }
2113 iter.reset();
2114 c.ResetTableReader();
2115
2116 // -- PART 3: Open table with bloom filter enabled but not in SST file
2117 table_options.block_cache = NewLRUCache(4096, 4);
2118 table_options.cache_index_and_filter_blocks = false;
2119 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2120
2121 TableConstructor c3(BytewiseComparator());
2122 std::string user_key = "k01";
2123 InternalKey internal_key(user_key, 0, kTypeValue);
2124 c3.Add(internal_key.Encode().ToString(), "hello");
2125 ImmutableCFOptions ioptions3(options);
2126 MutableCFOptions moptions3(options);
2127 // Generate table without filter policy
2128 c3.Finish(options, ioptions3, moptions3, table_options,
2129 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2130 c3.ResetTableReader();
2131
2132 // Open table with filter policy
2133 table_options.filter_policy.reset(NewBloomFilterPolicy(1));
2134 options.table_factory.reset(new BlockBasedTableFactory(table_options));
2135 options.statistics = CreateDBStatistics();
2136 ImmutableCFOptions ioptions4(options);
2137 MutableCFOptions moptions4(options);
2138 ASSERT_OK(c3.Reopen(ioptions4, moptions4));
2139 reader = dynamic_cast<BlockBasedTable*>(c3.GetTableReader());
2140 ASSERT_TRUE(!reader->TEST_filter_block_preloaded());
2141 PinnableSlice value;
2142 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2143 GetContext::kNotFound, user_key, &value, nullptr,
2144 nullptr, nullptr, nullptr);
2145 ASSERT_OK(reader->Get(ReadOptions(), internal_key.Encode(), &get_context,
2146 moptions4.prefix_extractor.get()));
2147 ASSERT_STREQ(value.data(), "hello");
2148 BlockCachePropertiesSnapshot props(options.statistics.get());
2149 props.AssertFilterBlockStat(0, 0);
2150 c3.ResetTableReader();
2151 }
2152
2153 void ValidateBlockSizeDeviation(int value, int expected) {
2154 BlockBasedTableOptions table_options;
2155 table_options.block_size_deviation = value;
2156 BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);
2157
2158 const BlockBasedTableOptions* normalized_table_options =
2159 (const BlockBasedTableOptions*)factory->GetOptions();
2160 ASSERT_EQ(normalized_table_options->block_size_deviation, expected);
2161
2162 delete factory;
2163 }
2164
2165 void ValidateBlockRestartInterval(int value, int expected) {
2166 BlockBasedTableOptions table_options;
2167 table_options.block_restart_interval = value;
2168 BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);
2169
2170 const BlockBasedTableOptions* normalized_table_options =
2171 (const BlockBasedTableOptions*)factory->GetOptions();
2172 ASSERT_EQ(normalized_table_options->block_restart_interval, expected);
2173
2174 delete factory;
2175 }
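// The two helpers above confirm that BlockBasedTableFactory normalizes
// out-of-range option values and exposes the sanitized result via
// GetOptions().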
2176
2177 TEST_P(BlockBasedTableTest, InvalidOptions) {
2178 // invalid values for block_size_deviation (<0 or >100) are silently set to 0
2179 ValidateBlockSizeDeviation(-10, 0);
2180 ValidateBlockSizeDeviation(-1, 0);
2181 ValidateBlockSizeDeviation(0, 0);
2182 ValidateBlockSizeDeviation(1, 1);
2183 ValidateBlockSizeDeviation(99, 99);
2184 ValidateBlockSizeDeviation(100, 100);
2185 ValidateBlockSizeDeviation(101, 0);
2186 ValidateBlockSizeDeviation(1000, 0);
2187
2188 // invalid values for block_restart_interval (<1) are silently set to 1
2189 ValidateBlockRestartInterval(-10, 1);
2190 ValidateBlockRestartInterval(-1, 1);
2191 ValidateBlockRestartInterval(0, 1);
2192 ValidateBlockRestartInterval(1, 1);
2193 ValidateBlockRestartInterval(2, 2);
2194 ValidateBlockRestartInterval(1000, 1000);
2195 }
2196
2197 TEST_P(BlockBasedTableTest, BlockReadCountTest) {
2198 // bloom_filter_type = 0 -- block-based filter
2199 // bloom_filter_type = 1 -- full filter
2200 for (int bloom_filter_type = 0; bloom_filter_type < 2; ++bloom_filter_type) {
2201 for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2;
2202 ++index_and_filter_in_cache) {
2203 Options options;
2204 options.create_if_missing = true;
2205
2206 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2207 table_options.block_cache = NewLRUCache(1, 0);
2208 table_options.cache_index_and_filter_blocks = index_and_filter_in_cache;
2209 table_options.filter_policy.reset(
2210 NewBloomFilterPolicy(10, bloom_filter_type == 0));
2211 options.table_factory.reset(new BlockBasedTableFactory(table_options));
2212 std::vector<std::string> keys;
2213 stl_wrappers::KVMap kvmap;
2214
2215 TableConstructor c(BytewiseComparator());
2216 std::string user_key = "k04";
2217 InternalKey internal_key(user_key, 0, kTypeValue);
2218 std::string encoded_key = internal_key.Encode().ToString();
2219 c.Add(encoded_key, "hello");
2220 ImmutableCFOptions ioptions(options);
2221 MutableCFOptions moptions(options);
2222 // Generate table with filter policy
2223 c.Finish(options, ioptions, moptions, table_options,
2224 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2225 auto reader = c.GetTableReader();
2226 PinnableSlice value;
2227 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2228 GetContext::kNotFound, user_key, &value, nullptr,
2229 nullptr, nullptr, nullptr);
2230 get_perf_context()->Reset();
2231 ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
2232 moptions.prefix_extractor.get()));
2233 if (index_and_filter_in_cache) {
2234 // data, index and filter block
2235 ASSERT_EQ(get_perf_context()->block_read_count, 3);
2236 } else {
2237 // just the data block
2238 ASSERT_EQ(get_perf_context()->block_read_count, 1);
2239 }
2240 ASSERT_EQ(get_context.State(), GetContext::kFound);
2241 ASSERT_STREQ(value.data(), "hello");
2242
2243 // Get non-existing key
2244 user_key = "does-not-exist";
2245 internal_key = InternalKey(user_key, 0, kTypeValue);
2246 encoded_key = internal_key.Encode().ToString();
2247
2248 value.Reset();
2249 get_context = GetContext(options.comparator, nullptr, nullptr, nullptr,
2250 GetContext::kNotFound, user_key, &value, nullptr,
2251 nullptr, nullptr, nullptr);
2252 get_perf_context()->Reset();
2253 ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
2254 moptions.prefix_extractor.get()));
2255 ASSERT_EQ(get_context.State(), GetContext::kNotFound);
2256
2257 if (index_and_filter_in_cache) {
2258 if (bloom_filter_type == 0) {
2259 // with block-based, we read index and then the filter
2260 ASSERT_EQ(get_perf_context()->block_read_count, 2);
2261 } else {
2262 // with full-filter, we read filter first and then we stop
2263 ASSERT_EQ(get_perf_context()->block_read_count, 1);
2264 }
2265 } else {
2266 // filter is already in memory and it figures out that the key doesn't
2267 // exist
2268 ASSERT_EQ(get_perf_context()->block_read_count, 0);
2269 }
2270 }
2271 }
2272 }
2273
2274 // A wrapper around LRUCache that also keeps track of the data blocks (as
2275 // opposed to other objects) in the cache. The class is very simple and can be
2276 // used only for trivial tests.
2277 class MockCache : public LRUCache {
2278 public:
2279 MockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
2280 double high_pri_pool_ratio)
2281 : LRUCache(capacity, num_shard_bits, strict_capacity_limit,
2282 high_pri_pool_ratio) {}
2283 virtual Status Insert(const Slice& key, void* value, size_t charge,
2284 void (*deleter)(const Slice& key, void* value),
2285 Handle** handle = nullptr,
2286 Priority priority = Priority::LOW) override {
2287 // Replace the deleter with our own so that we keep track of data blocks
2288 // erased from the cache
2289 deleters_[key.ToString()] = deleter;
2290 return ShardedCache::Insert(key, value, charge, &MockDeleter, handle,
2291 priority);
2292 }
2293 // This is called by the application right after inserting a data block
2294 virtual void TEST_mark_as_data_block(const Slice& key,
2295 size_t charge) override {
2296 marked_data_in_cache_[key.ToString()] = charge;
2297 marked_size_ += charge;
2298 }
2299 using DeleterFunc = void (*)(const Slice& key, void* value);
2300 static std::map<std::string, DeleterFunc> deleters_;
2301 static std::map<std::string, size_t> marked_data_in_cache_;
2302 static size_t marked_size_;
2303 static void MockDeleter(const Slice& key, void* value) {
2304 // If the item was marked as a data block, decrease its usage from the
2305 // total data block usage of the cache
2306 if (marked_data_in_cache_.find(key.ToString()) !=
2307 marked_data_in_cache_.end()) {
2308 marked_size_ -= marked_data_in_cache_[key.ToString()];
2309 }
2310 // Then call the original deleter
2311 assert(deleters_.find(key.ToString()) != deleters_.end());
2312 auto deleter = deleters_[key.ToString()];
2313 deleter(key, value);
2314 }
2315 };
2316
2317 size_t MockCache::marked_size_ = 0;
2318 std::map<std::string, MockCache::DeleterFunc> MockCache::deleters_;
2319 std::map<std::string, size_t> MockCache::marked_data_in_cache_;
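// Note: the bookkeeping above lives in static maps shared by every MockCache
// instance and keyed by the raw cache key, so this helper is only meant for
// the simple, single-threaded scenarios in the test below.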
2320
2321 // Block cache can contain raw data blocks as well as general objects. If an
2322 // object depends on the table to be live, it then must be destructed before the
2323 // table is closed. This test makes sure that the only items remaining in the
2324 // cache after the table is closed are raw data blocks.
2325 TEST_P(BlockBasedTableTest, NoObjectInCacheAfterTableClose) {
2326 for (int level: {-1, 0, 1, 10}) {
2327 for (auto index_type :
2328 {BlockBasedTableOptions::IndexType::kBinarySearch,
2329 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch}) {
2330 for (bool block_based_filter : {true, false}) {
2331 for (bool partition_filter : {true, false}) {
2332 if (partition_filter &&
2333 (block_based_filter ||
2334 index_type !=
2335 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch)) {
2336 continue;
2337 }
2338 for (bool index_and_filter_in_cache : {true, false}) {
2339 for (bool pin_l0 : {true, false}) {
2340 for (bool pin_top_level : {true, false}) {
2341 if (pin_l0 && !index_and_filter_in_cache) {
2342 continue;
2343 }
2344 // Create a table
2345 Options opt;
2346 unique_ptr<InternalKeyComparator> ikc;
2347 ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
2348 opt.compression = kNoCompression;
2349 BlockBasedTableOptions table_options =
2350 GetBlockBasedTableOptions();
2351 table_options.block_size = 1024;
2352 table_options.index_type =
2353 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
2354 table_options.pin_l0_filter_and_index_blocks_in_cache = pin_l0;
2355 table_options.pin_top_level_index_and_filter = pin_top_level;
2356 table_options.partition_filters = partition_filter;
2357 table_options.cache_index_and_filter_blocks =
2358 index_and_filter_in_cache;
2359 // big enough so we don't ever lose cached values.
2360 table_options.block_cache = std::shared_ptr<rocksdb::Cache>(
2361 new MockCache(16 * 1024 * 1024, 4, false, 0.0));
2362 table_options.filter_policy.reset(
2363 rocksdb::NewBloomFilterPolicy(10, block_based_filter));
2364 opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
2365
2366 bool convert_to_internal_key = false;
2367 TableConstructor c(BytewiseComparator(), convert_to_internal_key,
2368 level);
2369 std::string user_key = "k01";
2370 std::string key =
2371 InternalKey(user_key, 0, kTypeValue).Encode().ToString();
2372 c.Add(key, "hello");
2373 std::vector<std::string> keys;
2374 stl_wrappers::KVMap kvmap;
2375 const ImmutableCFOptions ioptions(opt);
2376 const MutableCFOptions moptions(opt);
2377 c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys,
2378 &kvmap);
2379
2380 // Do a read so the index/filter blocks get loaded into the cache
2381 auto table_reader =
2382 dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2383 PinnableSlice value;
2384 GetContext get_context(opt.comparator, nullptr, nullptr, nullptr,
2385 GetContext::kNotFound, user_key, &value,
2386 nullptr, nullptr, nullptr, nullptr);
2387 InternalKey ikey(user_key, 0, kTypeValue);
2388 auto s = table_reader->Get(ReadOptions(), key, &get_context,
2389 moptions.prefix_extractor.get());
2390 ASSERT_EQ(get_context.State(), GetContext::kFound);
2391 ASSERT_STREQ(value.data(), "hello");
2392
2393 // Close the table
2394 c.ResetTableReader();
2395
2396 auto usage = table_options.block_cache->GetUsage();
2397 auto pinned_usage = table_options.block_cache->GetPinnedUsage();
2398 // The only usage must be for marked data blocks
2399 ASSERT_EQ(usage, MockCache::marked_size_);
2400 // There must be some pinned data since PinnableSlice has not
2401 // released them yet
2402 ASSERT_GT(pinned_usage, 0);
2403 // Release pinnable slice resources
2404 value.Reset();
2405 pinned_usage = table_options.block_cache->GetPinnedUsage();
2406 ASSERT_EQ(pinned_usage, 0);
2407 }
2408 }
2409 }
2410 }
2411 }
2412 }
2413 } // level
2414 }
2415
2416 TEST_P(BlockBasedTableTest, BlockCacheLeak) {
2417 // Check that when we reopen a table we don't lose access to blocks already
2418 // in the cache. This test checks whether the Table actually makes use of the
2419 // unique ID from the file.
2420
2421 Options opt;
2422 unique_ptr<InternalKeyComparator> ikc;
2423 ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
2424 opt.compression = kNoCompression;
2425 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2426 table_options.block_size = 1024;
2427 // big enough so we don't ever lose cached values.
2428 table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
2429 opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
2430
2431 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2432 c.Add("k01", "hello");
2433 c.Add("k02", "hello2");
2434 c.Add("k03", std::string(10000, 'x'));
2435 c.Add("k04", std::string(200000, 'x'));
2436 c.Add("k05", std::string(300000, 'x'));
2437 c.Add("k06", "hello3");
2438 c.Add("k07", std::string(100000, 'x'));
2439 std::vector<std::string> keys;
2440 stl_wrappers::KVMap kvmap;
2441 const ImmutableCFOptions ioptions(opt);
2442 const MutableCFOptions moptions(opt);
2443 c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
2444
2445 unique_ptr<InternalIterator> iter(
2446 c.NewIterator(moptions.prefix_extractor.get()));
2447 iter->SeekToFirst();
2448 while (iter->Valid()) {
2449 iter->key();
2450 iter->value();
2451 iter->Next();
2452 }
2453 ASSERT_OK(iter->status());
2454 iter.reset();
2455
2456 const ImmutableCFOptions ioptions1(opt);
2457 const MutableCFOptions moptions1(opt);
2458 ASSERT_OK(c.Reopen(ioptions1, moptions1));
2459 auto table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2460 for (const std::string& key : keys) {
2461 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
2462 ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
2463 }
2464 c.ResetTableReader();
2465
2466 // rerun with different block cache
2467 table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
2468 opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
2469 const ImmutableCFOptions ioptions2(opt);
2470 const MutableCFOptions moptions2(opt);
2471 ASSERT_OK(c.Reopen(ioptions2, moptions2));
2472 table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2473 for (const std::string& key : keys) {
2474 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
2475 ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
2476 }
2477 c.ResetTableReader();
2478 }
2479
2480 TEST_P(BlockBasedTableTest, NewIndexIteratorLeak) {
2481 // A regression test to avoid data race described in
2482 // https://github.com/facebook/rocksdb/issues/1267
2483 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2484 std::vector<std::string> keys;
2485 stl_wrappers::KVMap kvmap;
2486 c.Add("a1", "val1");
2487 Options options;
2488 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
2489 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2490 table_options.index_type = BlockBasedTableOptions::kHashSearch;
2491 table_options.cache_index_and_filter_blocks = true;
2492 table_options.block_cache = NewLRUCache(0);
2493 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2494 const ImmutableCFOptions ioptions(options);
2495 const MutableCFOptions moptions(options);
2496 c.Finish(options, ioptions, moptions, table_options,
2497 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2498
2499 rocksdb::SyncPoint::GetInstance()->LoadDependencyAndMarkers(
2500 {
2501 {"BlockBasedTable::NewIndexIterator::thread1:1",
2502 "BlockBasedTable::NewIndexIterator::thread2:2"},
2503 {"BlockBasedTable::NewIndexIterator::thread2:3",
2504 "BlockBasedTable::NewIndexIterator::thread1:4"},
2505 },
2506 {
2507 {"BlockBasedTableTest::NewIndexIteratorLeak:Thread1Marker",
2508 "BlockBasedTable::NewIndexIterator::thread1:1"},
2509 {"BlockBasedTableTest::NewIndexIteratorLeak:Thread1Marker",
2510 "BlockBasedTable::NewIndexIterator::thread1:4"},
2511 {"BlockBasedTableTest::NewIndexIteratorLeak:Thread2Marker",
2512 "BlockBasedTable::NewIndexIterator::thread2:2"},
2513 {"BlockBasedTableTest::NewIndexIteratorLeak:Thread2Marker",
2514 "BlockBasedTable::NewIndexIterator::thread2:3"},
2515 });
2516
2517 rocksdb::SyncPoint::GetInstance()->EnableProcessing();
2518 ReadOptions ro;
2519 auto* reader = c.GetTableReader();
2520
2521 std::function<void()> func1 = [&]() {
2522 TEST_SYNC_POINT("BlockBasedTableTest::NewIndexIteratorLeak:Thread1Marker");
2523 // TODO(Zhongyi): update test to use MutableCFOptions
2524 std::unique_ptr<InternalIterator> iter(
2525 reader->NewIterator(ro, moptions.prefix_extractor.get()));
2526 iter->Seek(InternalKey("a1", 0, kTypeValue).Encode());
2527 };
2528
2529 std::function<void()> func2 = [&]() {
2530 TEST_SYNC_POINT("BlockBasedTableTest::NewIndexIteratorLeak:Thread2Marker");
2531 std::unique_ptr<InternalIterator> iter(
2532 reader->NewIterator(ro, moptions.prefix_extractor.get()));
2533 };
2534
2535 auto thread1 = port::Thread(func1);
2536 auto thread2 = port::Thread(func2);
2537 thread1.join();
2538 thread2.join();
2539 rocksdb::SyncPoint::GetInstance()->DisableProcessing();
2540 c.ResetTableReader();
2541 }
2542
2543 // Plain table is not supported in ROCKSDB_LITE
2544 #ifndef ROCKSDB_LITE
2545 TEST_F(PlainTableTest, BasicPlainTableProperties) {
2546 PlainTableOptions plain_table_options;
2547 plain_table_options.user_key_len = 8;
2548 plain_table_options.bloom_bits_per_key = 8;
2549 plain_table_options.hash_table_ratio = 0;
2550
2551 PlainTableFactory factory(plain_table_options);
2552 test::StringSink sink;
2553 unique_ptr<WritableFileWriter> file_writer(
2554 test::GetWritableFileWriter(new test::StringSink(), "" /* don't care */));
2555 Options options;
2556 const ImmutableCFOptions ioptions(options);
2557 const MutableCFOptions moptions(options);
2558 InternalKeyComparator ikc(options.comparator);
2559 std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
2560 int_tbl_prop_collector_factories;
2561 std::string column_family_name;
2562 int unknown_level = -1;
2563 std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
2564 TableBuilderOptions(
2565 ioptions, moptions, ikc, &int_tbl_prop_collector_factories,
2566 kNoCompression, CompressionOptions(), nullptr /* compression_dict */,
2567 false /* skip_filters */, column_family_name, unknown_level),
2568 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
2569 file_writer.get()));
2570
2571 for (char c = 'a'; c <= 'z'; ++c) {
2572 std::string key(8, c);
2573 key.append("\1       ");  // PlainTable expects internal key structure (8-byte sequence/type tail)
2574 std::string value(28, c + 42);
2575 builder->Add(key, value);
2576 }
2577 ASSERT_OK(builder->Finish());
2578 file_writer->Flush();
2579
2580 test::StringSink* ss =
2581 static_cast<test::StringSink*>(file_writer->writable_file());
2582 unique_ptr<RandomAccessFileReader> file_reader(
2583 test::GetRandomAccessFileReader(
2584 new test::StringSource(ss->contents(), 72242, true)));
2585
2586 TableProperties* props = nullptr;
2587 auto s = ReadTableProperties(file_reader.get(), ss->contents().size(),
2588 kPlainTableMagicNumber, ioptions,
2589 &props, true /* compression_type_missing */);
2590 std::unique_ptr<TableProperties> props_guard(props);
2591 ASSERT_OK(s);
2592
2593 ASSERT_EQ(0ul, props->index_size);
2594 ASSERT_EQ(0ul, props->filter_size);
2595 ASSERT_EQ(16ul * 26, props->raw_key_size);
2596 ASSERT_EQ(28ul * 26, props->raw_value_size);
2597 ASSERT_EQ(26ul, props->num_entries);
2598 ASSERT_EQ(1ul, props->num_data_blocks);
2599 }
2600 #endif // !ROCKSDB_LITE
2601
2602 TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) {
2603 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2604 c.Add("k01", "hello");
2605 c.Add("k02", "hello2");
2606 c.Add("k03", std::string(10000, 'x'));
2607 c.Add("k04", std::string(200000, 'x'));
2608 c.Add("k05", std::string(300000, 'x'));
2609 c.Add("k06", "hello3");
2610 c.Add("k07", std::string(100000, 'x'));
2611 std::vector<std::string> keys;
2612 stl_wrappers::KVMap kvmap;
2613 Options options;
2614 test::PlainInternalKeyComparator internal_comparator(options.comparator);
2615 options.compression = kNoCompression;
2616 BlockBasedTableOptions table_options;
2617 table_options.block_size = 1024;
2618 const ImmutableCFOptions ioptions(options);
2619 const MutableCFOptions moptions(options);
2620 c.Finish(options, ioptions, moptions, table_options, internal_comparator,
2621 &keys, &kvmap);
2622
2623 ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0));
2624 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0));
2625 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"), 0, 0));
2626 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0));
2627 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 0, 0));
2628 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 10000, 11000));
2629 // k04 and k05 will be in two consecutive blocks, the index is
2630 // an arbitrary slice between k04 and k05, either before or after k04a
2631 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 10000, 211000));
2632 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000));
2633 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000));
2634 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000));
2635 ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000));
2636 c.ResetTableReader();
2637 }
2638
2639 static void DoCompressionTest(CompressionType comp) {
2640 Random rnd(301);
2641 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2642 std::string tmp;
2643 c.Add("k01", "hello");
2644 c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
2645 c.Add("k03", "hello3");
2646 c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
2647 std::vector<std::string> keys;
2648 stl_wrappers::KVMap kvmap;
2649 Options options;
2650 test::PlainInternalKeyComparator ikc(options.comparator);
2651 options.compression = comp;
2652 BlockBasedTableOptions table_options;
2653 table_options.block_size = 1024;
2654 const ImmutableCFOptions ioptions(options);
2655 const MutableCFOptions moptions(options);
2656 c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap);
2657
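// The two 10000-byte values come from test::CompressibleString with a 0.25
// ratio, so each should compress to roughly 2500 bytes; the offset ranges
// asserted below assume that.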
2658 ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0));
2659 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0));
2660 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0));
2661 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3000));
2662 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3000));
2663 ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6100));
2664 c.ResetTableReader();
2665 }
2666
2667 TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) {
2668 std::vector<CompressionType> compression_state;
2669 if (!Snappy_Supported()) {
2670 fprintf(stderr, "skipping snappy compression tests\n");
2671 } else {
2672 compression_state.push_back(kSnappyCompression);
2673 }
2674
2675 if (!Zlib_Supported()) {
2676 fprintf(stderr, "skipping zlib compression tests\n");
2677 } else {
2678 compression_state.push_back(kZlibCompression);
2679 }
2680
2681 // TODO(kailiu) DoCompressionTest() doesn't work with BZip2.
2682 /*
2683 if (!BZip2_Supported()) {
2684 fprintf(stderr, "skipping bzip2 compression tests\n");
2685 } else {
2686 compression_state.push_back(kBZip2Compression);
2687 }
2688 */
2689
2690 if (!LZ4_Supported()) {
2691 fprintf(stderr, "skipping lz4 and lz4hc compression tests\n");
2692 } else {
2693 compression_state.push_back(kLZ4Compression);
2694 compression_state.push_back(kLZ4HCCompression);
2695 }
2696
2697 if (!XPRESS_Supported()) {
2698 fprintf(stderr, "skipping xpress and xpress compression tests\n");
2699 }
2700 else {
2701 compression_state.push_back(kXpressCompression);
2702 }
2703
2704 for (auto state : compression_state) {
2705 DoCompressionTest(state);
2706 }
2707 }
2708
2709 // RandomizedHarnessTest is very slow for certain combinations of arguments.
2710 // Split into 8 pieces to reduce the time individual tests take.
2711 TEST_F(HarnessTest, Randomized1) {
2712 // part 1 out of 8
2713 const size_t part = 1;
2714 const size_t total = 8;
2715 RandomizedHarnessTest(part, total);
2716 }
2717
2718 TEST_F(HarnessTest, Randomized2) {
2719 // part 2 out of 8
2720 const size_t part = 2;
2721 const size_t total = 8;
2722 RandomizedHarnessTest(part, total);
2723 }
2724
2725 TEST_F(HarnessTest, Randomized3) {
2726 // part 3 out of 8
2727 const size_t part = 3;
2728 const size_t total = 8;
2729 RandomizedHarnessTest(part, total);
2730 }
2731
2732 TEST_F(HarnessTest, Randomized4) {
2733 // part 4 out of 8
2734 const size_t part = 4;
2735 const size_t total = 8;
2736 RandomizedHarnessTest(part, total);
2737 }
2738
2739 TEST_F(HarnessTest, Randomized5) {
2740 // part 5 out of 8
2741 const size_t part = 5;
2742 const size_t total = 8;
2743 RandomizedHarnessTest(part, total);
2744 }
2745
2746 TEST_F(HarnessTest, Randomized6) {
2747 // part 6 out of 8
2748 const size_t part = 6;
2749 const size_t total = 8;
2750 RandomizedHarnessTest(part, total);
2751 }
2752
2753 TEST_F(HarnessTest, Randomized7) {
2754 // part 7 out of 8
2755 const size_t part = 7;
2756 const size_t total = 8;
2757 RandomizedHarnessTest(part, total);
2758 }
2759
2760 TEST_F(HarnessTest, Randomized8) {
2761 // part 8 out of 8
2762 const size_t part = 8;
2763 const size_t total = 8;
2764 RandomizedHarnessTest(part, total);
2765 }
2766
2767 #ifndef ROCKSDB_LITE
2768 TEST_F(HarnessTest, RandomizedLongDB) {
2769 Random rnd(test::RandomSeed());
2770 TestArgs args = {DB_TEST, false, 16, kNoCompression, 0, false};
2771 Init(args);
2772 int num_entries = 100000;
2773 for (int e = 0; e < num_entries; e++) {
2774 std::string v;
2775 Add(test::RandomKey(&rnd, rnd.Skewed(4)),
2776 test::RandomString(&rnd, rnd.Skewed(5), &v).ToString());
2777 }
2778 Test(&rnd);
2779
2780 // We must have created enough data to force merging
2781 int files = 0;
2782 for (int level = 0; level < db()->NumberLevels(); level++) {
2783 std::string value;
2784 char name[100];
2785 snprintf(name, sizeof(name), "rocksdb.num-files-at-level%d", level);
2786 ASSERT_TRUE(db()->GetProperty(name, &value));
2787 files += atoi(value.c_str());
2788 }
2789 ASSERT_GT(files, 0);
2790 }
2791 #endif // ROCKSDB_LITE
2792
2793 class MemTableTest : public testing::Test {};
2794
2795 TEST_F(MemTableTest, Simple) {
2796 InternalKeyComparator cmp(BytewiseComparator());
2797 auto table_factory = std::make_shared<SkipListFactory>();
2798 Options options;
2799 options.memtable_factory = table_factory;
2800 ImmutableCFOptions ioptions(options);
2801 WriteBufferManager wb(options.db_write_buffer_size);
2802 MemTable* memtable =
2803 new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
2804 kMaxSequenceNumber, 0 /* column_family_id */);
2805 memtable->Ref();
2806 WriteBatch batch;
2807 WriteBatchInternal::SetSequence(&batch, 100);
2808 batch.Put(std::string("k1"), std::string("v1"));
2809 batch.Put(std::string("k2"), std::string("v2"));
2810 batch.Put(std::string("k3"), std::string("v3"));
2811 batch.Put(std::string("largekey"), std::string("vlarge"));
2812 batch.DeleteRange(std::string("chi"), std::string("xigua"));
2813 batch.DeleteRange(std::string("begin"), std::string("end"));
2814 ColumnFamilyMemTablesDefault cf_mems_default(memtable);
2815 ASSERT_TRUE(
2816 WriteBatchInternal::InsertInto(&batch, &cf_mems_default, nullptr).ok());
2817
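// The first pass iterates the point entries; the second pass iterates the
// range tombstones added by DeleteRange (the iterator may be null, hence the
// check below).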
2818 for (int i = 0; i < 2; ++i) {
2819 Arena arena;
2820 ScopedArenaIterator arena_iter_guard;
2821 std::unique_ptr<InternalIterator> iter_guard;
2822 InternalIterator* iter;
2823 if (i == 0) {
2824 iter = memtable->NewIterator(ReadOptions(), &arena);
2825 arena_iter_guard.set(iter);
2826 } else {
2827 iter = memtable->NewRangeTombstoneIterator(ReadOptions());
2828 iter_guard.reset(iter);
2829 }
2830 if (iter == nullptr) {
2831 continue;
2832 }
2833 iter->SeekToFirst();
2834 while (iter->Valid()) {
2835 fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(),
2836 iter->value().ToString().c_str());
2837 iter->Next();
2838 }
2839 }
2840
2841 delete memtable->Unref();
2842 }
2843
2844 // Test the empty key
2845 TEST_F(HarnessTest, SimpleEmptyKey) {
2846 auto args = GenerateArgList();
2847 for (const auto& arg : args) {
2848 Init(arg);
2849 Random rnd(test::RandomSeed() + 1);
2850 Add("", "v");
2851 Test(&rnd);
2852 }
2853 }
2854
2855 TEST_F(HarnessTest, SimpleSingle) {
2856 auto args = GenerateArgList();
2857 for (const auto& arg : args) {
2858 Init(arg);
2859 Random rnd(test::RandomSeed() + 2);
2860 Add("abc", "v");
2861 Test(&rnd);
2862 }
2863 }
2864
2865 TEST_F(HarnessTest, SimpleMulti) {
2866 auto args = GenerateArgList();
2867 for (const auto& arg : args) {
2868 Init(arg);
2869 Random rnd(test::RandomSeed() + 3);
2870 Add("abc", "v");
2871 Add("abcd", "v");
2872 Add("ac", "v2");
2873 Test(&rnd);
2874 }
2875 }
2876
2877 TEST_F(HarnessTest, SimpleSpecialKey) {
2878 auto args = GenerateArgList();
2879 for (const auto& arg : args) {
2880 Init(arg);
2881 Random rnd(test::RandomSeed() + 4);
2882 Add("\xff\xff", "v3");
2883 Test(&rnd);
2884 }
2885 }
2886
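// FooterTests round-trips footer encode/decode: legacy magic numbers are
// upconverted to the current ones and decode as version 0 with the default
// kCRC32c checksum, while newer footers carry an explicit checksum type and
// version.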
2887 TEST_F(HarnessTest, FooterTests) {
2888 {
2889 // upconvert legacy block based
2890 std::string encoded;
2891 Footer footer(kLegacyBlockBasedTableMagicNumber, 0);
2892 BlockHandle meta_index(10, 5), index(20, 15);
2893 footer.set_metaindex_handle(meta_index);
2894 footer.set_index_handle(index);
2895 footer.EncodeTo(&encoded);
2896 Footer decoded_footer;
2897 Slice encoded_slice(encoded);
2898 decoded_footer.DecodeFrom(&encoded_slice);
2899 ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
2900 ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
2901 ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
2902 ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
2903 ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
2904 ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
2905 ASSERT_EQ(decoded_footer.version(), 0U);
2906 }
2907 {
2908 // xxhash block based
2909 std::string encoded;
2910 Footer footer(kBlockBasedTableMagicNumber, 1);
2911 BlockHandle meta_index(10, 5), index(20, 15);
2912 footer.set_metaindex_handle(meta_index);
2913 footer.set_index_handle(index);
2914 footer.set_checksum(kxxHash);
2915 footer.EncodeTo(&encoded);
2916 Footer decoded_footer;
2917 Slice encoded_slice(encoded);
2918 decoded_footer.DecodeFrom(&encoded_slice);
2919 ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
2920 ASSERT_EQ(decoded_footer.checksum(), kxxHash);
2921 ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
2922 ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
2923 ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
2924 ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
2925 ASSERT_EQ(decoded_footer.version(), 1U);
2926 }
2927 // Plain table is not supported in ROCKSDB_LITE
2928 #ifndef ROCKSDB_LITE
2929 {
2930 // upconvert legacy plain table
2931 std::string encoded;
2932 Footer footer(kLegacyPlainTableMagicNumber, 0);
2933 BlockHandle meta_index(10, 5), index(20, 15);
2934 footer.set_metaindex_handle(meta_index);
2935 footer.set_index_handle(index);
2936 footer.EncodeTo(&encoded);
2937 Footer decoded_footer;
2938 Slice encoded_slice(encoded);
2939 decoded_footer.DecodeFrom(&encoded_slice);
2940 ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
2941 ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
2942 ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
2943 ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
2944 ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
2945 ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
2946 ASSERT_EQ(decoded_footer.version(), 0U);
2947 }
2948 {
2949 // xxhash plain table
2950 std::string encoded;
2951 Footer footer(kPlainTableMagicNumber, 1);
2952 BlockHandle meta_index(10, 5), index(20, 15);
2953 footer.set_metaindex_handle(meta_index);
2954 footer.set_index_handle(index);
2955 footer.set_checksum(kxxHash);
2956 footer.EncodeTo(&encoded);
2957 Footer decoded_footer;
2958 Slice encoded_slice(encoded);
2959 decoded_footer.DecodeFrom(&encoded_slice);
2960 ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
2961 ASSERT_EQ(decoded_footer.checksum(), kxxHash);
2962 ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
2963 ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
2964 ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
2965 ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
2966 ASSERT_EQ(decoded_footer.version(), 1U);
2967 }
2968 #endif // !ROCKSDB_LITE
2969 {
2970 // version == 2
2971 std::string encoded;
2972 Footer footer(kBlockBasedTableMagicNumber, 2);
2973 BlockHandle meta_index(10, 5), index(20, 15);
2974 footer.set_metaindex_handle(meta_index);
2975 footer.set_index_handle(index);
2976 footer.EncodeTo(&encoded);
2977 Footer decoded_footer;
2978 Slice encoded_slice(encoded);
2979 decoded_footer.DecodeFrom(&encoded_slice);
2980 ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
2981 ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
2982 ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
2983 ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
2984 ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
2985 ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
2986 ASSERT_EQ(decoded_footer.version(), 2U);
2987 }
2988 }
2989
2990 class IndexBlockRestartIntervalTest
2991 : public TableTest,
2992 public ::testing::WithParamInterface<std::pair<int, bool>> {
2993 public:
2994 static std::vector<std::pair<int, bool>> GetRestartValues() {
2995 return {{-1, false}, {0, false}, {1, false}, {8, false},
2996 {16, false}, {32, false}, {-1, true}, {0, true},
2997 {1, true}, {8, true}, {16, true}, {32, true}};
2998 }
2999 };
3000
3001 INSTANTIATE_TEST_CASE_P(
3002 IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest,
3003 ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues()));
3004
3005 TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) {
3006 const int kKeysInTable = 10000;
3007 const int kKeySize = 100;
3008 const int kValSize = 500;
3009
3010 const int index_block_restart_interval = std::get<0>(GetParam());
3011 const bool value_delta_encoding = std::get<1>(GetParam());
3012
3013 Options options;
3014 BlockBasedTableOptions table_options;
3015 table_options.block_size = 64; // small block size to get big index block
3016 table_options.index_block_restart_interval = index_block_restart_interval;
3017 if (value_delta_encoding) {
3018 table_options.format_version = 4;
3019 }
3020 options.table_factory.reset(new BlockBasedTableFactory(table_options));
3021
3022 TableConstructor c(BytewiseComparator());
3023 static Random rnd(301);
3024 for (int i = 0; i < kKeysInTable; i++) {
3025 InternalKey k(RandomString(&rnd, kKeySize), 0, kTypeValue);
3026 c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize));
3027 }
3028
3029 std::vector<std::string> keys;
3030 stl_wrappers::KVMap kvmap;
3031 std::unique_ptr<InternalKeyComparator> comparator(
3032 new InternalKeyComparator(BytewiseComparator()));
3033 const ImmutableCFOptions ioptions(options);
3034 const MutableCFOptions moptions(options);
3035 c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
3036 &kvmap);
3037 auto reader = c.GetTableReader();
3038
3039 std::unique_ptr<InternalIterator> db_iter(
3040 reader->NewIterator(ReadOptions(), moptions.prefix_extractor.get()));
3041
3042 // Test point lookup
3043 for (auto& kv : kvmap) {
3044 db_iter->Seek(kv.first);
3045
3046 ASSERT_TRUE(db_iter->Valid());
3047 ASSERT_OK(db_iter->status());
3048 ASSERT_EQ(db_iter->key(), kv.first);
3049 ASSERT_EQ(db_iter->value(), kv.second);
3050 }
3051
3052 // Test iterating
3053 auto kv_iter = kvmap.begin();
3054 for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
3055 ASSERT_EQ(db_iter->key(), kv_iter->first);
3056 ASSERT_EQ(db_iter->value(), kv_iter->second);
3057 kv_iter++;
3058 }
3059 ASSERT_EQ(kv_iter, kvmap.end());
3060 c.ResetTableReader();
3061 }
3062
3063 class PrefixTest : public testing::Test {
3064 public:
3065 PrefixTest() : testing::Test() {}
3066 ~PrefixTest() {}
3067 };
3068
3069 namespace {
3070 // A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest
3071 class TestPrefixExtractor : public rocksdb::SliceTransform {
3072 public:
3073 ~TestPrefixExtractor() override {}
3074 const char* Name() const override { return "TestPrefixExtractor"; }
3075
3076 rocksdb::Slice Transform(const rocksdb::Slice& src) const override {
3077 assert(IsValid(src));
3078 return rocksdb::Slice(src.data(), 3);
3079 }
3080
3081 bool InDomain(const rocksdb::Slice& src) const override {
3082 assert(IsValid(src));
3083 return true;
3084 }
3085
3086 bool InRange(const rocksdb::Slice& /*dst*/) const override { return true; }
3087
3088 bool IsValid(const rocksdb::Slice& src) const {
3089 if (src.size() != 4) {
3090 return false;
3091 }
3092 if (src[0] != '[') {
3093 return false;
3094 }
3095 if (src[1] < '0' || src[1] > '9') {
3096 return false;
3097 }
3098 if (src[2] != ']') {
3099 return false;
3100 }
3101 if (src[3] < '0' || src[3] > '9') {
3102 return false;
3103 }
3104 return true;
3105 }
3106 };
3107 } // namespace
3108
3109 TEST_F(PrefixTest, PrefixAndWholeKeyTest) {
3110 rocksdb::Options options;
3111 options.compaction_style = rocksdb::kCompactionStyleUniversal;
3112 options.num_levels = 20;
3113 options.create_if_missing = true;
3114 options.optimize_filters_for_hits = false;
3115 options.target_file_size_base = 268435456;
3116 options.prefix_extractor = std::make_shared<TestPrefixExtractor>();
3117 rocksdb::BlockBasedTableOptions bbto;
3118 bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10));
3119 bbto.block_size = 262144;
3120 bbto.whole_key_filtering = true;
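// With a prefix_extractor configured and whole_key_filtering enabled, the
// Bloom filter should contain both prefix and whole-key entries.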
3121
3122 const std::string kDBPath = test::PerThreadDBPath("table_prefix_test");
3123 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3124 DestroyDB(kDBPath, options);
3125 rocksdb::DB* db;
3126 ASSERT_OK(rocksdb::DB::Open(options, kDBPath, &db));
3127
3128 // Create a bunch of keys with 10 filters.
3129 for (int i = 0; i < 10; i++) {
3130 std::string prefix = "[" + std::to_string(i) + "]";
3131 for (int j = 0; j < 10; j++) {
3132 std::string key = prefix + std::to_string(j);
3133 ASSERT_OK(db->Put(rocksdb::WriteOptions(), key, "1"));
3134 }
3135 }
3136
3137 // Trigger compaction.
3138 ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
3139 delete db;
3140 // In the second round, turn whole_key_filtering off and expect
3141 // rocksdb still works.
3142 }
3143
3144 /*
3145 * Disable TableWithGlobalSeqno since RocksDB does not store global_seqno in
3146 * the SST file any more. Instead, RocksDB deduces global_seqno from the
3147 * MANIFEST while reading from an SST. Therefore, it's not possible to test the
3148 * functionality of global_seqno in a single, isolated unit test without the
3149 * involvement of Version, VersionSet, etc.
3150 */
3151 TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
3152 BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
3153 test::StringSink* sink = new test::StringSink();
3154 unique_ptr<WritableFileWriter> file_writer(
3155 test::GetWritableFileWriter(sink, "" /* don't care */));
3156 Options options;
3157 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3158 const ImmutableCFOptions ioptions(options);
3159 const MutableCFOptions moptions(options);
3160 InternalKeyComparator ikc(options.comparator);
3161 std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
3162 int_tbl_prop_collector_factories;
3163 int_tbl_prop_collector_factories.emplace_back(
3164 new SstFileWriterPropertiesCollectorFactory(2 /* version */,
3165 0 /* global_seqno*/));
3166 std::string column_family_name;
3167 std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
3168 TableBuilderOptions(ioptions, moptions, ikc,
3169 &int_tbl_prop_collector_factories, kNoCompression,
3170 CompressionOptions(), nullptr /* compression_dict */,
3171 false /* skip_filters */, column_family_name, -1),
3172 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
3173 file_writer.get()));
3174
3175 for (char c = 'a'; c <= 'z'; ++c) {
3176 std::string key(8, c);
3177 std::string value = key;
3178 InternalKey ik(key, 0, kTypeValue);
3179
3180 builder->Add(ik.Encode(), value);
3181 }
3182 ASSERT_OK(builder->Finish());
3183 file_writer->Flush();
3184
3185 test::RandomRWStringSink ss_rw(sink);
3186 uint32_t version;
3187 uint64_t global_seqno;
3188 uint64_t global_seqno_offset;
3189
3190 // Helper function to get version, global_seqno, global_seqno_offset
3191 std::function<void()> GetVersionAndGlobalSeqno = [&]() {
3192 unique_ptr<RandomAccessFileReader> file_reader(
3193 test::GetRandomAccessFileReader(
3194 new test::StringSource(ss_rw.contents(), 73342, true)));
3195
3196 TableProperties* props = nullptr;
3197 ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
3198 kBlockBasedTableMagicNumber, ioptions,
3199 &props, true /* compression_type_missing */));
3200
3201 UserCollectedProperties user_props = props->user_collected_properties;
3202 version = DecodeFixed32(
3203 user_props[ExternalSstFilePropertyNames::kVersion].c_str());
3204 global_seqno = DecodeFixed64(
3205 user_props[ExternalSstFilePropertyNames::kGlobalSeqno].c_str());
3206 global_seqno_offset =
3207 props->properties_offsets[ExternalSstFilePropertyNames::kGlobalSeqno];
3208
3209 delete props;
3210 };
3211
3212 // Helper function to update the value of the global seqno in the file
3213 std::function<void(uint64_t)> SetGlobalSeqno = [&](uint64_t val) {
3214 std::string new_global_seqno;
3215 PutFixed64(&new_global_seqno, val);
3216
3217 ASSERT_OK(ss_rw.Write(global_seqno_offset, new_global_seqno));
3218 };
3219
3220 // Helper function to get the contents of the table InternalIterator
3221 unique_ptr<TableReader> table_reader;
3222 std::function<InternalIterator*()> GetTableInternalIter = [&]() {
3223 unique_ptr<RandomAccessFileReader> file_reader(
3224 test::GetRandomAccessFileReader(
3225 new test::StringSource(ss_rw.contents(), 73342, true)));
3226
3227 options.table_factory->NewTableReader(
3228 TableReaderOptions(ioptions, moptions.prefix_extractor.get(),
3229 EnvOptions(), ikc),
3230 std::move(file_reader), ss_rw.contents().size(), &table_reader);
3231
3232 return table_reader->NewIterator(ReadOptions(),
3233 moptions.prefix_extractor.get());
3234 };
3235
3236 GetVersionAndGlobalSeqno();
3237 ASSERT_EQ(2, version);
3238 ASSERT_EQ(0, global_seqno);
3239
3240 InternalIterator* iter = GetTableInternalIter();
3241 char current_c = 'a';
3242 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
3243 ParsedInternalKey pik;
3244 ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
3245
3246 ASSERT_EQ(pik.type, ValueType::kTypeValue);
3247 ASSERT_EQ(pik.sequence, 0);
3248 ASSERT_EQ(pik.user_key, iter->value());
3249 ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
3250 current_c++;
3251 }
3252 ASSERT_EQ(current_c, 'z' + 1);
3253 delete iter;
3254
3255 // Update global sequence number to 10
3256 SetGlobalSeqno(10);
3257 GetVersionAndGlobalSeqno();
3258 ASSERT_EQ(2, version);
3259 ASSERT_EQ(10, global_seqno);
3260
3261 iter = GetTableInternalIter();
3262 current_c = 'a';
3263 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
3264 ParsedInternalKey pik;
3265 ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
3266
3267 ASSERT_EQ(pik.type, ValueType::kTypeValue);
3268 ASSERT_EQ(pik.sequence, 10);
3269 ASSERT_EQ(pik.user_key, iter->value());
3270 ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
3271 current_c++;
3272 }
3273 ASSERT_EQ(current_c, 'z' + 1);
3274
3275 // Verify Seek
3276 for (char c = 'a'; c <= 'z'; c++) {
3277 std::string k = std::string(8, c);
3278 InternalKey ik(k, 10, kValueTypeForSeek);
3279 iter->Seek(ik.Encode());
3280 ASSERT_TRUE(iter->Valid());
3281
3282 ParsedInternalKey pik;
3283 ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
3284
3285 ASSERT_EQ(pik.type, ValueType::kTypeValue);
3286 ASSERT_EQ(pik.sequence, 10);
3287 ASSERT_EQ(pik.user_key.ToString(), k);
3288 ASSERT_EQ(iter->value().ToString(), k);
3289 }
3290 delete iter;
3291
3292 // Update global sequence number to 3
3293 SetGlobalSeqno(3);
3294 GetVersionAndGlobalSeqno();
3295 ASSERT_EQ(2, version);
3296 ASSERT_EQ(3, global_seqno);
3297
3298 iter = GetTableInternalIter();
3299 current_c = 'a';
3300 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
3301 ParsedInternalKey pik;
3302 ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
3303
3304 ASSERT_EQ(pik.type, ValueType::kTypeValue);
3305 ASSERT_EQ(pik.sequence, 3);
3306 ASSERT_EQ(pik.user_key, iter->value());
3307 ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
3308 current_c++;
3309 }
3310 ASSERT_EQ(current_c, 'z' + 1);
3311
3312 // Verify Seek
3313 for (char c = 'a'; c <= 'z'; c++) {
3314 std::string k = std::string(8, c);
3315 // In internal key ordering seqno=4 sorts before seqno=3, so we still get our key
3316 InternalKey ik(k, 4, kValueTypeForSeek);
3317 iter->Seek(ik.Encode());
3318 ASSERT_TRUE(iter->Valid());
3319
3320 ParsedInternalKey pik;
3321 ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
3322
3323 ASSERT_EQ(pik.type, ValueType::kTypeValue);
3324 ASSERT_EQ(pik.sequence, 3);
3325 ASSERT_EQ(pik.user_key.ToString(), k);
3326 ASSERT_EQ(iter->value().ToString(), k);
3327 }
3328
3329 delete iter;
3330 }
3331
3332 TEST_P(BlockBasedTableTest, BlockAlignTest) {
3333 BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
3334 bbto.block_align = true;
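// block_align pads each data block so that it starts on a block_size-aligned
// offset; VerifyBlockAlignment() below checks every block occupies exactly 4KB.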
3335 test::StringSink* sink = new test::StringSink();
3336 unique_ptr<WritableFileWriter> file_writer(
3337 test::GetWritableFileWriter(sink, "" /* don't care */));
3338 Options options;
3339 options.compression = kNoCompression;
3340 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3341 const ImmutableCFOptions ioptions(options);
3342 const MutableCFOptions moptions(options);
3343 InternalKeyComparator ikc(options.comparator);
3344 std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
3345 int_tbl_prop_collector_factories;
3346 std::string column_family_name;
3347 std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
3348 TableBuilderOptions(ioptions, moptions, ikc,
3349 &int_tbl_prop_collector_factories, kNoCompression,
3350 CompressionOptions(), nullptr /* compression_dict */,
3351 false /* skip_filters */, column_family_name, -1),
3352 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
3353 file_writer.get()));
3354
3355 for (int i = 1; i <= 10000; ++i) {
3356 std::ostringstream ostr;
3357 ostr << std::setfill('0') << std::setw(5) << i;
3358 std::string key = ostr.str();
3359 std::string value = "val";
3360 InternalKey ik(key, 0, kTypeValue);
3361
3362 builder->Add(ik.Encode(), value);
3363 }
3364 ASSERT_OK(builder->Finish());
3365 file_writer->Flush();
3366
3367 test::RandomRWStringSink ss_rw(sink);
3368 unique_ptr<RandomAccessFileReader> file_reader(
3369 test::GetRandomAccessFileReader(
3370 new test::StringSource(ss_rw.contents(), 73342, true)));
3371
3372 // Helper lambda to verify that each data block is padded to the 4KB block size
3373 std::function<void()> VerifyBlockAlignment = [&]() {
3374 TableProperties* props = nullptr;
3375 ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
3376 kBlockBasedTableMagicNumber, ioptions,
3377 &props, true /* compression_type_missing */));
3378
3379 uint64_t data_block_size = props->data_size / props->num_data_blocks;
3380 ASSERT_EQ(data_block_size, 4096);
3381 ASSERT_EQ(props->data_size, data_block_size * props->num_data_blocks);
3382 delete props;
3383 };
3384
3385 VerifyBlockAlignment();
3386
3387 // The code below verifies that we can read the keys back. block_align is set
3388 // to false when creating the reader to ensure we can flip between the two
3389 // modes without any issues.
3390 std::unique_ptr<TableReader> table_reader;
3391 bbto.block_align = false;
3392 Options options2;
3393 options2.table_factory.reset(NewBlockBasedTableFactory(bbto));
3394 ImmutableCFOptions ioptions2(options2);
3395 const MutableCFOptions moptions2(options2);
3396
3397 ASSERT_OK(options2.table_factory->NewTableReader(
3398 TableReaderOptions(ioptions2, moptions2.prefix_extractor.get(),
3399 EnvOptions(),
3400 GetPlainInternalComparator(options2.comparator)),
3401 std::move(file_reader), ss_rw.contents().size(), &table_reader));
3402
3403 std::unique_ptr<InternalIterator> db_iter(table_reader->NewIterator(
3404 ReadOptions(), moptions2.prefix_extractor.get()));
3405
3406 int expected_key = 1;
3407 for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
3408 std::ostringstream ostr;
3409 ostr << std::setfill('0') << std::setw(5) << expected_key++;
3410 std::string key = ostr.str();
3411 std::string value = "val";
3412
3413 ASSERT_OK(db_iter->status());
3414 ASSERT_EQ(ExtractUserKey(db_iter->key()).ToString(), key);
3415 ASSERT_EQ(db_iter->value().ToString(), value);
3416 }
3417 expected_key--;
3418 ASSERT_EQ(expected_key, 10000);
3419 table_reader.reset();
3420 }
3421
3422 TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
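// Write a block-aligned table, then read the metaindex and properties blocks
// directly and verify the properties block is built with a single restart point.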
3423 BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
3424 bbto.block_align = true;
3425 test::StringSink* sink = new test::StringSink();
3426 unique_ptr<WritableFileWriter> file_writer(
3427 test::GetWritableFileWriter(sink, "" /* don't care */));
3428
3429 Options options;
3430 options.compression = kNoCompression;
3431 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3432
3433 const ImmutableCFOptions ioptions(options);
3434 const MutableCFOptions moptions(options);
3435 InternalKeyComparator ikc(options.comparator);
3436 std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
3437 int_tbl_prop_collector_factories;
3438 std::string column_family_name;
3439
3440 std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
3441 TableBuilderOptions(ioptions, moptions, ikc,
3442 &int_tbl_prop_collector_factories, kNoCompression,
3443 CompressionOptions(), nullptr /* compression_dict */,
3444 false /* skip_filters */, column_family_name, -1),
3445 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
3446 file_writer.get()));
3447
3448 for (int i = 1; i <= 10000; ++i) {
3449 std::ostringstream ostr;
3450 ostr << std::setfill('0') << std::setw(5) << i;
3451 std::string key = ostr.str();
3452 std::string value = "val";
3453 InternalKey ik(key, 0, kTypeValue);
3454
3455 builder->Add(ik.Encode(), value);
3456 }
3457 ASSERT_OK(builder->Finish());
3458 file_writer->Flush();
3459
3460 test::RandomRWStringSink ss_rw(sink);
3461 unique_ptr<RandomAccessFileReader> file_reader(
3462 test::GetRandomAccessFileReader(
3463 new test::StringSource(ss_rw.contents(), 73342, true)));
3464
3465 {
3466 RandomAccessFileReader* file = file_reader.get();
3467 uint64_t file_size = ss_rw.contents().size();
3468
3469 Footer footer;
3470 ASSERT_OK(ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
3471 &footer, kBlockBasedTableMagicNumber));
3472
3473 auto BlockFetchHelper = [&](const BlockHandle& handle,
3474 BlockContents* contents) {
3475 ReadOptions read_options;
3476 read_options.verify_checksums = false;
3477 Slice compression_dict;
3478 PersistentCacheOptions cache_options;
3479
3480 BlockFetcher block_fetcher(file, nullptr /* prefetch_buffer */, footer,
3481 read_options, handle, contents, ioptions,
3482 false /* decompress */, compression_dict,
3483 cache_options);
3484
3485 ASSERT_OK(block_fetcher.ReadBlockContents());
3486 };
3487
3488 // -- Read metaindex block
3489 auto metaindex_handle = footer.metaindex_handle();
3490 BlockContents metaindex_contents;
3491
3492 BlockFetchHelper(metaindex_handle, &metaindex_contents);
3493 Block metaindex_block(std::move(metaindex_contents),
3494 kDisableGlobalSequenceNumber);
3495
3496 std::unique_ptr<InternalIterator> meta_iter(
3497 metaindex_block.NewIterator<DataBlockIter>(BytewiseComparator(),
3498 BytewiseComparator()));
3499 bool found_properties_block = true;
3500 ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block));
3501 ASSERT_TRUE(found_properties_block);
3502
3503 // -- Read properties block
3504 Slice v = meta_iter->value();
3505 BlockHandle properties_handle;
3506 ASSERT_OK(properties_handle.DecodeFrom(&v));
3507 BlockContents properties_contents;
3508
3509 BlockFetchHelper(properties_handle, &properties_contents);
3510 Block properties_block(std::move(properties_contents),
3511 kDisableGlobalSequenceNumber);
3512
3513 ASSERT_EQ(properties_block.NumRestarts(), 1);
3514 }
3515 }
3516
3517 TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
3518 // The properties meta-block should come at the end since we always need to
3519 // read it when opening a file, unlike index/filter/other meta-blocks, which
3520 // are sometimes read depending on the user's configuration. This ordering
3521 // allows us to do a small readahead on the end of the file to read properties
3522 // and meta-index blocks with one I/O.
3523 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
3524 c.Add("a1", "val1");
3525 c.Add("b2", "val2");
3526 c.Add("c3", "val3");
3527 c.Add("d4", "val4");
3528 c.Add("e5", "val5");
3529 c.Add("f6", "val6");
3530 c.Add("g7", "val7");
3531 c.Add("h8", "val8");
3532 c.Add("j9", "val9");
3533
3534 // write an SST file
3535 Options options;
3536 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3537 table_options.filter_policy.reset(NewBloomFilterPolicy(
3538 8 /* bits_per_key */, false /* use_block_based_filter */));
3539 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3540 ImmutableCFOptions ioptions(options);
3541 MutableCFOptions moptions(options);
3542 std::vector<std::string> keys;
3543 stl_wrappers::KVMap kvmap;
3544 c.Finish(options, ioptions, moptions, table_options,
3545 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
3546
3547 // get file reader
3548 test::StringSink* table_sink = c.TEST_GetSink();
3549 std::unique_ptr<RandomAccessFileReader> table_reader{
3550 test::GetRandomAccessFileReader(
3551 new test::StringSource(table_sink->contents(), 0 /* unique_id */,
3552 false /* allow_mmap_reads */))};
3553 size_t table_size = table_sink->contents().size();
3554
3555 // read footer
3556 Footer footer;
3557 ASSERT_OK(ReadFooterFromFile(table_reader.get(),
3558 nullptr /* prefetch_buffer */, table_size,
3559 &footer, kBlockBasedTableMagicNumber));
3560
3561 // read metaindex
3562 auto metaindex_handle = footer.metaindex_handle();
3563 BlockContents metaindex_contents;
3564 Slice compression_dict;
3565 PersistentCacheOptions pcache_opts;
3566 BlockFetcher block_fetcher(
3567 table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(),
3568 metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
3569 compression_dict, pcache_opts);
3570 ASSERT_OK(block_fetcher.ReadBlockContents());
3571 Block metaindex_block(std::move(metaindex_contents),
3572 kDisableGlobalSequenceNumber);
3573
3574 // verify properties block comes last
3575 std::unique_ptr<InternalIterator> metaindex_iter{
3576 metaindex_block.NewIterator<DataBlockIter>(options.comparator,
3577 options.comparator)};
3578 uint64_t max_offset = 0;
3579 std::string key_at_max_offset;
3580 for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid();
3581 metaindex_iter->Next()) {
3582 BlockHandle handle;
3583 Slice value = metaindex_iter->value();
3584 ASSERT_OK(handle.DecodeFrom(&value));
3585 if (handle.offset() > max_offset) {
3586 max_offset = handle.offset();
3587 key_at_max_offset = metaindex_iter->key().ToString();
3588 }
3589 }
3590 ASSERT_EQ(kPropertiesBlock, key_at_max_offset);
3591 // index handle is stored in footer rather than metaindex block, so need
3592 // separate logic to verify it comes before properties block.
3593 ASSERT_GT(max_offset, footer.index_handle().offset());
3594 c.ResetTableReader();
3595 }
3596
3597 TEST_P(BlockBasedTableTest, BadOptions) {
3598 rocksdb::Options options;
3599 options.compression = kNoCompression;
3600 BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
3601 bbto.block_size = 4000;
3602 bbto.block_align = true;
3603
3604 const std::string kDBPath =
3605 test::PerThreadDBPath("block_based_table_bad_options_test");
3606 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3607 DestroyDB(kDBPath, options);
3608 rocksdb::DB* db;
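// With block_align enabled, a block_size that is not a power of two (4000)
// is expected to be rejected when opening the DB.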
3609 ASSERT_NOK(rocksdb::DB::Open(options, kDBPath, &db));
3610
3611 bbto.block_size = 4096;
3612 options.compression = kSnappyCompression;
3613 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
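// Even with an aligned block_size, combining block_align with compression
// should still be rejected.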
3614 ASSERT_NOK(rocksdb::DB::Open(options, kDBPath, &db));
3615 }
3616
3617 TEST_F(BBTTailPrefetchTest, TestTailPrefetchStats) {
3618 TailPrefetchStats tpstats;
3619 ASSERT_EQ(0, tpstats.GetSuggestedPrefetchSize());
3620 tpstats.RecordEffectiveSize(size_t{1000});
3621 tpstats.RecordEffectiveSize(size_t{1005});
3622 tpstats.RecordEffectiveSize(size_t{1002});
3623 ASSERT_EQ(1005, tpstats.GetSuggestedPrefetchSize());
3624
3625 // A single extremely large value shouldn't influence the suggestion much
3626 tpstats.RecordEffectiveSize(size_t{1002000});
3627 tpstats.RecordEffectiveSize(size_t{999});
3628 ASSERT_LE(1005, tpstats.GetSuggestedPrefetchSize());
3629 ASSERT_GT(1200, tpstats.GetSuggestedPrefetchSize());
3630
3631 // Only a history of the last 32 values is kept
3632 for (int i = 0; i < 32; i++) {
3633 tpstats.RecordEffectiveSize(size_t{100});
3634 }
3635 ASSERT_EQ(100, tpstats.GetSuggestedPrefetchSize());
3636
3637 // 16 large values and 16 small values. The suggested size should end up
3638 // closer to the small values; the assertions below bound it to [80, 200).
3639 for (int i = 0; i < 16; i++) {
3640 tpstats.RecordEffectiveSize(size_t{1000});
3641 }
3642 tpstats.RecordEffectiveSize(size_t{10});
3643 tpstats.RecordEffectiveSize(size_t{20});
3644 for (int i = 0; i < 6; i++) {
3645 tpstats.RecordEffectiveSize(size_t{100});
3646 }
3647 ASSERT_LE(80, tpstats.GetSuggestedPrefetchSize());
3648 ASSERT_GT(200, tpstats.GetSuggestedPrefetchSize());
3649 }
3650
3651 TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) {
3652 TailPrefetchStats tpstats;
3653 FilePrefetchBuffer buffer(nullptr, 0, 0, false, true);
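// min_offset_read() should report the smallest offset ever requested via
// TryReadFromCache().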
3654 buffer.TryReadFromCache(500, 10, nullptr);
3655 buffer.TryReadFromCache(480, 10, nullptr);
3656 buffer.TryReadFromCache(490, 10, nullptr);
3657 ASSERT_EQ(480, buffer.min_offset_read());
3658 }
3659
3660 TEST_P(BlockBasedTableTest, DataBlockHashIndex) {
3661 const int kNumKeys = 500;
3662 const int kKeySize = 8;
3663 const int kValSize = 40;
3664
3665 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3666 table_options.data_block_index_type =
3667 BlockBasedTableOptions::kDataBlockBinaryAndHash;
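// kDataBlockBinaryAndHash builds an extra hash index inside each data block;
// point lookups (Get) can use it, while Seek falls back to binary search.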
3668
3669 Options options;
3670 options.comparator = BytewiseComparator();
3671
3672 options.table_factory.reset(new BlockBasedTableFactory(table_options));
3673
3674 TableConstructor c(options.comparator);
3675
3676 static Random rnd(1048);
3677 for (int i = 0; i < kNumKeys; i++) {
3678 // append a trailing "1" to mark existent keys (flipped to "0" below for non-existent lookups)
3679 std::string random_key(RandomString(&rnd, kKeySize - 1) + "1");
3680 InternalKey k(random_key, 0, kTypeValue);
3681 c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize));
3682 }
3683
3684 std::vector<std::string> keys;
3685 stl_wrappers::KVMap kvmap;
3686 const ImmutableCFOptions ioptions(options);
3687 const MutableCFOptions moptions(options);
3688 const InternalKeyComparator internal_comparator(options.comparator);
3689 c.Finish(options, ioptions, moptions, table_options, internal_comparator,
3690 &keys, &kvmap);
3691
3692 auto reader = c.GetTableReader();
3693
3694 std::unique_ptr<InternalIterator> seek_iter;
3695 seek_iter.reset(
3696 reader->NewIterator(ReadOptions(), moptions.prefix_extractor.get()));
3697 for (int i = 0; i < 2; ++i) {
3698 ReadOptions ro;
3699 // For every kv, we look it up using two methods: Get() and Seek().
3700 // Get() will use the SuffixIndexHash in Block; for a non-existent key it
3701 // will report kNotFound.
3702 // Seek() will use the default BinarySeek() in Block, so for a non-existent
3703 // key it will land at the closest key that is larger than the target.
3704
3705 // Search for existent keys
3706 for (auto& kv : kvmap) {
3707 if (i == 0) {
3708 // Search using Seek()
3709 seek_iter->Seek(kv.first);
3710 ASSERT_OK(seek_iter->status());
3711 ASSERT_TRUE(seek_iter->Valid());
3712 ASSERT_EQ(seek_iter->key(), kv.first);
3713 ASSERT_EQ(seek_iter->value(), kv.second);
3714 } else {
3715 // Search using Get()
3716 PinnableSlice value;
3717 std::string user_key = ExtractUserKey(kv.first).ToString();
3718 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
3719 GetContext::kNotFound, user_key, &value, nullptr,
3720 nullptr, nullptr, nullptr);
3721 ASSERT_OK(reader->Get(ro, kv.first, &get_context,
3722 moptions.prefix_extractor.get()));
3723 ASSERT_EQ(get_context.State(), GetContext::kFound);
3724 ASSERT_EQ(value, Slice(kv.second));
3725 value.Reset();
3726 }
3727 }
3728
3729 // Search for non-existent keys
3730 for (auto& kv : kvmap) {
3731 std::string user_key = ExtractUserKey(kv.first).ToString();
3732 user_key.back() = '0'; // make it a non-existent key
3733 InternalKey internal_key(user_key, 0, kTypeValue);
3734 std::string encoded_key = internal_key.Encode().ToString();
3735 if (i == 0) { // Search using Seek()
3736 seek_iter->Seek(encoded_key);
3737 ASSERT_OK(seek_iter->status());
3738 if (seek_iter->Valid()) {
3739 ASSERT_TRUE(BytewiseComparator()->Compare(
3740 user_key, ExtractUserKey(seek_iter->key())) < 0);
3741 }
3742 } else { // Search using Get()
3743 PinnableSlice value;
3744 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
3745 GetContext::kNotFound, user_key, &value, nullptr,
3746 nullptr, nullptr, nullptr);
3747 ASSERT_OK(reader->Get(ro, encoded_key, &get_context,
3748 moptions.prefix_extractor.get()));
3749 ASSERT_EQ(get_context.State(), GetContext::kNotFound);
3750 value.Reset();
3751 }
3752 }
3753 }
3754 }
3755
3756 } // namespace rocksdb
3757
3758 int main(int argc, char** argv) {
3759 ::testing::InitGoogleTest(&argc, argv);
3760 return RUN_ALL_TESTS();
3761 }