ceph/src/rocksdb/table/table_test.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include <stdio.h>
11
12 #include <algorithm>
13 #include <iostream>
14 #include <map>
15 #include <memory>
16 #include <string>
17 #include <vector>
18
19 #include "cache/lru_cache.h"
20 #include "db/dbformat.h"
21 #include "db/memtable.h"
22 #include "db/write_batch_internal.h"
23 #include "memtable/stl_wrappers.h"
24 #include "monitoring/statistics.h"
25 #include "port/port.h"
26 #include "rocksdb/cache.h"
27 #include "rocksdb/db.h"
28 #include "rocksdb/env.h"
29 #include "rocksdb/iterator.h"
30 #include "rocksdb/memtablerep.h"
31 #include "rocksdb/perf_context.h"
32 #include "rocksdb/slice_transform.h"
33 #include "rocksdb/statistics.h"
34 #include "rocksdb/write_buffer_manager.h"
35 #include "table/block.h"
36 #include "table/block_based_table_builder.h"
37 #include "table/block_based_table_factory.h"
38 #include "table/block_based_table_reader.h"
39 #include "table/block_builder.h"
40 #include "table/block_fetcher.h"
41 #include "table/format.h"
42 #include "table/get_context.h"
43 #include "table/internal_iterator.h"
44 #include "table/meta_blocks.h"
45 #include "table/plain_table_factory.h"
46 #include "table/scoped_arena_iterator.h"
47 #include "table/sst_file_writer_collectors.h"
48 #include "util/compression.h"
49 #include "util/random.h"
50 #include "util/string_util.h"
51 #include "util/sync_point.h"
52 #include "util/testharness.h"
53 #include "util/testutil.h"
54 #include "utilities/merge_operators.h"
55
56 namespace rocksdb {
57
58 extern const uint64_t kLegacyBlockBasedTableMagicNumber;
59 extern const uint64_t kLegacyPlainTableMagicNumber;
60 extern const uint64_t kBlockBasedTableMagicNumber;
61 extern const uint64_t kPlainTableMagicNumber;
62
63 namespace {
64
65 // DummyPropertiesCollector used to test BlockBasedTableProperties
66 class DummyPropertiesCollector : public TablePropertiesCollector {
67 public:
68 const char* Name() const override { return ""; }
69
70 Status Finish(UserCollectedProperties* /*properties*/) override {
71 return Status::OK();
72 }
73
74 Status Add(const Slice& /*user_key*/, const Slice& /*value*/) override {
75 return Status::OK();
76 }
77
78 UserCollectedProperties GetReadableProperties() const override {
79 return UserCollectedProperties{};
80 }
81 };
82
83 class DummyPropertiesCollectorFactory1
84 : public TablePropertiesCollectorFactory {
85 public:
86 TablePropertiesCollector* CreateTablePropertiesCollector(
87 TablePropertiesCollectorFactory::Context /*context*/) override {
88 return new DummyPropertiesCollector();
89 }
90 const char* Name() const override { return "DummyPropertiesCollector1"; }
91 };
92
93 class DummyPropertiesCollectorFactory2
94 : public TablePropertiesCollectorFactory {
95 public:
96 TablePropertiesCollector* CreateTablePropertiesCollector(
97 TablePropertiesCollectorFactory::Context /*context*/) override {
98 return new DummyPropertiesCollector();
99 }
100 const char* Name() const override { return "DummyPropertiesCollector2"; }
101 };
102
103 // Return reverse of "key".
104 // Used to test non-lexicographic comparators.
105 std::string Reverse(const Slice& key) {
106 auto rev = key.ToString();
107 std::reverse(rev.begin(), rev.end());
108 return rev;
109 }
110
111 class ReverseKeyComparator : public Comparator {
112 public:
113 const char* Name() const override {
114 return "rocksdb.ReverseBytewiseComparator";
115 }
116
117 int Compare(const Slice& a, const Slice& b) const override {
118 return BytewiseComparator()->Compare(Reverse(a), Reverse(b));
119 }
120
121 void FindShortestSeparator(std::string* start,
122 const Slice& limit) const override {
123 std::string s = Reverse(*start);
124 std::string l = Reverse(limit);
125 BytewiseComparator()->FindShortestSeparator(&s, l);
126 *start = Reverse(s);
127 }
128
129 void FindShortSuccessor(std::string* key) const override {
130 std::string s = Reverse(*key);
131 BytewiseComparator()->FindShortSuccessor(&s);
132 *key = Reverse(s);
133 }
134 };
135
136 ReverseKeyComparator reverse_key_comparator;
137
138 void Increment(const Comparator* cmp, std::string* key) {
139 if (cmp == BytewiseComparator()) {
140 key->push_back('\0');
141 } else {
142 assert(cmp == &reverse_key_comparator);
143 std::string rev = Reverse(*key);
144 rev.push_back('\0');
145 *key = Reverse(rev);
146 }
147 }
148
149 } // namespace
150
151 // Helper class for tests to unify the interface between
152 // BlockBuilder/TableBuilder and Block/Table.
153 class Constructor {
154 public:
155 explicit Constructor(const Comparator* cmp)
156 : data_(stl_wrappers::LessOfComparator(cmp)) {}
157 virtual ~Constructor() { }
158
159 void Add(const std::string& key, const Slice& value) {
160 data_[key] = value.ToString();
161 }
162
163 // Finish constructing the data structure with all the keys that have
164 // been added so far. Returns the keys in sorted order in "*keys"
165 // and stores the key/value pairs in "*kvmap"
166 void Finish(const Options& options, const ImmutableCFOptions& ioptions,
167 const MutableCFOptions& moptions,
168 const BlockBasedTableOptions& table_options,
169 const InternalKeyComparator& internal_comparator,
170 std::vector<std::string>* keys, stl_wrappers::KVMap* kvmap) {
171 last_internal_key_ = &internal_comparator;
172 *kvmap = data_;
173 keys->clear();
174 for (const auto& kv : data_) {
175 keys->push_back(kv.first);
176 }
177 data_.clear();
178 Status s = FinishImpl(options, ioptions, moptions, table_options,
179 internal_comparator, *kvmap);
180 ASSERT_TRUE(s.ok()) << s.ToString();
181 }
182
183 // Construct the data structure from the data in "data"
184 virtual Status FinishImpl(const Options& options,
185 const ImmutableCFOptions& ioptions,
186 const MutableCFOptions& moptions,
187 const BlockBasedTableOptions& table_options,
188 const InternalKeyComparator& internal_comparator,
189 const stl_wrappers::KVMap& data) = 0;
190
191 virtual InternalIterator* NewIterator(
192 const SliceTransform* prefix_extractor = nullptr) const = 0;
193
194 virtual const stl_wrappers::KVMap& data() { return data_; }
195
196 virtual bool IsArenaMode() const { return false; }
197
198 virtual DB* db() const { return nullptr; } // Overridden in DBConstructor
199
200 virtual bool AnywayDeleteIterator() const { return false; }
201
202 protected:
203 const InternalKeyComparator* last_internal_key_;
204
205 private:
206 stl_wrappers::KVMap data_;
207 };
208
209 class BlockConstructor: public Constructor {
210 public:
211 explicit BlockConstructor(const Comparator* cmp)
212 : Constructor(cmp),
213 comparator_(cmp),
214 block_(nullptr) { }
215 ~BlockConstructor() override { delete block_; }
216 Status FinishImpl(const Options& /*options*/,
217 const ImmutableCFOptions& /*ioptions*/,
218 const MutableCFOptions& /*moptions*/,
219 const BlockBasedTableOptions& table_options,
220 const InternalKeyComparator& /*internal_comparator*/,
221 const stl_wrappers::KVMap& kv_map) override {
222 delete block_;
223 block_ = nullptr;
224 BlockBuilder builder(table_options.block_restart_interval);
225
226 for (const auto kv : kv_map) {
227 builder.Add(kv.first, kv.second);
228 }
229 // Open the block
230 data_ = builder.Finish().ToString();
231 BlockContents contents;
232 contents.data = data_;
233 block_ = new Block(std::move(contents), kDisableGlobalSequenceNumber);
234 return Status::OK();
235 }
236 InternalIterator* NewIterator(
237 const SliceTransform* /*prefix_extractor*/) const override {
238 return block_->NewIterator<DataBlockIter>(comparator_, comparator_);
239 }
240
241 private:
242 const Comparator* comparator_;
243 std::string data_;
244 Block* block_;
245
246 BlockConstructor();
247 };
248
249 // A helper class that converts internal format keys into user keys
250 class KeyConvertingIterator : public InternalIterator {
251 public:
252 explicit KeyConvertingIterator(InternalIterator* iter,
253 bool arena_mode = false)
254 : iter_(iter), arena_mode_(arena_mode) {}
255 ~KeyConvertingIterator() override {
256 if (arena_mode_) {
257 iter_->~InternalIterator();
258 } else {
259 delete iter_;
260 }
261 }
262 bool Valid() const override { return iter_->Valid() && status_.ok(); }
263 void Seek(const Slice& target) override {
264 ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
265 std::string encoded;
266 AppendInternalKey(&encoded, ikey);
267 iter_->Seek(encoded);
268 }
269 void SeekForPrev(const Slice& target) override {
270 ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
271 std::string encoded;
272 AppendInternalKey(&encoded, ikey);
273 iter_->SeekForPrev(encoded);
274 }
275 void SeekToFirst() override { iter_->SeekToFirst(); }
276 void SeekToLast() override { iter_->SeekToLast(); }
277 void Next() override { iter_->Next(); }
278 void Prev() override { iter_->Prev(); }
279
280 Slice key() const override {
281 assert(Valid());
282 ParsedInternalKey parsed_key;
283 if (!ParseInternalKey(iter_->key(), &parsed_key)) {
284 status_ = Status::Corruption("malformed internal key");
285 return Slice("corrupted key");
286 }
287 return parsed_key.user_key;
288 }
289
290 Slice value() const override { return iter_->value(); }
291 Status status() const override {
292 return status_.ok() ? iter_->status() : status_;
293 }
294
295 private:
296 mutable Status status_;
297 InternalIterator* iter_;
298 bool arena_mode_;
299
300 // No copying allowed
301 KeyConvertingIterator(const KeyConvertingIterator&);
302 void operator=(const KeyConvertingIterator&);
303 };
304
305 class TableConstructor: public Constructor {
306 public:
307 explicit TableConstructor(const Comparator* cmp,
308 bool convert_to_internal_key = false,
309 int level = -1)
310 : Constructor(cmp),
311 convert_to_internal_key_(convert_to_internal_key),
312 level_(level) {}
313 ~TableConstructor() override { Reset(); }
314
315 Status FinishImpl(const Options& options, const ImmutableCFOptions& ioptions,
316 const MutableCFOptions& moptions,
317 const BlockBasedTableOptions& /*table_options*/,
318 const InternalKeyComparator& internal_comparator,
319 const stl_wrappers::KVMap& kv_map) override {
320 Reset();
321 soptions.use_mmap_reads = ioptions.allow_mmap_reads;
322 file_writer_.reset(test::GetWritableFileWriter(new test::StringSink(),
323 "" /* don't care */));
324 std::unique_ptr<TableBuilder> builder;
325 std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
326 int_tbl_prop_collector_factories;
327 std::string column_family_name;
328 builder.reset(ioptions.table_factory->NewTableBuilder(
329 TableBuilderOptions(ioptions, moptions, internal_comparator,
330 &int_tbl_prop_collector_factories,
331 options.compression, options.sample_for_compression,
332 options.compression_opts, false /* skip_filters */,
333 column_family_name, level_),
334 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
335 file_writer_.get()));
336
337 for (const auto kv : kv_map) {
338 if (convert_to_internal_key_) {
339 ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
340 std::string encoded;
341 AppendInternalKey(&encoded, ikey);
342 builder->Add(encoded, kv.second);
343 } else {
344 builder->Add(kv.first, kv.second);
345 }
346 EXPECT_TRUE(builder->status().ok());
347 }
348 Status s = builder->Finish();
349 file_writer_->Flush();
350 EXPECT_TRUE(s.ok()) << s.ToString();
351
352 EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize());
353
354 // Open the table
355 uniq_id_ = cur_uniq_id_++;
356 file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
357 TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
358 const bool kSkipFilters = true;
359 const bool kImmortal = true;
360 return ioptions.table_factory->NewTableReader(
361 TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
362 internal_comparator, !kSkipFilters, !kImmortal,
363 level_),
364 std::move(file_reader_), TEST_GetSink()->contents().size(),
365 &table_reader_);
366 }
367
368 InternalIterator* NewIterator(
369 const SliceTransform* prefix_extractor) const override {
370 ReadOptions ro;
371 InternalIterator* iter = table_reader_->NewIterator(ro, prefix_extractor);
372 if (convert_to_internal_key_) {
373 return new KeyConvertingIterator(iter);
374 } else {
375 return iter;
376 }
377 }
378
379 uint64_t ApproximateOffsetOf(const Slice& key) const {
380 if (convert_to_internal_key_) {
381 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
382 const Slice skey = ikey.Encode();
383 return table_reader_->ApproximateOffsetOf(skey);
384 }
385 return table_reader_->ApproximateOffsetOf(key);
386 }
387
388 virtual Status Reopen(const ImmutableCFOptions& ioptions,
389 const MutableCFOptions& moptions) {
390 file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
391 TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
392 return ioptions.table_factory->NewTableReader(
393 TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
394 *last_internal_key_),
395 std::move(file_reader_), TEST_GetSink()->contents().size(),
396 &table_reader_);
397 }
398
399 virtual TableReader* GetTableReader() { return table_reader_.get(); }
400
401 bool AnywayDeleteIterator() const override {
402 return convert_to_internal_key_;
403 }
404
405 void ResetTableReader() { table_reader_.reset(); }
406
407 bool ConvertToInternalKey() { return convert_to_internal_key_; }
408
409 test::StringSink* TEST_GetSink() {
410 return static_cast<test::StringSink*>(file_writer_->writable_file());
411 }
412
413 private:
414 void Reset() {
415 uniq_id_ = 0;
416 table_reader_.reset();
417 file_writer_.reset();
418 file_reader_.reset();
419 }
420
421 uint64_t uniq_id_;
422 std::unique_ptr<WritableFileWriter> file_writer_;
423 std::unique_ptr<RandomAccessFileReader> file_reader_;
424 std::unique_ptr<TableReader> table_reader_;
425 bool convert_to_internal_key_;
426 int level_;
427
428 TableConstructor();
429
430 static uint64_t cur_uniq_id_;
431 EnvOptions soptions;
432 };
433 uint64_t TableConstructor::cur_uniq_id_ = 1;
434
435 class MemTableConstructor: public Constructor {
436 public:
437 explicit MemTableConstructor(const Comparator* cmp, WriteBufferManager* wb)
438 : Constructor(cmp),
439 internal_comparator_(cmp),
440 write_buffer_manager_(wb),
441 table_factory_(new SkipListFactory) {
442 options_.memtable_factory = table_factory_;
443 ImmutableCFOptions ioptions(options_);
444 memtable_ =
445 new MemTable(internal_comparator_, ioptions, MutableCFOptions(options_),
446 wb, kMaxSequenceNumber, 0 /* column_family_id */);
447 memtable_->Ref();
448 }
449 ~MemTableConstructor() override { delete memtable_->Unref(); }
450 Status FinishImpl(const Options&, const ImmutableCFOptions& ioptions,
451 const MutableCFOptions& /*moptions*/,
452 const BlockBasedTableOptions& /*table_options*/,
453 const InternalKeyComparator& /*internal_comparator*/,
454 const stl_wrappers::KVMap& kv_map) override {
455 delete memtable_->Unref();
456 ImmutableCFOptions mem_ioptions(ioptions);
457 memtable_ = new MemTable(internal_comparator_, mem_ioptions,
458 MutableCFOptions(options_), write_buffer_manager_,
459 kMaxSequenceNumber, 0 /* column_family_id */);
460 memtable_->Ref();
461 int seq = 1;
462 for (const auto kv : kv_map) {
463 memtable_->Add(seq, kTypeValue, kv.first, kv.second);
464 seq++;
465 }
466 return Status::OK();
467 }
468 InternalIterator* NewIterator(
469 const SliceTransform* /*prefix_extractor*/) const override {
470 return new KeyConvertingIterator(
471 memtable_->NewIterator(ReadOptions(), &arena_), true);
472 }
473
474 bool AnywayDeleteIterator() const override { return true; }
475
476 bool IsArenaMode() const override { return true; }
477
478 private:
479 mutable Arena arena_;
480 InternalKeyComparator internal_comparator_;
481 Options options_;
482 WriteBufferManager* write_buffer_manager_;
483 MemTable* memtable_;
484 std::shared_ptr<SkipListFactory> table_factory_;
485 };
486
487 class InternalIteratorFromIterator : public InternalIterator {
488 public:
489 explicit InternalIteratorFromIterator(Iterator* it) : it_(it) {}
490 bool Valid() const override { return it_->Valid(); }
491 void Seek(const Slice& target) override { it_->Seek(target); }
492 void SeekForPrev(const Slice& target) override { it_->SeekForPrev(target); }
493 void SeekToFirst() override { it_->SeekToFirst(); }
494 void SeekToLast() override { it_->SeekToLast(); }
495 void Next() override { it_->Next(); }
496 void Prev() override { it_->Prev(); }
497 Slice key() const override { return it_->key(); }
498 Slice value() const override { return it_->value(); }
499 Status status() const override { return it_->status(); }
500
501 private:
502 std::unique_ptr<Iterator> it_;
503 };
504
505 class DBConstructor: public Constructor {
506 public:
507 explicit DBConstructor(const Comparator* cmp)
508 : Constructor(cmp),
509 comparator_(cmp) {
510 db_ = nullptr;
511 NewDB();
512 }
513 ~DBConstructor() override { delete db_; }
514 Status FinishImpl(const Options& /*options*/,
515 const ImmutableCFOptions& /*ioptions*/,
516 const MutableCFOptions& /*moptions*/,
517 const BlockBasedTableOptions& /*table_options*/,
518 const InternalKeyComparator& /*internal_comparator*/,
519 const stl_wrappers::KVMap& kv_map) override {
520 delete db_;
521 db_ = nullptr;
522 NewDB();
523 for (const auto kv : kv_map) {
524 WriteBatch batch;
525 batch.Put(kv.first, kv.second);
526 EXPECT_TRUE(db_->Write(WriteOptions(), &batch).ok());
527 }
528 return Status::OK();
529 }
530
531 InternalIterator* NewIterator(
532 const SliceTransform* /*prefix_extractor*/) const override {
533 return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions()));
534 }
535
536 DB* db() const override { return db_; }
537
538 private:
539 void NewDB() {
540 std::string name = test::PerThreadDBPath("table_testdb");
541
542 Options options;
543 options.comparator = comparator_;
544 Status status = DestroyDB(name, options);
545 ASSERT_TRUE(status.ok()) << status.ToString();
546
547 options.create_if_missing = true;
548 options.error_if_exists = true;
549 options.write_buffer_size = 10000; // Something small to force merging
550 status = DB::Open(options, name, &db_);
551 ASSERT_TRUE(status.ok()) << status.ToString();
552 }
553
554 const Comparator* comparator_;
555 DB* db_;
556 };
557
558 enum TestType {
559 BLOCK_BASED_TABLE_TEST,
560 #ifndef ROCKSDB_LITE
561 PLAIN_TABLE_SEMI_FIXED_PREFIX,
562 PLAIN_TABLE_FULL_STR_PREFIX,
563 PLAIN_TABLE_TOTAL_ORDER,
564 #endif // !ROCKSDB_LITE
565 BLOCK_TEST,
566 MEMTABLE_TEST,
567 DB_TEST
568 };
569
570 struct TestArgs {
571 TestType type;
572 bool reverse_compare;
573 int restart_interval;
574 CompressionType compression;
575 uint32_t format_version;
576 bool use_mmap;
577 };
578
579 static std::vector<TestArgs> GenerateArgList() {
580 std::vector<TestArgs> test_args;
581 std::vector<TestType> test_types = {
582 BLOCK_BASED_TABLE_TEST,
583 #ifndef ROCKSDB_LITE
584 PLAIN_TABLE_SEMI_FIXED_PREFIX,
585 PLAIN_TABLE_FULL_STR_PREFIX,
586 PLAIN_TABLE_TOTAL_ORDER,
587 #endif // !ROCKSDB_LITE
588 BLOCK_TEST,
589 MEMTABLE_TEST, DB_TEST};
590 std::vector<bool> reverse_compare_types = {false, true};
591 std::vector<int> restart_intervals = {16, 1, 1024};
592
593 // Only add compression if it is supported
594 std::vector<std::pair<CompressionType, bool>> compression_types;
595 compression_types.emplace_back(kNoCompression, false);
596 if (Snappy_Supported()) {
597 compression_types.emplace_back(kSnappyCompression, false);
598 }
599 if (Zlib_Supported()) {
600 compression_types.emplace_back(kZlibCompression, false);
601 compression_types.emplace_back(kZlibCompression, true);
602 }
603 if (BZip2_Supported()) {
604 compression_types.emplace_back(kBZip2Compression, false);
605 compression_types.emplace_back(kBZip2Compression, true);
606 }
607 if (LZ4_Supported()) {
608 compression_types.emplace_back(kLZ4Compression, false);
609 compression_types.emplace_back(kLZ4Compression, true);
610 compression_types.emplace_back(kLZ4HCCompression, false);
611 compression_types.emplace_back(kLZ4HCCompression, true);
612 }
613 if (XPRESS_Supported()) {
614 compression_types.emplace_back(kXpressCompression, false);
615 compression_types.emplace_back(kXpressCompression, true);
616 }
617 if (ZSTD_Supported()) {
618 compression_types.emplace_back(kZSTD, false);
619 compression_types.emplace_back(kZSTD, true);
620 }
621
622 for (auto test_type : test_types) {
623 for (auto reverse_compare : reverse_compare_types) {
624 #ifndef ROCKSDB_LITE
625 if (test_type == PLAIN_TABLE_SEMI_FIXED_PREFIX ||
626 test_type == PLAIN_TABLE_FULL_STR_PREFIX ||
627 test_type == PLAIN_TABLE_TOTAL_ORDER) {
628 // Plain table doesn't use restart index or compression.
629 TestArgs one_arg;
630 one_arg.type = test_type;
631 one_arg.reverse_compare = reverse_compare;
632 one_arg.restart_interval = restart_intervals[0];
633 one_arg.compression = compression_types[0].first;
634 one_arg.use_mmap = true;
635 test_args.push_back(one_arg);
636 one_arg.use_mmap = false;
637 test_args.push_back(one_arg);
638 continue;
639 }
640 #endif // !ROCKSDB_LITE
641
642 for (auto restart_interval : restart_intervals) {
643 for (auto compression_type : compression_types) {
644 TestArgs one_arg;
645 one_arg.type = test_type;
646 one_arg.reverse_compare = reverse_compare;
647 one_arg.restart_interval = restart_interval;
648 one_arg.compression = compression_type.first;
649 one_arg.format_version = compression_type.second ? 2 : 1;
650 one_arg.use_mmap = false;
651 test_args.push_back(one_arg);
652 }
653 }
654 }
655 }
656 return test_args;
657 }
658
659 // In order to make all tests run for plain table format, including
660 // those operating on empty keys, create a new prefix transformer which
661 // returns a fixed prefix if the slice is not shorter than the prefix length,
662 // and the full slice if it is shorter.
663 class FixedOrLessPrefixTransform : public SliceTransform {
664 private:
665 const size_t prefix_len_;
666
667 public:
668 explicit FixedOrLessPrefixTransform(size_t prefix_len) :
669 prefix_len_(prefix_len) {
670 }
671
672 const char* Name() const override { return "rocksdb.FixedPrefix"; }
673
674 Slice Transform(const Slice& src) const override {
675 assert(InDomain(src));
676 if (src.size() < prefix_len_) {
677 return src;
678 }
679 return Slice(src.data(), prefix_len_);
680 }
681
682 bool InDomain(const Slice& /*src*/) const override { return true; }
683
684 bool InRange(const Slice& dst) const override {
685 return (dst.size() <= prefix_len_);
686 }
687 bool FullLengthEnabled(size_t* /*len*/) const override { return false; }
688 };
689
690 class HarnessTest : public testing::Test {
691 public:
692 HarnessTest()
693 : ioptions_(options_),
694 moptions_(options_),
695 constructor_(nullptr),
696 write_buffer_(options_.db_write_buffer_size) {}
697
698 void Init(const TestArgs& args) {
699 delete constructor_;
700 constructor_ = nullptr;
701 options_ = Options();
702 options_.compression = args.compression;
703 // Use shorter block size for tests to exercise block boundary
704 // conditions more.
705 if (args.reverse_compare) {
706 options_.comparator = &reverse_key_comparator;
707 }
708
709 internal_comparator_.reset(
710 new test::PlainInternalKeyComparator(options_.comparator));
711
712 support_prev_ = true;
713 only_support_prefix_seek_ = false;
714 options_.allow_mmap_reads = args.use_mmap;
715 switch (args.type) {
716 case BLOCK_BASED_TABLE_TEST:
717 table_options_.flush_block_policy_factory.reset(
718 new FlushBlockBySizePolicyFactory());
719 table_options_.block_size = 256;
720 table_options_.block_restart_interval = args.restart_interval;
721 table_options_.index_block_restart_interval = args.restart_interval;
722 table_options_.format_version = args.format_version;
723 options_.table_factory.reset(
724 new BlockBasedTableFactory(table_options_));
725 constructor_ = new TableConstructor(
726 options_.comparator, true /* convert_to_internal_key_ */);
727 internal_comparator_.reset(
728 new InternalKeyComparator(options_.comparator));
729 break;
730 // Plain table is not supported in ROCKSDB_LITE
731 #ifndef ROCKSDB_LITE
732 case PLAIN_TABLE_SEMI_FIXED_PREFIX:
733 support_prev_ = false;
734 only_support_prefix_seek_ = true;
735 options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2));
736 options_.table_factory.reset(NewPlainTableFactory());
737 constructor_ = new TableConstructor(
738 options_.comparator, true /* convert_to_internal_key_ */);
739 internal_comparator_.reset(
740 new InternalKeyComparator(options_.comparator));
741 break;
742 case PLAIN_TABLE_FULL_STR_PREFIX:
743 support_prev_ = false;
744 only_support_prefix_seek_ = true;
745 options_.prefix_extractor.reset(NewNoopTransform());
746 options_.table_factory.reset(NewPlainTableFactory());
747 constructor_ = new TableConstructor(
748 options_.comparator, true /* convert_to_internal_key_ */);
749 internal_comparator_.reset(
750 new InternalKeyComparator(options_.comparator));
751 break;
752 case PLAIN_TABLE_TOTAL_ORDER:
753 support_prev_ = false;
754 only_support_prefix_seek_ = false;
755 options_.prefix_extractor = nullptr;
756
757 {
758 PlainTableOptions plain_table_options;
759 plain_table_options.user_key_len = kPlainTableVariableLength;
760 plain_table_options.bloom_bits_per_key = 0;
761 plain_table_options.hash_table_ratio = 0;
762
763 options_.table_factory.reset(
764 NewPlainTableFactory(plain_table_options));
765 }
766 constructor_ = new TableConstructor(
767 options_.comparator, true /* convert_to_internal_key_ */);
768 internal_comparator_.reset(
769 new InternalKeyComparator(options_.comparator));
770 break;
771 #endif // !ROCKSDB_LITE
772 case BLOCK_TEST:
773 table_options_.block_size = 256;
774 options_.table_factory.reset(
775 new BlockBasedTableFactory(table_options_));
776 constructor_ = new BlockConstructor(options_.comparator);
777 break;
778 case MEMTABLE_TEST:
779 table_options_.block_size = 256;
780 options_.table_factory.reset(
781 new BlockBasedTableFactory(table_options_));
782 constructor_ = new MemTableConstructor(options_.comparator,
783 &write_buffer_);
784 break;
785 case DB_TEST:
786 table_options_.block_size = 256;
787 options_.table_factory.reset(
788 new BlockBasedTableFactory(table_options_));
789 constructor_ = new DBConstructor(options_.comparator);
790 break;
791 }
792 ioptions_ = ImmutableCFOptions(options_);
793 moptions_ = MutableCFOptions(options_);
794 }
795
796 ~HarnessTest() override { delete constructor_; }
797
798 void Add(const std::string& key, const std::string& value) {
799 constructor_->Add(key, value);
800 }
801
802 void Test(Random* rnd) {
803 std::vector<std::string> keys;
804 stl_wrappers::KVMap data;
805 constructor_->Finish(options_, ioptions_, moptions_, table_options_,
806 *internal_comparator_, &keys, &data);
807
808 TestForwardScan(keys, data);
809 if (support_prev_) {
810 TestBackwardScan(keys, data);
811 }
812 TestRandomAccess(rnd, keys, data);
813 }
814
815 void TestForwardScan(const std::vector<std::string>& /*keys*/,
816 const stl_wrappers::KVMap& data) {
817 InternalIterator* iter = constructor_->NewIterator();
818 ASSERT_TRUE(!iter->Valid());
819 iter->SeekToFirst();
820 for (stl_wrappers::KVMap::const_iterator model_iter = data.begin();
821 model_iter != data.end(); ++model_iter) {
822 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
823 iter->Next();
824 }
825 ASSERT_TRUE(!iter->Valid());
826 if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
827 iter->~InternalIterator();
828 } else {
829 delete iter;
830 }
831 }
832
833 void TestBackwardScan(const std::vector<std::string>& /*keys*/,
834 const stl_wrappers::KVMap& data) {
835 InternalIterator* iter = constructor_->NewIterator();
836 ASSERT_TRUE(!iter->Valid());
837 iter->SeekToLast();
838 for (stl_wrappers::KVMap::const_reverse_iterator model_iter = data.rbegin();
839 model_iter != data.rend(); ++model_iter) {
840 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
841 iter->Prev();
842 }
843 ASSERT_TRUE(!iter->Valid());
844 if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
845 iter->~InternalIterator();
846 } else {
847 delete iter;
848 }
849 }
850
851 void TestRandomAccess(Random* rnd, const std::vector<std::string>& keys,
852 const stl_wrappers::KVMap& data) {
853 static const bool kVerbose = false;
854 InternalIterator* iter = constructor_->NewIterator();
855 ASSERT_TRUE(!iter->Valid());
856 stl_wrappers::KVMap::const_iterator model_iter = data.begin();
857 if (kVerbose) fprintf(stderr, "---\n");
858 for (int i = 0; i < 200; i++) {
859 const int toss = rnd->Uniform(support_prev_ ? 5 : 3);
860 switch (toss) {
861 case 0: {
862 if (iter->Valid()) {
863 if (kVerbose) fprintf(stderr, "Next\n");
864 iter->Next();
865 ++model_iter;
866 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
867 }
868 break;
869 }
870
871 case 1: {
872 if (kVerbose) fprintf(stderr, "SeekToFirst\n");
873 iter->SeekToFirst();
874 model_iter = data.begin();
875 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
876 break;
877 }
878
879 case 2: {
880 std::string key = PickRandomKey(rnd, keys);
881 model_iter = data.lower_bound(key);
882 if (kVerbose) fprintf(stderr, "Seek '%s'\n",
883 EscapeString(key).c_str());
884 iter->Seek(Slice(key));
885 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
886 break;
887 }
888
889 case 3: {
890 if (iter->Valid()) {
891 if (kVerbose) fprintf(stderr, "Prev\n");
892 iter->Prev();
893 if (model_iter == data.begin()) {
894 model_iter = data.end(); // Wrap around to invalid value
895 } else {
896 --model_iter;
897 }
898 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
899 }
900 break;
901 }
902
903 case 4: {
904 if (kVerbose) fprintf(stderr, "SeekToLast\n");
905 iter->SeekToLast();
906 if (keys.empty()) {
907 model_iter = data.end();
908 } else {
909 std::string last = data.rbegin()->first;
910 model_iter = data.lower_bound(last);
911 }
912 ASSERT_EQ(ToString(data, model_iter), ToString(iter));
913 break;
914 }
915 }
916 }
917 if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
918 iter->~InternalIterator();
919 } else {
920 delete iter;
921 }
922 }
923
924 std::string ToString(const stl_wrappers::KVMap& data,
925 const stl_wrappers::KVMap::const_iterator& it) {
926 if (it == data.end()) {
927 return "END";
928 } else {
929 return "'" + it->first + "->" + it->second + "'";
930 }
931 }
932
933 std::string ToString(const stl_wrappers::KVMap& data,
934 const stl_wrappers::KVMap::const_reverse_iterator& it) {
935 if (it == data.rend()) {
936 return "END";
937 } else {
938 return "'" + it->first + "->" + it->second + "'";
939 }
940 }
941
942 std::string ToString(const InternalIterator* it) {
943 if (!it->Valid()) {
944 return "END";
945 } else {
946 return "'" + it->key().ToString() + "->" + it->value().ToString() + "'";
947 }
948 }
949
950 std::string PickRandomKey(Random* rnd, const std::vector<std::string>& keys) {
951 if (keys.empty()) {
952 return "foo";
953 } else {
954 const int index = rnd->Uniform(static_cast<int>(keys.size()));
955 std::string result = keys[index];
956 switch (rnd->Uniform(support_prev_ ? 3 : 1)) {
957 case 0:
958 // Return an existing key
959 break;
960 case 1: {
961 // Attempt to return something smaller than an existing key
962 if (result.size() > 0 && result[result.size() - 1] > '\0'
963 && (!only_support_prefix_seek_
964 || options_.prefix_extractor->Transform(result).size()
965 < result.size())) {
966 result[result.size() - 1]--;
967 }
968 break;
969 }
970 case 2: {
971 // Return something larger than an existing key
972 Increment(options_.comparator, &result);
973 break;
974 }
975 }
976 return result;
977 }
978 }
979
980 // Returns nullptr if not running against a DB
981 DB* db() const { return constructor_->db(); }
982
983 void RandomizedHarnessTest(size_t part, size_t total) {
984 std::vector<TestArgs> args = GenerateArgList();
985 assert(part);
986 assert(part <= total);
987 for (size_t i = 0; i < args.size(); i++) {
988 if ((i % total) + 1 != part) {
989 continue;
990 }
991 Init(args[i]);
992 Random rnd(test::RandomSeed() + 5);
993 for (int num_entries = 0; num_entries < 2000;
994 num_entries += (num_entries < 50 ? 1 : 200)) {
995 for (int e = 0; e < num_entries; e++) {
996 std::string v;
997 Add(test::RandomKey(&rnd, rnd.Skewed(4)),
998 test::RandomString(&rnd, rnd.Skewed(5), &v).ToString());
999 }
1000 Test(&rnd);
1001 }
1002 }
1003 }
1004
1005 private:
1006 Options options_ = Options();
1007 ImmutableCFOptions ioptions_;
1008 MutableCFOptions moptions_;
1009 BlockBasedTableOptions table_options_ = BlockBasedTableOptions();
1010 Constructor* constructor_;
1011 WriteBufferManager write_buffer_;
1012 bool support_prev_;
1013 bool only_support_prefix_seek_;
1014 std::shared_ptr<InternalKeyComparator> internal_comparator_;
1015 };
1016
1017 static bool Between(uint64_t val, uint64_t low, uint64_t high) {
1018 bool result = (val >= low) && (val <= high);
1019 if (!result) {
1020 fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
1021 (unsigned long long)(val),
1022 (unsigned long long)(low),
1023 (unsigned long long)(high));
1024 }
1025 return result;
1026 }
1027
1028 // Tests against all kinds of tables
1029 class TableTest : public testing::Test {
1030 public:
1031 const InternalKeyComparator& GetPlainInternalComparator(
1032 const Comparator* comp) {
1033 if (!plain_internal_comparator) {
1034 plain_internal_comparator.reset(
1035 new test::PlainInternalKeyComparator(comp));
1036 }
1037 return *plain_internal_comparator;
1038 }
1039 void IndexTest(BlockBasedTableOptions table_options);
1040
1041 private:
1042 std::unique_ptr<InternalKeyComparator> plain_internal_comparator;
1043 };
1044
1045 class GeneralTableTest : public TableTest {};
1046 class BlockBasedTableTest
1047 : public TableTest,
1048 virtual public ::testing::WithParamInterface<uint32_t> {
1049 public:
1050 BlockBasedTableTest() : format_(GetParam()) {}
1051
1052 BlockBasedTableOptions GetBlockBasedTableOptions() {
1053 BlockBasedTableOptions options;
1054 options.format_version = format_;
1055 return options;
1056 }
1057
1058 protected:
1059 uint64_t IndexUncompressedHelper(bool indexCompress);
1060
1061 private:
1062 uint32_t format_;
1063 };
1064 class PlainTableTest : public TableTest {};
1065 class TablePropertyTest : public testing::Test {};
1066 class BBTTailPrefetchTest : public TableTest {};
1067
1068 INSTANTIATE_TEST_CASE_P(FormatDef, BlockBasedTableTest,
1069 testing::Values(test::kDefaultFormatVersion));
1070 INSTANTIATE_TEST_CASE_P(FormatLatest, BlockBasedTableTest,
1071 testing::Values(test::kLatestFormatVersion));
1072
1073 // This test serves as a living tutorial for the prefix scan of user-collected
1074 // properties.
1075 TEST_F(TablePropertyTest, PrefixScanTest) {
1076 UserCollectedProperties props{{"num.111.1", "1"},
1077 {"num.111.2", "2"},
1078 {"num.111.3", "3"},
1079 {"num.333.1", "1"},
1080 {"num.333.2", "2"},
1081 {"num.333.3", "3"},
1082 {"num.555.1", "1"},
1083 {"num.555.2", "2"},
1084 {"num.555.3", "3"}, };
1085
1086 // prefixes that exist
1087 for (const std::string& prefix : {"num.111", "num.333", "num.555"}) {
1088 int num = 0;
1089 for (auto pos = props.lower_bound(prefix);
1090 pos != props.end() &&
1091 pos->first.compare(0, prefix.size(), prefix) == 0;
1092 ++pos) {
1093 ++num;
1094 auto key = prefix + "." + ToString(num);
1095 ASSERT_EQ(key, pos->first);
1096 ASSERT_EQ(ToString(num), pos->second);
1097 }
1098 ASSERT_EQ(3, num);
1099 }
1100
1101 // prefixes that don't exist
1102 for (const std::string& prefix :
1103 {"num.000", "num.222", "num.444", "num.666"}) {
1104 auto pos = props.lower_bound(prefix);
1105 ASSERT_TRUE(pos == props.end() ||
1106 pos->first.compare(0, prefix.size(), prefix) != 0);
1107 }
1108 }
1109
1110 // This test includes all the basic checks except those for index size and block
1111 // size, which will be conducted in separate unit tests.
1112 TEST_P(BlockBasedTableTest, BasicBlockBasedTableProperties) {
1113 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1114
1115 c.Add("a1", "val1");
1116 c.Add("b2", "val2");
1117 c.Add("c3", "val3");
1118 c.Add("d4", "val4");
1119 c.Add("e5", "val5");
1120 c.Add("f6", "val6");
1121 c.Add("g7", "val7");
1122 c.Add("h8", "val8");
1123 c.Add("j9", "val9");
1124 uint64_t diff_internal_user_bytes = 9 * 8; // 8 bytes (seq + type) per key, 9 k-v pairs in total
1125
1126 std::vector<std::string> keys;
1127 stl_wrappers::KVMap kvmap;
1128 Options options;
1129 options.compression = kNoCompression;
1130 options.statistics = CreateDBStatistics();
1131 options.statistics->set_stats_level(StatsLevel::kAll);
1132 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1133 table_options.block_restart_interval = 1;
1134 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1135
1136 ImmutableCFOptions ioptions(options);
1137 MutableCFOptions moptions(options);
1138 ioptions.statistics = options.statistics.get();
1139 c.Finish(options, ioptions, moptions, table_options,
1140 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1141 ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_NOT_COMPRESSED), 0);
1142
1143 auto& props = *c.GetTableReader()->GetTableProperties();
1144 ASSERT_EQ(kvmap.size(), props.num_entries);
1145
1146 auto raw_key_size = kvmap.size() * 2ul;
1147 auto raw_value_size = kvmap.size() * 4ul;
1148
1149 ASSERT_EQ(raw_key_size + diff_internal_user_bytes, props.raw_key_size);
1150 ASSERT_EQ(raw_value_size, props.raw_value_size);
1151 ASSERT_EQ(1ul, props.num_data_blocks);
1152 ASSERT_EQ("", props.filter_policy_name); // no filter policy is used
1153
1154 // Verify data size.
1155 BlockBuilder block_builder(1);
1156 for (const auto& item : kvmap) {
1157 block_builder.Add(item.first, item.second);
1158 }
1159 Slice content = block_builder.Finish();
1160 ASSERT_EQ(content.size() + kBlockTrailerSize + diff_internal_user_bytes,
1161 props.data_size);
1162 c.ResetTableReader();
1163 }
1164
1165 #ifdef SNAPPY
1166 uint64_t BlockBasedTableTest::IndexUncompressedHelper(bool compressed) {
1167 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1168 constexpr size_t kNumKeys = 10000;
1169
1170 for (size_t k = 0; k < kNumKeys; ++k) {
1171 c.Add("key" + ToString(k), "val" + ToString(k));
1172 }
1173
1174 std::vector<std::string> keys;
1175 stl_wrappers::KVMap kvmap;
1176 Options options;
1177 options.compression = kSnappyCompression;
1178 options.statistics = CreateDBStatistics();
1179 options.statistics->set_stats_level(StatsLevel::kAll);
1180 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1181 table_options.block_restart_interval = 1;
1182 table_options.enable_index_compression = compressed;
1183 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1184
1185 ImmutableCFOptions ioptions(options);
1186 MutableCFOptions moptions(options);
1187 ioptions.statistics = options.statistics.get();
1188 c.Finish(options, ioptions, moptions, table_options,
1189 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1190 c.ResetTableReader();
1191 return options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED);
1192 }
1193 TEST_P(BlockBasedTableTest, IndexUncompressed) {
1194 uint64_t tbl1_compressed_cnt = IndexUncompressedHelper(true);
1195 uint64_t tbl2_compressed_cnt = IndexUncompressedHelper(false);
1196 // tbl1_compressed_cnt should include 1 index block
1197 EXPECT_EQ(tbl2_compressed_cnt + 1, tbl1_compressed_cnt);
1198 }
1199 #endif // SNAPPY
1200
1201 TEST_P(BlockBasedTableTest, BlockBasedTableProperties2) {
1202 TableConstructor c(&reverse_key_comparator);
1203 std::vector<std::string> keys;
1204 stl_wrappers::KVMap kvmap;
1205
1206 {
1207 Options options;
1208 options.compression = CompressionType::kNoCompression;
1209 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1210 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1211
1212 const ImmutableCFOptions ioptions(options);
1213 const MutableCFOptions moptions(options);
1214 c.Finish(options, ioptions, moptions, table_options,
1215 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1216
1217 auto& props = *c.GetTableReader()->GetTableProperties();
1218
1219 // Default comparator
1220 ASSERT_EQ("leveldb.BytewiseComparator", props.comparator_name);
1221 // No merge operator
1222 ASSERT_EQ("nullptr", props.merge_operator_name);
1223 // No prefix extractor
1224 ASSERT_EQ("nullptr", props.prefix_extractor_name);
1225 // No property collectors
1226 ASSERT_EQ("[]", props.property_collectors_names);
1227 // No filter policy is used
1228 ASSERT_EQ("", props.filter_policy_name);
1229 // Compression type matches the one that was set:
1230 ASSERT_EQ("NoCompression", props.compression_name);
1231 c.ResetTableReader();
1232 }
1233
1234 {
1235 Options options;
1236 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1237 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1238 options.comparator = &reverse_key_comparator;
1239 options.merge_operator = MergeOperators::CreateUInt64AddOperator();
1240 options.prefix_extractor.reset(NewNoopTransform());
1241 options.table_properties_collector_factories.emplace_back(
1242 new DummyPropertiesCollectorFactory1());
1243 options.table_properties_collector_factories.emplace_back(
1244 new DummyPropertiesCollectorFactory2());
1245
1246 const ImmutableCFOptions ioptions(options);
1247 const MutableCFOptions moptions(options);
1248 c.Finish(options, ioptions, moptions, table_options,
1249 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1250
1251 auto& props = *c.GetTableReader()->GetTableProperties();
1252
1253 ASSERT_EQ("rocksdb.ReverseBytewiseComparator", props.comparator_name);
1254 ASSERT_EQ("UInt64AddOperator", props.merge_operator_name);
1255 ASSERT_EQ("rocksdb.Noop", props.prefix_extractor_name);
1256 ASSERT_EQ("[DummyPropertiesCollector1,DummyPropertiesCollector2]",
1257 props.property_collectors_names);
1258 ASSERT_EQ("", props.filter_policy_name); // no filter policy is used
1259 c.ResetTableReader();
1260 }
1261 }
1262
1263 TEST_P(BlockBasedTableTest, RangeDelBlock) {
1264 TableConstructor c(BytewiseComparator());
1265 std::vector<std::string> keys = {"1pika", "2chu"};
1266 std::vector<std::string> vals = {"p", "c"};
1267
1268 std::vector<RangeTombstone> expected_tombstones = {
1269 {"1pika", "2chu", 0},
1270 {"2chu", "c", 1},
1271 {"2chu", "c", 0},
1272 {"c", "p", 0},
1273 };
1274
1275 for (int i = 0; i < 2; i++) {
1276 RangeTombstone t(keys[i], vals[i], i);
1277 std::pair<InternalKey, Slice> p = t.Serialize();
1278 c.Add(p.first.Encode().ToString(), p.second);
1279 }
1280
1281 std::vector<std::string> sorted_keys;
1282 stl_wrappers::KVMap kvmap;
1283 Options options;
1284 options.compression = kNoCompression;
1285 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1286 table_options.block_restart_interval = 1;
1287 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1288
1289 const ImmutableCFOptions ioptions(options);
1290 const MutableCFOptions moptions(options);
1291 std::unique_ptr<InternalKeyComparator> internal_cmp(
1292 new InternalKeyComparator(options.comparator));
1293 c.Finish(options, ioptions, moptions, table_options, *internal_cmp,
1294 &sorted_keys, &kvmap);
1295
1296 for (int j = 0; j < 2; ++j) {
1297 std::unique_ptr<InternalIterator> iter(
1298 c.GetTableReader()->NewRangeTombstoneIterator(ReadOptions()));
1299 if (j > 0) {
1300 // For the second iteration, delete the table reader object and verify the
1301 // iterator can still access its metablock's range tombstones.
1302 c.ResetTableReader();
1303 }
1304 ASSERT_FALSE(iter->Valid());
1305 iter->SeekToFirst();
1306 ASSERT_TRUE(iter->Valid());
1307 for (size_t i = 0; i < expected_tombstones.size(); i++) {
1308 ASSERT_TRUE(iter->Valid());
1309 ParsedInternalKey parsed_key;
1310 ASSERT_TRUE(ParseInternalKey(iter->key(), &parsed_key));
1311 RangeTombstone t(parsed_key, iter->value());
1312 const auto& expected_t = expected_tombstones[i];
1313 ASSERT_EQ(t.start_key_, expected_t.start_key_);
1314 ASSERT_EQ(t.end_key_, expected_t.end_key_);
1315 ASSERT_EQ(t.seq_, expected_t.seq_);
1316 iter->Next();
1317 }
1318 ASSERT_TRUE(!iter->Valid());
1319 }
1320 }
1321
1322 TEST_P(BlockBasedTableTest, FilterPolicyNameProperties) {
1323 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1324 c.Add("a1", "val1");
1325 std::vector<std::string> keys;
1326 stl_wrappers::KVMap kvmap;
1327 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1328 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1329 Options options;
1330 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1331
1332 const ImmutableCFOptions ioptions(options);
1333 const MutableCFOptions moptions(options);
1334 c.Finish(options, ioptions, moptions, table_options,
1335 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1336 auto& props = *c.GetTableReader()->GetTableProperties();
1337 ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name);
1338 c.ResetTableReader();
1339 }
1340
1341 //
1342 // BlockBasedTableTest::PrefetchTest
1343 //
1344 void AssertKeysInCache(BlockBasedTable* table_reader,
1345 const std::vector<std::string>& keys_in_cache,
1346 const std::vector<std::string>& keys_not_in_cache,
1347 bool convert = false) {
1348 if (convert) {
1349 for (auto key : keys_in_cache) {
1350 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
1351 ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
1352 }
1353 for (auto key : keys_not_in_cache) {
1354 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
1355 ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
1356 }
1357 } else {
1358 for (auto key : keys_in_cache) {
1359 ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key));
1360 }
1361 for (auto key : keys_not_in_cache) {
1362 ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key));
1363 }
1364 }
1365 }
1366
1367 void PrefetchRange(TableConstructor* c, Options* opt,
1368 BlockBasedTableOptions* table_options, const char* key_begin,
1369 const char* key_end,
1370 const std::vector<std::string>& keys_in_cache,
1371 const std::vector<std::string>& keys_not_in_cache,
1372 const Status expected_status = Status::OK()) {
1373 // reset the cache and reopen the table
1374 table_options->block_cache = NewLRUCache(16 * 1024 * 1024, 4);
1375 opt->table_factory.reset(NewBlockBasedTableFactory(*table_options));
1376 const ImmutableCFOptions ioptions2(*opt);
1377 const MutableCFOptions moptions(*opt);
1378 ASSERT_OK(c->Reopen(ioptions2, moptions));
1379
1380 // prefetch
1381 auto* table_reader = dynamic_cast<BlockBasedTable*>(c->GetTableReader());
1382 Status s;
1383 std::unique_ptr<Slice> begin, end;
1384 std::unique_ptr<InternalKey> i_begin, i_end;
1385 if (key_begin != nullptr) {
1386 if (c->ConvertToInternalKey()) {
1387 i_begin.reset(new InternalKey(key_begin, kMaxSequenceNumber, kTypeValue));
1388 begin.reset(new Slice(i_begin->Encode()));
1389 } else {
1390 begin.reset(new Slice(key_begin));
1391 }
1392 }
1393 if (key_end != nullptr) {
1394 if (c->ConvertToInternalKey()) {
1395 i_end.reset(new InternalKey(key_end, kMaxSequenceNumber, kTypeValue));
1396 end.reset(new Slice(i_end->Encode()));
1397 } else {
1398 end.reset(new Slice(key_end));
1399 }
1400 }
1401 s = table_reader->Prefetch(begin.get(), end.get());
1402
1403 ASSERT_TRUE(s.code() == expected_status.code());
1404
1405 // assert our expectations about cache warmup
1406 AssertKeysInCache(table_reader, keys_in_cache, keys_not_in_cache,
1407 c->ConvertToInternalKey());
1408 c->ResetTableReader();
1409 }
1410
1411 TEST_P(BlockBasedTableTest, PrefetchTest) {
1412 // This test exercises the prefetching operation built into
1413 // BlockBasedTable.
1414 Options opt;
1415 std::unique_ptr<InternalKeyComparator> ikc;
1416 ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
1417 opt.compression = kNoCompression;
1418 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1419 table_options.block_size = 1024;
1420 // big enough so we don't ever lose cached values.
1421 table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
1422 opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
1423
1424 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1425 c.Add("k01", "hello");
1426 c.Add("k02", "hello2");
1427 c.Add("k03", std::string(10000, 'x'));
1428 c.Add("k04", std::string(200000, 'x'));
1429 c.Add("k05", std::string(300000, 'x'));
1430 c.Add("k06", "hello3");
1431 c.Add("k07", std::string(100000, 'x'));
1432 std::vector<std::string> keys;
1433 stl_wrappers::KVMap kvmap;
1434 const ImmutableCFOptions ioptions(opt);
1435 const MutableCFOptions moptions(opt);
1436 c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
1437 c.ResetTableReader();
1438
1439 // We get the following data spread :
1440 //
1441 // Data block Index
1442 // ========================
1443 // [ k01 k02 k03 ] k03
1444 // [ k04 ] k04
1445 // [ k05 ] k05
1446 // [ k06 k07 ] k07
1447
1448
1449 // Simple
1450 PrefetchRange(&c, &opt, &table_options,
1451 /*key_range=*/"k01", "k05",
1452 /*keys_in_cache=*/{"k01", "k02", "k03", "k04", "k05"},
1453 /*keys_not_in_cache=*/{"k06", "k07"});
1454 PrefetchRange(&c, &opt, &table_options, "k01", "k01", {"k01", "k02", "k03"},
1455 {"k04", "k05", "k06", "k07"});
1456 // odd
1457 PrefetchRange(&c, &opt, &table_options, "a", "z",
1458 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1459 PrefetchRange(&c, &opt, &table_options, "k00", "k00", {"k01", "k02", "k03"},
1460 {"k04", "k05", "k06", "k07"});
1461 // Edge cases
1462 PrefetchRange(&c, &opt, &table_options, "k00", "k06",
1463 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1464 PrefetchRange(&c, &opt, &table_options, "k00", "zzz",
1465 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1466 // null keys
1467 PrefetchRange(&c, &opt, &table_options, nullptr, nullptr,
1468 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1469 PrefetchRange(&c, &opt, &table_options, "k04", nullptr,
1470 {"k04", "k05", "k06", "k07"}, {"k01", "k02", "k03"});
1471 PrefetchRange(&c, &opt, &table_options, nullptr, "k05",
1472 {"k01", "k02", "k03", "k04", "k05"}, {"k06", "k07"});
1473 // invalid
1474 PrefetchRange(&c, &opt, &table_options, "k06", "k00", {}, {},
1475 Status::InvalidArgument(Slice("k06 "), Slice("k07")));
1476 c.ResetTableReader();
1477 }
1478
1479 TEST_P(BlockBasedTableTest, TotalOrderSeekOnHashIndex) {
1480 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1481 for (int i = 0; i < 4; ++i) {
1482 Options options;
1483 // Make each key/value an individual block
1484 table_options.block_size = 64;
1485 switch (i) {
1486 case 0:
1487 // Binary search index
1488 table_options.index_type = BlockBasedTableOptions::kBinarySearch;
1489 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1490 break;
1491 case 1:
1492 // Hash search index
1493 table_options.index_type = BlockBasedTableOptions::kHashSearch;
1494 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1495 options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1496 break;
1497 case 2:
1498 // Hash search index with hash_index_allow_collision
1499 table_options.index_type = BlockBasedTableOptions::kHashSearch;
1500 table_options.hash_index_allow_collision = true;
1501 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1502 options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1503 break;
1504 case 3:
1505 // Hash search index with filter policy
1506 table_options.index_type = BlockBasedTableOptions::kHashSearch;
1507 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1508 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1509 options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1510 break;
1511 case 4:
1512 default:
1513 // Two-level index search
1514 table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
1515 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1516 break;
1517 }
1518
1519 TableConstructor c(BytewiseComparator(),
1520 true /* convert_to_internal_key_ */);
1521 c.Add("aaaa1", std::string('a', 56));
1522 c.Add("bbaa1", std::string('a', 56));
1523 c.Add("cccc1", std::string('a', 56));
1524 c.Add("bbbb1", std::string('a', 56));
1525 c.Add("baaa1", std::string('a', 56));
1526 c.Add("abbb1", std::string('a', 56));
1527 c.Add("cccc2", std::string('a', 56));
1528 std::vector<std::string> keys;
1529 stl_wrappers::KVMap kvmap;
1530 const ImmutableCFOptions ioptions(options);
1531 const MutableCFOptions moptions(options);
1532 c.Finish(options, ioptions, moptions, table_options,
1533 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1534 auto props = c.GetTableReader()->GetTableProperties();
1535 ASSERT_EQ(7u, props->num_data_blocks);
1536 auto* reader = c.GetTableReader();
1537 ReadOptions ro;
1538 ro.total_order_seek = true;
1539 std::unique_ptr<InternalIterator> iter(
1540 reader->NewIterator(ro, moptions.prefix_extractor.get()));
1541
1542 iter->Seek(InternalKey("b", 0, kTypeValue).Encode());
1543 ASSERT_OK(iter->status());
1544 ASSERT_TRUE(iter->Valid());
1545 ASSERT_EQ("baaa1", ExtractUserKey(iter->key()).ToString());
1546 iter->Next();
1547 ASSERT_OK(iter->status());
1548 ASSERT_TRUE(iter->Valid());
1549 ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
1550
1551 iter->Seek(InternalKey("bb", 0, kTypeValue).Encode());
1552 ASSERT_OK(iter->status());
1553 ASSERT_TRUE(iter->Valid());
1554 ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
1555 iter->Next();
1556 ASSERT_OK(iter->status());
1557 ASSERT_TRUE(iter->Valid());
1558 ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
1559
1560 iter->Seek(InternalKey("bbb", 0, kTypeValue).Encode());
1561 ASSERT_OK(iter->status());
1562 ASSERT_TRUE(iter->Valid());
1563 ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
1564 iter->Next();
1565 ASSERT_OK(iter->status());
1566 ASSERT_TRUE(iter->Valid());
1567 ASSERT_EQ("cccc1", ExtractUserKey(iter->key()).ToString());
1568 }
1569 }
1570
1571 TEST_P(BlockBasedTableTest, NoopTransformSeek) {
1572 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1573 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1574
1575 Options options;
1576 options.comparator = BytewiseComparator();
1577 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1578 options.prefix_extractor.reset(NewNoopTransform());
1579
1580 TableConstructor c(options.comparator);
1581 // To tickle the PrefixMayMatch bug it is important that the
1582 // user-key is a single byte so that the index key exactly matches
1583 // the user-key.
1584 InternalKey key("a", 1, kTypeValue);
1585 c.Add(key.Encode().ToString(), "b");
1586 std::vector<std::string> keys;
1587 stl_wrappers::KVMap kvmap;
1588 const ImmutableCFOptions ioptions(options);
1589 const MutableCFOptions moptions(options);
1590 const InternalKeyComparator internal_comparator(options.comparator);
1591 c.Finish(options, ioptions, moptions, table_options, internal_comparator,
1592 &keys, &kvmap);
1593
1594 auto* reader = c.GetTableReader();
1595 for (int i = 0; i < 2; ++i) {
1596 ReadOptions ro;
1597 ro.total_order_seek = (i == 0);
1598 std::unique_ptr<InternalIterator> iter(
1599 reader->NewIterator(ro, moptions.prefix_extractor.get()));
1600
1601 iter->Seek(key.Encode());
1602 ASSERT_OK(iter->status());
1603 ASSERT_TRUE(iter->Valid());
1604 ASSERT_EQ("a", ExtractUserKey(iter->key()).ToString());
1605 }
1606 }
1607
1608 TEST_P(BlockBasedTableTest, SkipPrefixBloomFilter) {
1609 // If the DB is opened with a prefix extractor of a different name,
1610 // the prefix bloom filter is skipped when reading the file
1611 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1612 table_options.filter_policy.reset(NewBloomFilterPolicy(2));
1613 table_options.whole_key_filtering = false;
1614
1615 Options options;
1616 options.comparator = BytewiseComparator();
1617 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1618 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
1619
1620 TableConstructor c(options.comparator);
1621 InternalKey key("abcdefghijk", 1, kTypeValue);
1622 c.Add(key.Encode().ToString(), "test");
1623 std::vector<std::string> keys;
1624 stl_wrappers::KVMap kvmap;
1625 const ImmutableCFOptions ioptions(options);
1626 const MutableCFOptions moptions(options);
1627 const InternalKeyComparator internal_comparator(options.comparator);
1628 c.Finish(options, ioptions, moptions, table_options, internal_comparator,
1629 &keys, &kvmap);
1630 // TODO(Zhongyi): update test to use MutableCFOptions
1631 options.prefix_extractor.reset(NewFixedPrefixTransform(9));
1632 const ImmutableCFOptions new_ioptions(options);
1633 const MutableCFOptions new_moptions(options);
1634 c.Reopen(new_ioptions, new_moptions);
1635 auto reader = c.GetTableReader();
1636 std::unique_ptr<InternalIterator> db_iter(
1637 reader->NewIterator(ReadOptions(), new_moptions.prefix_extractor.get()));
1638
1639 // Test point lookup
1640 // only one kv
1641 for (auto& kv : kvmap) {
1642 db_iter->Seek(kv.first);
1643 ASSERT_TRUE(db_iter->Valid());
1644 ASSERT_OK(db_iter->status());
1645 ASSERT_EQ(db_iter->key(), kv.first);
1646 ASSERT_EQ(db_iter->value(), kv.second);
1647 }
1648 }
1649
1650 static std::string RandomString(Random* rnd, int len) {
1651 std::string r;
1652 test::RandomString(rnd, len, &r);
1653 return r;
1654 }
1655
1656 void AddInternalKey(TableConstructor* c, const std::string& prefix,
1657 int /*suffix_len*/ = 800) {
1658 static Random rnd(1023);
1659 InternalKey k(prefix + RandomString(&rnd, 800), 0, kTypeValue);
1660 c->Add(k.Encode().ToString(), "v");
1661 }
1662
1663 void TableTest::IndexTest(BlockBasedTableOptions table_options) {
1664 TableConstructor c(BytewiseComparator());
1665
1666 // keys share 3-byte prefixes; each ~800-byte entry is big enough that a
1667 // 1700-byte block holds only two entries (the 10 entries below yield 5 blocks)
1668 AddInternalKey(&c, "0015");
1669 AddInternalKey(&c, "0035");
1670
1671 AddInternalKey(&c, "0054");
1672 AddInternalKey(&c, "0055");
1673
1674 AddInternalKey(&c, "0056");
1675 AddInternalKey(&c, "0057");
1676
1677 AddInternalKey(&c, "0058");
1678 AddInternalKey(&c, "0075");
1679
1680 AddInternalKey(&c, "0076");
1681 AddInternalKey(&c, "0095");
1682
1683 std::vector<std::string> keys;
1684 stl_wrappers::KVMap kvmap;
1685 Options options;
1686 options.prefix_extractor.reset(NewFixedPrefixTransform(3));
1687 table_options.block_size = 1700;
1688 table_options.block_cache = NewLRUCache(1024, 4);
1689 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1690
1691 std::unique_ptr<InternalKeyComparator> comparator(
1692 new InternalKeyComparator(BytewiseComparator()));
1693 const ImmutableCFOptions ioptions(options);
1694 const MutableCFOptions moptions(options);
1695 c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
1696 &kvmap);
1697 auto reader = c.GetTableReader();
1698
1699 auto props = reader->GetTableProperties();
1700 ASSERT_EQ(5u, props->num_data_blocks);
1701
1702 // TODO(Zhongyi): update test to use MutableCFOptions
1703 std::unique_ptr<InternalIterator> index_iter(
1704 reader->NewIterator(ReadOptions(), moptions.prefix_extractor.get()));
1705
1706 // -- Find keys that do not exist, but have a common prefix.
1707 std::vector<std::string> prefixes = {"001", "003", "005", "007", "009"};
1708 std::vector<std::string> lower_bound = {keys[0], keys[1], keys[2],
1709 keys[7], keys[9], };
1710
1711 // find the lower bound of the prefix
1712 for (size_t i = 0; i < prefixes.size(); ++i) {
1713 index_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode());
1714 ASSERT_OK(index_iter->status());
1715 ASSERT_TRUE(index_iter->Valid());
1716
1717 // seek the first element in the block
1718 ASSERT_EQ(lower_bound[i], index_iter->key().ToString());
1719 ASSERT_EQ("v", index_iter->value().ToString());
1720 }
1721
1722 // find the upper bound of prefixes
1723 std::vector<std::string> upper_bound = {keys[1], keys[2], keys[7], keys[9], };
1724
1725 // find existing keys
1726 for (const auto& item : kvmap) {
1727 auto ukey = ExtractUserKey(item.first).ToString();
1728 index_iter->Seek(ukey);
1729
1730 // ASSERT_OK(regular_iter->status());
1731 ASSERT_OK(index_iter->status());
1732
1733 // ASSERT_TRUE(regular_iter->Valid());
1734 ASSERT_TRUE(index_iter->Valid());
1735
1736 ASSERT_EQ(item.first, index_iter->key().ToString());
1737 ASSERT_EQ(item.second, index_iter->value().ToString());
1738 }
1739
1740 for (size_t i = 0; i < prefixes.size(); ++i) {
1741 // the key is greater than any existing key with this prefix.
1742 auto key = prefixes[i] + "9";
1743 index_iter->Seek(InternalKey(key, 0, kTypeValue).Encode());
1744
1745 ASSERT_OK(index_iter->status());
1746 if (i == prefixes.size() - 1) {
1747 // last key
1748 ASSERT_TRUE(!index_iter->Valid());
1749 } else {
1750 ASSERT_TRUE(index_iter->Valid());
1751 // seek the first element in the block
1752 ASSERT_EQ(upper_bound[i], index_iter->key().ToString());
1753 ASSERT_EQ("v", index_iter->value().ToString());
1754 }
1755 }
1756
1757 // find keys whose prefixes don't match any of the existing prefixes.
1758 std::vector<std::string> non_exist_prefixes = {"002", "004", "006", "008"};
1759 for (const auto& prefix : non_exist_prefixes) {
1760 index_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode());
1761 // regular_iter->Seek(prefix);
1762
1763 ASSERT_OK(index_iter->status());
1764 // Seeking to a non-existing prefix should yield either an invalid position
1765 // or a key whose prefix is greater than the target.
1766 if (index_iter->Valid()) {
1767 Slice ukey = ExtractUserKey(index_iter->key());
1768 Slice ukey_prefix = options.prefix_extractor->Transform(ukey);
1769 ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) < 0);
1770 }
1771 }
1772 c.ResetTableReader();
1773 }
1774
1775 TEST_P(BlockBasedTableTest, BinaryIndexTest) {
1776 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1777 table_options.index_type = BlockBasedTableOptions::kBinarySearch;
1778 IndexTest(table_options);
1779 }
1780
1781 TEST_P(BlockBasedTableTest, HashIndexTest) {
1782 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1783 table_options.index_type = BlockBasedTableOptions::kHashSearch;
1784 IndexTest(table_options);
1785 }
1786
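// The loop below sweeps metadata_block_size from 1 byte up to just past the
// estimated total index size, so IndexTest runs with anything from roughly one
// index entry per partition up to a single partition holding the whole
// two-level index.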
1787 TEST_P(BlockBasedTableTest, PartitionIndexTest) {
1788 const int max_index_keys = 5;
1789 const int est_max_index_key_value_size = 32;
1790 const int est_max_index_size = max_index_keys * est_max_index_key_value_size;
1791 for (int i = 1; i <= est_max_index_size + 1; i++) {
1792 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1793 table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
1794 table_options.metadata_block_size = i;
1795 IndexTest(table_options);
1796 }
1797 }
1798
1799 // It is very hard to figure out the index block size accurately.
1800 // Instead, we just make sure that as the number of keys grows,
1801 // the index block size also grows.
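// In the test below the index size is read back from the table properties of the
// freshly built table, roughly:
//   c.GetTableReader()->GetTableProperties()->index_size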
1802 TEST_P(BlockBasedTableTest, IndexSizeStat) {
1803 uint64_t last_index_size = 0;
1804
1805 // we need to use random keys since purely human-readable text
1806 // may compress well, resulting in an insignificant change of the index
1807 // block size.
1808 Random rnd(test::RandomSeed());
1809 std::vector<std::string> keys;
1810
1811 for (int i = 0; i < 100; ++i) {
1812 keys.push_back(RandomString(&rnd, 10000));
1813 }
1814
1815 // Each time we load one more key into the table, the table's index block
1816 // size is expected to be larger than last time's.
1817 for (size_t i = 1; i < keys.size(); ++i) {
1818 TableConstructor c(BytewiseComparator(),
1819 true /* convert_to_internal_key_ */);
1820 for (size_t j = 0; j < i; ++j) {
1821 c.Add(keys[j], "val");
1822 }
1823
1824 std::vector<std::string> ks;
1825 stl_wrappers::KVMap kvmap;
1826 Options options;
1827 options.compression = kNoCompression;
1828 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1829 table_options.block_restart_interval = 1;
1830 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1831
1832 const ImmutableCFOptions ioptions(options);
1833 const MutableCFOptions moptions(options);
1834 c.Finish(options, ioptions, moptions, table_options,
1835 GetPlainInternalComparator(options.comparator), &ks, &kvmap);
1836 auto index_size = c.GetTableReader()->GetTableProperties()->index_size;
1837 ASSERT_GT(index_size, last_index_size);
1838 last_index_size = index_size;
1839 c.ResetTableReader();
1840 }
1841 }
1842
1843 TEST_P(BlockBasedTableTest, NumBlockStat) {
1844 Random rnd(test::RandomSeed());
1845 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1846 Options options;
1847 options.compression = kNoCompression;
1848 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1849 table_options.block_restart_interval = 1;
1850 table_options.block_size = 1000;
1851 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1852
1853 for (int i = 0; i < 10; ++i) {
1854 // each key/value pair is slightly smaller than the block size, so that each
1855 // block holds roughly one pair.
1856 c.Add(RandomString(&rnd, 900), "val");
1857 }
1858
1859 std::vector<std::string> ks;
1860 stl_wrappers::KVMap kvmap;
1861 const ImmutableCFOptions ioptions(options);
1862 const MutableCFOptions moptions(options);
1863 c.Finish(options, ioptions, moptions, table_options,
1864 GetPlainInternalComparator(options.comparator), &ks, &kvmap);
1865 ASSERT_EQ(kvmap.size(),
1866 c.GetTableReader()->GetTableProperties()->num_data_blocks);
1867 c.ResetTableReader();
1868 }
1869
1870 // A simple tool that takes a snapshot of block cache statistics.
1871 class BlockCachePropertiesSnapshot {
1872 public:
1873 explicit BlockCachePropertiesSnapshot(Statistics* statistics) {
1874 block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS);
1875 block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT);
1876 index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS);
1877 index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT);
1878 data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS);
1879 data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT);
1880 filter_block_cache_miss =
1881 statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS);
1882 filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT);
1883 block_cache_bytes_read = statistics->getTickerCount(BLOCK_CACHE_BYTES_READ);
1884 block_cache_bytes_write =
1885 statistics->getTickerCount(BLOCK_CACHE_BYTES_WRITE);
1886 }
1887
1888 void AssertIndexBlockStat(int64_t expected_index_block_cache_miss,
1889 int64_t expected_index_block_cache_hit) {
1890 ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
1891 ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
1892 }
1893
1894 void AssertFilterBlockStat(int64_t expected_filter_block_cache_miss,
1895 int64_t expected_filter_block_cache_hit) {
1896 ASSERT_EQ(expected_filter_block_cache_miss, filter_block_cache_miss);
1897 ASSERT_EQ(expected_filter_block_cache_hit, filter_block_cache_hit);
1898 }
1899
1900 // Check whether the fetched stats match the expected ones.
1901 // TODO(kailiu) Use this only when the filter policy is disabled!
1902 void AssertEqual(int64_t expected_index_block_cache_miss,
1903 int64_t expected_index_block_cache_hit,
1904 int64_t expected_data_block_cache_miss,
1905 int64_t expected_data_block_cache_hit) const {
1906 ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
1907 ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
1908 ASSERT_EQ(expected_data_block_cache_miss, data_block_cache_miss);
1909 ASSERT_EQ(expected_data_block_cache_hit, data_block_cache_hit);
1910 ASSERT_EQ(expected_index_block_cache_miss + expected_data_block_cache_miss,
1911 block_cache_miss);
1912 ASSERT_EQ(expected_index_block_cache_hit + expected_data_block_cache_hit,
1913 block_cache_hit);
1914 }
1915
1916 int64_t GetCacheBytesRead() { return block_cache_bytes_read; }
1917
1918 int64_t GetCacheBytesWrite() { return block_cache_bytes_write; }
1919
1920 private:
1921 int64_t block_cache_miss = 0;
1922 int64_t block_cache_hit = 0;
1923 int64_t index_block_cache_miss = 0;
1924 int64_t index_block_cache_hit = 0;
1925 int64_t data_block_cache_miss = 0;
1926 int64_t data_block_cache_hit = 0;
1927 int64_t filter_block_cache_miss = 0;
1928 int64_t filter_block_cache_hit = 0;
1929 int64_t block_cache_bytes_read = 0;
1930 int64_t block_cache_bytes_write = 0;
1931 };
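// Typical usage in the tests below (sketch): take a snapshot right after the
// operation of interest and compare the tickers against expected values, e.g.
//   BlockCachePropertiesSnapshot props(options.statistics.get());
//   props.AssertEqual(index_miss, index_hit, data_miss, data_hit);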
1932
1933 // Make sure that, by default, index/filter blocks are pre-loaded (meaning we
1934 // won't use the block cache to store them).
1935 TEST_P(BlockBasedTableTest, BlockCacheDisabledTest) {
1936 Options options;
1937 options.create_if_missing = true;
1938 options.statistics = CreateDBStatistics();
1939 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1940 table_options.block_cache = NewLRUCache(1024, 4);
1941 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1942 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1943 std::vector<std::string> keys;
1944 stl_wrappers::KVMap kvmap;
1945
1946 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1947 c.Add("key", "value");
1948 const ImmutableCFOptions ioptions(options);
1949 const MutableCFOptions moptions(options);
1950 c.Finish(options, ioptions, moptions, table_options,
1951 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1952
1953 // preloading filter/index blocks is enabled.
1954 auto reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
1955 ASSERT_TRUE(reader->TEST_filter_block_preloaded());
1956 ASSERT_TRUE(reader->TEST_index_reader_preloaded());
1957
1958 {
1959 // nothing happens in the beginning
1960 BlockCachePropertiesSnapshot props(options.statistics.get());
1961 props.AssertIndexBlockStat(0, 0);
1962 props.AssertFilterBlockStat(0, 0);
1963 }
1964
1965 {
1966 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
1967 GetContext::kNotFound, Slice(), nullptr, nullptr,
1968 nullptr, nullptr, nullptr);
1969 // a hack just to trigger BlockBasedTable::GetFilter.
1970 reader->Get(ReadOptions(), "non-exist-key", &get_context,
1971 moptions.prefix_extractor.get());
1972 BlockCachePropertiesSnapshot props(options.statistics.get());
1973 props.AssertIndexBlockStat(0, 0);
1974 props.AssertFilterBlockStat(0, 0);
1975 }
1976 }
1977
1978 // Due to the difficulty of the interaction between statistics, this test
1979 // only covers the case where the index block is put into the block cache.
1980 TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) {
1981 // -- Table construction
1982 Options options;
1983 options.create_if_missing = true;
1984 options.statistics = CreateDBStatistics();
1985
1986 // Enable the cache for index/filter blocks
1987 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1988 table_options.block_cache = NewLRUCache(2048, 2);
1989 table_options.cache_index_and_filter_blocks = true;
1990 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1991 std::vector<std::string> keys;
1992 stl_wrappers::KVMap kvmap;
1993
1994 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1995 c.Add("key", "value");
1996 const ImmutableCFOptions ioptions(options);
1997 const MutableCFOptions moptions(options);
1998 c.Finish(options, ioptions, moptions, table_options,
1999 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2000 // preloading filter/index blocks is prohibited.
2001 auto* reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2002 ASSERT_TRUE(!reader->TEST_filter_block_preloaded());
2003 ASSERT_TRUE(!reader->TEST_index_reader_preloaded());
2004
2005 // -- PART 1: Open with regular block cache.
2006 // Index and data block accesses go through it and are tracked below.
2007 std::unique_ptr<InternalIterator> iter;
2008
2009 int64_t last_cache_bytes_read = 0;
2010 // At first, no data block has been accessed yet.
2011 {
2012 BlockCachePropertiesSnapshot props(options.statistics.get());
2013 // index will be added to block cache.
2014 props.AssertEqual(1, // index block miss
2015 0, 0, 0);
2016 ASSERT_EQ(props.GetCacheBytesRead(), 0);
2017 ASSERT_EQ(props.GetCacheBytesWrite(),
2018 table_options.block_cache->GetUsage());
2019 last_cache_bytes_read = props.GetCacheBytesRead();
2020 }
2021
2022 // Only index block will be accessed
2023 {
2024 iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2025 BlockCachePropertiesSnapshot props(options.statistics.get());
2026 // NOTE: to better highlight the "delta" of each ticker, I use
2027 // <last_value> + <added_value> to indicate the increment of a changed
2028 // value; other numbers remain the same.
2029 props.AssertEqual(1, 0 + 1, // index block hit
2030 0, 0);
2031 // Cache hit, bytes read from cache should increase
2032 ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
2033 ASSERT_EQ(props.GetCacheBytesWrite(),
2034 table_options.block_cache->GetUsage());
2035 last_cache_bytes_read = props.GetCacheBytesRead();
2036 }
2037
2038 // Only data block will be accessed
2039 {
2040 iter->SeekToFirst();
2041 BlockCachePropertiesSnapshot props(options.statistics.get());
2042 props.AssertEqual(1, 1, 0 + 1, // data block miss
2043 0);
2044 // Cache miss, Bytes read from cache should not change
2045 ASSERT_EQ(props.GetCacheBytesRead(), last_cache_bytes_read);
2046 ASSERT_EQ(props.GetCacheBytesWrite(),
2047 table_options.block_cache->GetUsage());
2048 last_cache_bytes_read = props.GetCacheBytesRead();
2049 }
2050
2051 // Data block will be in cache
2052 {
2053 iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2054 iter->SeekToFirst();
2055 BlockCachePropertiesSnapshot props(options.statistics.get());
2056 props.AssertEqual(1, 1 + 1, /* index block hit */
2057 1, 0 + 1 /* data block hit */);
2058 // Cache hit, bytes read from cache should increase
2059 ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
2060 ASSERT_EQ(props.GetCacheBytesWrite(),
2061 table_options.block_cache->GetUsage());
2062 }
2063 // release the iterator so that the block cache can reset correctly.
2064 iter.reset();
2065
2066 c.ResetTableReader();
2067
2068 // -- PART 2: Open with very small block cache
2069 // In this test, no block will ever get hit since the block cache is
2070 // too small to fit even one entry.
2071 table_options.block_cache = NewLRUCache(1, 4);
2072 options.statistics = CreateDBStatistics();
2073 options.table_factory.reset(new BlockBasedTableFactory(table_options));
2074 const ImmutableCFOptions ioptions2(options);
2075 const MutableCFOptions moptions2(options);
2076 c.Reopen(ioptions2, moptions2);
2077 {
2078 BlockCachePropertiesSnapshot props(options.statistics.get());
2079 props.AssertEqual(1, // index block miss
2080 0, 0, 0);
2081 // Cache miss, Bytes read from cache should not change
2082 ASSERT_EQ(props.GetCacheBytesRead(), 0);
2083 }
2084
2085 {
2086 // Both index and data blocks get accessed.
2087 // The index block is cached first, then the data block. But since the cache
2088 // capacity is only 1, the index block is purged once the data block is inserted.
2089 iter.reset(c.NewIterator(moptions2.prefix_extractor.get()));
2090 BlockCachePropertiesSnapshot props(options.statistics.get());
2091 props.AssertEqual(1 + 1, // index block miss
2092 0, 0, // data block miss
2093 0);
2094 // No cache hits, so bytes read from cache should stay 0
2095 ASSERT_EQ(props.GetCacheBytesRead(), 0);
2096 }
2097
2098 {
2099 // SeekToFirst() accesses the data block. For the same reason, we expect a
2100 // data block cache miss.
2101 iter->SeekToFirst();
2102 BlockCachePropertiesSnapshot props(options.statistics.get());
2103 props.AssertEqual(2, 0, 0 + 1, // data block miss
2104 0);
2105 // Cache miss, Bytes read from cache should not change
2106 ASSERT_EQ(props.GetCacheBytesRead(), 0);
2107 }
2108 iter.reset();
2109 c.ResetTableReader();
2110
2111 // -- PART 3: Open table with bloom filter enabled but not in SST file
2112 table_options.block_cache = NewLRUCache(4096, 4);
2113 table_options.cache_index_and_filter_blocks = false;
2114 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2115
2116 TableConstructor c3(BytewiseComparator());
2117 std::string user_key = "k01";
2118 InternalKey internal_key(user_key, 0, kTypeValue);
2119 c3.Add(internal_key.Encode().ToString(), "hello");
2120 ImmutableCFOptions ioptions3(options);
2121 MutableCFOptions moptions3(options);
2122 // Generate table without filter policy
2123 c3.Finish(options, ioptions3, moptions3, table_options,
2124 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2125 c3.ResetTableReader();
2126
2127 // Open table with filter policy
2128 table_options.filter_policy.reset(NewBloomFilterPolicy(1));
2129 options.table_factory.reset(new BlockBasedTableFactory(table_options));
2130 options.statistics = CreateDBStatistics();
2131 ImmutableCFOptions ioptions4(options);
2132 MutableCFOptions moptions4(options);
2133 ASSERT_OK(c3.Reopen(ioptions4, moptions4));
2134 reader = dynamic_cast<BlockBasedTable*>(c3.GetTableReader());
2135 ASSERT_TRUE(!reader->TEST_filter_block_preloaded());
2136 PinnableSlice value;
2137 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2138 GetContext::kNotFound, user_key, &value, nullptr,
2139 nullptr, nullptr, nullptr);
2140 ASSERT_OK(reader->Get(ReadOptions(), internal_key.Encode(), &get_context,
2141 moptions4.prefix_extractor.get()));
2142 ASSERT_STREQ(value.data(), "hello");
2143 BlockCachePropertiesSnapshot props(options.statistics.get());
2144 props.AssertFilterBlockStat(0, 0);
2145 c3.ResetTableReader();
2146 }
2147
2148 void ValidateBlockSizeDeviation(int value, int expected) {
2149 BlockBasedTableOptions table_options;
2150 table_options.block_size_deviation = value;
2151 BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);
2152
2153 const BlockBasedTableOptions* normalized_table_options =
2154 (const BlockBasedTableOptions*)factory->GetOptions();
2155 ASSERT_EQ(normalized_table_options->block_size_deviation, expected);
2156
2157 delete factory;
2158 }
2159
2160 void ValidateBlockRestartInterval(int value, int expected) {
2161 BlockBasedTableOptions table_options;
2162 table_options.block_restart_interval = value;
2163 BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);
2164
2165 const BlockBasedTableOptions* normalized_table_options =
2166 (const BlockBasedTableOptions*)factory->GetOptions();
2167 ASSERT_EQ(normalized_table_options->block_restart_interval, expected);
2168
2169 delete factory;
2170 }
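// Both helpers above construct a BlockBasedTableFactory and check that the
// factory's sanitized copy of the options carries the expected value; the
// InvalidOptions test below enumerates the in-range and out-of-range cases.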
2171
2172 TEST_P(BlockBasedTableTest, InvalidOptions) {
2173 // invalid values for block_size_deviation (<0 or >100) are silently set to 0
2174 ValidateBlockSizeDeviation(-10, 0);
2175 ValidateBlockSizeDeviation(-1, 0);
2176 ValidateBlockSizeDeviation(0, 0);
2177 ValidateBlockSizeDeviation(1, 1);
2178 ValidateBlockSizeDeviation(99, 99);
2179 ValidateBlockSizeDeviation(100, 100);
2180 ValidateBlockSizeDeviation(101, 0);
2181 ValidateBlockSizeDeviation(1000, 0);
2182
2183 // invalid values for block_restart_interval (<1) are silently set to 1
2184 ValidateBlockRestartInterval(-10, 1);
2185 ValidateBlockRestartInterval(-1, 1);
2186 ValidateBlockRestartInterval(0, 1);
2187 ValidateBlockRestartInterval(1, 1);
2188 ValidateBlockRestartInterval(2, 2);
2189 ValidateBlockRestartInterval(1000, 1000);
2190 }
2191
2192 TEST_P(BlockBasedTableTest, BlockReadCountTest) {
2193 // bloom_filter_type = 0 -- block-based filter
2194 // bloom_filter_type = 1 -- full filter
2195 for (int bloom_filter_type = 0; bloom_filter_type < 2; ++bloom_filter_type) {
2196 for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2;
2197 ++index_and_filter_in_cache) {
2198 Options options;
2199 options.create_if_missing = true;
2200
2201 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2202 table_options.block_cache = NewLRUCache(1, 0);
2203 table_options.cache_index_and_filter_blocks = index_and_filter_in_cache;
2204 table_options.filter_policy.reset(
2205 NewBloomFilterPolicy(10, bloom_filter_type == 0));
2206 options.table_factory.reset(new BlockBasedTableFactory(table_options));
2207 std::vector<std::string> keys;
2208 stl_wrappers::KVMap kvmap;
2209
2210 TableConstructor c(BytewiseComparator());
2211 std::string user_key = "k04";
2212 InternalKey internal_key(user_key, 0, kTypeValue);
2213 std::string encoded_key = internal_key.Encode().ToString();
2214 c.Add(encoded_key, "hello");
2215 ImmutableCFOptions ioptions(options);
2216 MutableCFOptions moptions(options);
2217 // Generate table with filter policy
2218 c.Finish(options, ioptions, moptions, table_options,
2219 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2220 auto reader = c.GetTableReader();
2221 PinnableSlice value;
2222 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2223 GetContext::kNotFound, user_key, &value, nullptr,
2224 nullptr, nullptr, nullptr);
2225 get_perf_context()->Reset();
2226 ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
2227 moptions.prefix_extractor.get()));
2228 if (index_and_filter_in_cache) {
2229 // data, index and filter block
2230 ASSERT_EQ(get_perf_context()->block_read_count, 3);
2231 } else {
2232 // just the data block
2233 ASSERT_EQ(get_perf_context()->block_read_count, 1);
2234 }
2235 ASSERT_EQ(get_context.State(), GetContext::kFound);
2236 ASSERT_STREQ(value.data(), "hello");
2237
2238 // Get non-existing key
2239 user_key = "does-not-exist";
2240 internal_key = InternalKey(user_key, 0, kTypeValue);
2241 encoded_key = internal_key.Encode().ToString();
2242
2243 value.Reset();
2244 get_context = GetContext(options.comparator, nullptr, nullptr, nullptr,
2245 GetContext::kNotFound, user_key, &value, nullptr,
2246 nullptr, nullptr, nullptr);
2247 get_perf_context()->Reset();
2248 ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
2249 moptions.prefix_extractor.get()));
2250 ASSERT_EQ(get_context.State(), GetContext::kNotFound);
2251
2252 if (index_and_filter_in_cache) {
2253 if (bloom_filter_type == 0) {
2254 // with block-based, we read index and then the filter
2255 ASSERT_EQ(get_perf_context()->block_read_count, 2);
2256 } else {
2257 // with full-filter, we read filter first and then we stop
2258 ASSERT_EQ(get_perf_context()->block_read_count, 1);
2259 }
2260 } else {
2261 // filter is already in memory and it figures out that the key doesn't
2262 // exist
2263 ASSERT_EQ(get_perf_context()->block_read_count, 0);
2264 }
2265 }
2266 }
2267 }
2268
2269 // A wrapper around LRUCache that also keeps track of data blocks (in contrast
2270 // with other objects) in the cache. The class is very simple and can only be
2271 // used for trivial tests.
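// How the tracking works: Insert() records the caller's deleter and swaps in
// MockDeleter; TEST_mark_as_data_block() records the charge of keys holding raw
// data blocks; when a marked entry is erased, MockDeleter subtracts its charge,
// so marked_size_ reflects the bytes of data blocks currently in the cache.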
2272 class MockCache : public LRUCache {
2273 public:
2274 MockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
2275 double high_pri_pool_ratio)
2276 : LRUCache(capacity, num_shard_bits, strict_capacity_limit,
2277 high_pri_pool_ratio) {}
2278 Status Insert(const Slice& key, void* value, size_t charge,
2279 void (*deleter)(const Slice& key, void* value),
2280 Handle** handle = nullptr,
2281 Priority priority = Priority::LOW) override {
2282 // Replace the deleter with our own so that we keep track of data blocks
2283 // erased from the cache
2284 deleters_[key.ToString()] = deleter;
2285 return ShardedCache::Insert(key, value, charge, &MockDeleter, handle,
2286 priority);
2287 }
2288 // Called right after a data block is inserted into the cache
2289 void TEST_mark_as_data_block(const Slice& key, size_t charge) override {
2290 marked_data_in_cache_[key.ToString()] = charge;
2291 marked_size_ += charge;
2292 }
2293 using DeleterFunc = void (*)(const Slice& key, void* value);
2294 static std::map<std::string, DeleterFunc> deleters_;
2295 static std::map<std::string, size_t> marked_data_in_cache_;
2296 static size_t marked_size_;
2297 static void MockDeleter(const Slice& key, void* value) {
2298 // If the item was marked as a data block, subtract its charge from the
2299 // total data block usage of the cache
2300 if (marked_data_in_cache_.find(key.ToString()) !=
2301 marked_data_in_cache_.end()) {
2302 marked_size_ -= marked_data_in_cache_[key.ToString()];
2303 }
2304 // Then call the original deleter
2305 assert(deleters_.find(key.ToString()) != deleters_.end());
2306 auto deleter = deleters_[key.ToString()];
2307 deleter(key, value);
2308 }
2309 };
2310
2311 size_t MockCache::marked_size_ = 0;
2312 std::map<std::string, MockCache::DeleterFunc> MockCache::deleters_;
2313 std::map<std::string, size_t> MockCache::marked_data_in_cache_;
2314
2315 // The block cache can contain raw data blocks as well as general objects. If an
2316 // object depends on the table being live, it must be destructed before the
2317 // table is closed. This test makes sure that the only items remaining in the
2318 // cache after the table is closed are raw data blocks.
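// The nested loops below sweep level, index type, filter layout (block-based,
// full, or partitioned), cache_index_and_filter_blocks, the pinning options, and
// the compression type / dictionary size, skipping combinations that are invalid.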
2319 TEST_P(BlockBasedTableTest, NoObjectInCacheAfterTableClose) {
2320 std::vector<CompressionType> compression_types{kNoCompression};
2321
2322 // The following are the compression library versions supporting compression
2323 // dictionaries. See the test case CacheCompressionDict in the
2324 // DBBlockCacheTest suite.
2325 #ifdef ZLIB
2326 compression_types.push_back(kZlibCompression);
2327 #endif // ZLIB
2328 #if LZ4_VERSION_NUMBER >= 10400
2329 compression_types.push_back(kLZ4Compression);
2330 compression_types.push_back(kLZ4HCCompression);
2331 #endif // LZ4_VERSION_NUMBER >= 10400
2332 #if ZSTD_VERSION_NUMBER >= 500
2333 compression_types.push_back(kZSTD);
2334 #endif // ZSTD_VERSION_NUMBER >= 500
2335
2336 for (int level: {-1, 0, 1, 10}) {
2337 for (auto index_type :
2338 {BlockBasedTableOptions::IndexType::kBinarySearch,
2339 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch}) {
2340 for (bool block_based_filter : {true, false}) {
2341 for (bool partition_filter : {true, false}) {
2342 if (partition_filter &&
2343 (block_based_filter ||
2344 index_type !=
2345 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch)) {
2346 continue;
2347 }
2348 for (bool index_and_filter_in_cache : {true, false}) {
2349 for (bool pin_l0 : {true, false}) {
2350 for (bool pin_top_level : {true, false}) {
2351 if (pin_l0 && !index_and_filter_in_cache) {
2352 continue;
2353 }
2354
2355 for (auto compression_type : compression_types) {
2356 for (uint32_t max_dict_bytes : {0, 1 << 14}) {
2357 if (compression_type == kNoCompression && max_dict_bytes)
2358 continue;
2359
2360 // Create a table
2361 Options opt;
2362 std::unique_ptr<InternalKeyComparator> ikc;
2363 ikc.reset(new test::PlainInternalKeyComparator(
2364 opt.comparator));
2365 opt.compression = compression_type;
2366 opt.compression_opts.max_dict_bytes = max_dict_bytes;
2367 BlockBasedTableOptions table_options =
2368 GetBlockBasedTableOptions();
2369 table_options.block_size = 1024;
2370 table_options.index_type = index_type;
2371 table_options.pin_l0_filter_and_index_blocks_in_cache =
2372 pin_l0;
2373 table_options.pin_top_level_index_and_filter =
2374 pin_top_level;
2375 table_options.partition_filters = partition_filter;
2376 table_options.cache_index_and_filter_blocks =
2377 index_and_filter_in_cache;
2378 // big enough so we don't ever lose cached values.
2379 table_options.block_cache = std::make_shared<MockCache>(
2380 16 * 1024 * 1024, 4, false, 0.0);
2381 table_options.filter_policy.reset(
2382 rocksdb::NewBloomFilterPolicy(10, block_based_filter));
2383 opt.table_factory.reset(NewBlockBasedTableFactory(
2384 table_options));
2385
2386 bool convert_to_internal_key = false;
2387 TableConstructor c(BytewiseComparator(),
2388 convert_to_internal_key, level);
2389 std::string user_key = "k01";
2390 std::string key =
2391 InternalKey(user_key, 0, kTypeValue).Encode().ToString();
2392 c.Add(key, "hello");
2393 std::vector<std::string> keys;
2394 stl_wrappers::KVMap kvmap;
2395 const ImmutableCFOptions ioptions(opt);
2396 const MutableCFOptions moptions(opt);
2397 c.Finish(opt, ioptions, moptions, table_options, *ikc,
2398 &keys, &kvmap);
2399
2400 // Do a read so that the index/filter blocks are loaded into the cache
2401 auto table_reader =
2402 dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2403 PinnableSlice value;
2404 GetContext get_context(opt.comparator, nullptr, nullptr,
2405 nullptr, GetContext::kNotFound, user_key, &value,
2406 nullptr, nullptr, nullptr, nullptr);
2407 InternalKey ikey(user_key, 0, kTypeValue);
2408 auto s = table_reader->Get(ReadOptions(), key, &get_context,
2409 moptions.prefix_extractor.get());
2410 ASSERT_EQ(get_context.State(), GetContext::kFound);
2411 ASSERT_STREQ(value.data(), "hello");
2412
2413 // Close the table
2414 c.ResetTableReader();
2415
2416 auto usage = table_options.block_cache->GetUsage();
2417 auto pinned_usage =
2418 table_options.block_cache->GetPinnedUsage();
2419 // The only usage must be for marked data blocks
2420 ASSERT_EQ(usage, MockCache::marked_size_);
2421 // There must be some pinned data since the PinnableSlice has
2422 // not released it yet
2423 ASSERT_GT(pinned_usage, 0);
2424 // Release pinnable slice resources
2425 value.Reset();
2426 pinned_usage = table_options.block_cache->GetPinnedUsage();
2427 ASSERT_EQ(pinned_usage, 0);
2428 }
2429 }
2430 }
2431 }
2432 }
2433 }
2434 }
2435 }
2436 } // level
2437 }
2438
2439 TEST_P(BlockBasedTableTest, BlockCacheLeak) {
2440 // Check that when we reopen a table we don't lose access to blocks already
2441 // in the cache. This test checks whether the Table actually makes use of the
2442 // unique ID from the file.
2443
2444 Options opt;
2445 std::unique_ptr<InternalKeyComparator> ikc;
2446 ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
2447 opt.compression = kNoCompression;
2448 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2449 table_options.block_size = 1024;
2450 // big enough so we don't ever lose cached values.
2451 table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
2452 opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
2453
2454 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2455 c.Add("k01", "hello");
2456 c.Add("k02", "hello2");
2457 c.Add("k03", std::string(10000, 'x'));
2458 c.Add("k04", std::string(200000, 'x'));
2459 c.Add("k05", std::string(300000, 'x'));
2460 c.Add("k06", "hello3");
2461 c.Add("k07", std::string(100000, 'x'));
2462 std::vector<std::string> keys;
2463 stl_wrappers::KVMap kvmap;
2464 const ImmutableCFOptions ioptions(opt);
2465 const MutableCFOptions moptions(opt);
2466 c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
2467
2468 std::unique_ptr<InternalIterator> iter(
2469 c.NewIterator(moptions.prefix_extractor.get()));
2470 iter->SeekToFirst();
2471 while (iter->Valid()) {
2472 iter->key();
2473 iter->value();
2474 iter->Next();
2475 }
2476 ASSERT_OK(iter->status());
2477 iter.reset();
2478
2479 const ImmutableCFOptions ioptions1(opt);
2480 const MutableCFOptions moptions1(opt);
2481 ASSERT_OK(c.Reopen(ioptions1, moptions1));
2482 auto table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2483 for (const std::string& key : keys) {
2484 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
2485 ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
2486 }
2487 c.ResetTableReader();
2488
2489 // rerun with different block cache
2490 table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
2491 opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
2492 const ImmutableCFOptions ioptions2(opt);
2493 const MutableCFOptions moptions2(opt);
2494 ASSERT_OK(c.Reopen(ioptions2, moptions2));
2495 table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2496 for (const std::string& key : keys) {
2497 InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
2498 ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
2499 }
2500 c.ResetTableReader();
2501 }
2502
2503 namespace {
2504 class CustomMemoryAllocator : public MemoryAllocator {
2505 public:
2506 const char* Name() const override { return "CustomMemoryAllocator"; }
2507
2508 void* Allocate(size_t size) override {
2509 ++numAllocations;
2510 auto ptr = new char[size + 16];
2511 memcpy(ptr, "memory_allocator_", 16); // mangle first 16 bytes
2512 return reinterpret_cast<void*>(ptr + 16);
2513 }
2514 void Deallocate(void* p) override {
2515 ++numDeallocations;
2516 char* ptr = reinterpret_cast<char*>(p) - 16;
2517 delete[] ptr;
2518 }
2519
2520 std::atomic<int> numAllocations{0};
2521 std::atomic<int> numDeallocations{0};
2522 };
2523 } // namespace
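// CustomMemoryAllocator prepends a 16-byte header (overwritten with a marker
// string) to every allocation and strips it again on deallocation, counting both
// operations; the test below uses the counters to verify that block cache memory
// really flows through this allocator.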
2524
2525 TEST_P(BlockBasedTableTest, MemoryAllocator) {
2526 auto custom_memory_allocator = std::make_shared<CustomMemoryAllocator>();
2527 {
2528 Options opt;
2529 std::unique_ptr<InternalKeyComparator> ikc;
2530 ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
2531 opt.compression = kNoCompression;
2532 BlockBasedTableOptions table_options;
2533 table_options.block_size = 1024;
2534 LRUCacheOptions lruOptions;
2535 lruOptions.memory_allocator = custom_memory_allocator;
2536 lruOptions.capacity = 16 * 1024 * 1024;
2537 lruOptions.num_shard_bits = 4;
2538 table_options.block_cache = NewLRUCache(std::move(lruOptions));
2539 opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
2540
2541 TableConstructor c(BytewiseComparator(),
2542 true /* convert_to_internal_key_ */);
2543 c.Add("k01", "hello");
2544 c.Add("k02", "hello2");
2545 c.Add("k03", std::string(10000, 'x'));
2546 c.Add("k04", std::string(200000, 'x'));
2547 c.Add("k05", std::string(300000, 'x'));
2548 c.Add("k06", "hello3");
2549 c.Add("k07", std::string(100000, 'x'));
2550 std::vector<std::string> keys;
2551 stl_wrappers::KVMap kvmap;
2552 const ImmutableCFOptions ioptions(opt);
2553 const MutableCFOptions moptions(opt);
2554 c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
2555
2556 std::unique_ptr<InternalIterator> iter(
2557 c.NewIterator(moptions.prefix_extractor.get()));
2558 iter->SeekToFirst();
2559 while (iter->Valid()) {
2560 iter->key();
2561 iter->value();
2562 iter->Next();
2563 }
2564 ASSERT_OK(iter->status());
2565 }
2566
2567 // Out of scope: the block cache should have been deleted and all allocations
2568 // deallocated.
2569 EXPECT_EQ(custom_memory_allocator->numAllocations.load(),
2570 custom_memory_allocator->numDeallocations.load());
2571 // make sure that allocations actually happened through the cache allocator
2572 EXPECT_GT(custom_memory_allocator->numAllocations.load(), 0);
2573 }
2574
2575 TEST_P(BlockBasedTableTest, NewIndexIteratorLeak) {
2576 // A regression test for the data race described in
2577 // https://github.com/facebook/rocksdb/issues/1267
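// The sync-point dependencies and markers installed below force the two threads
// to interleave inside BlockBasedTable::NewIndexIterator while the index block is
// materialized through a zero-capacity block cache, which is the scenario that
// used to race.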
2578 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2579 std::vector<std::string> keys;
2580 stl_wrappers::KVMap kvmap;
2581 c.Add("a1", "val1");
2582 Options options;
2583 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
2584 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2585 table_options.index_type = BlockBasedTableOptions::kHashSearch;
2586 table_options.cache_index_and_filter_blocks = true;
2587 table_options.block_cache = NewLRUCache(0);
2588 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2589 const ImmutableCFOptions ioptions(options);
2590 const MutableCFOptions moptions(options);
2591 c.Finish(options, ioptions, moptions, table_options,
2592 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2593
2594 rocksdb::SyncPoint::GetInstance()->LoadDependencyAndMarkers(
2595 {
2596 {"BlockBasedTable::NewIndexIterator::thread1:1",
2597 "BlockBasedTable::NewIndexIterator::thread2:2"},
2598 {"BlockBasedTable::NewIndexIterator::thread2:3",
2599 "BlockBasedTable::NewIndexIterator::thread1:4"},
2600 },
2601 {
2602 {"BlockBasedTableTest::NewIndexIteratorLeak:Thread1Marker",
2603 "BlockBasedTable::NewIndexIterator::thread1:1"},
2604 {"BlockBasedTableTest::NewIndexIteratorLeak:Thread1Marker",
2605 "BlockBasedTable::NewIndexIterator::thread1:4"},
2606 {"BlockBasedTableTest::NewIndexIteratorLeak:Thread2Marker",
2607 "BlockBasedTable::NewIndexIterator::thread2:2"},
2608 {"BlockBasedTableTest::NewIndexIteratorLeak:Thread2Marker",
2609 "BlockBasedTable::NewIndexIterator::thread2:3"},
2610 });
2611
2612 rocksdb::SyncPoint::GetInstance()->EnableProcessing();
2613 ReadOptions ro;
2614 auto* reader = c.GetTableReader();
2615
2616 std::function<void()> func1 = [&]() {
2617 TEST_SYNC_POINT("BlockBasedTableTest::NewIndexIteratorLeak:Thread1Marker");
2618 // TODO(Zhongyi): update test to use MutableCFOptions
2619 std::unique_ptr<InternalIterator> iter(
2620 reader->NewIterator(ro, moptions.prefix_extractor.get()));
2621 iter->Seek(InternalKey("a1", 0, kTypeValue).Encode());
2622 };
2623
2624 std::function<void()> func2 = [&]() {
2625 TEST_SYNC_POINT("BlockBasedTableTest::NewIndexIteratorLeak:Thread2Marker");
2626 std::unique_ptr<InternalIterator> iter(
2627 reader->NewIterator(ro, moptions.prefix_extractor.get()));
2628 };
2629
2630 auto thread1 = port::Thread(func1);
2631 auto thread2 = port::Thread(func2);
2632 thread1.join();
2633 thread2.join();
2634 rocksdb::SyncPoint::GetInstance()->DisableProcessing();
2635 c.ResetTableReader();
2636 }
2637
2638 // Plain table is not supported in ROCKSDB_LITE
2639 #ifndef ROCKSDB_LITE
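// BasicPlainTableProperties builds a plain-table file with 26 fixed-size entries
// and reads the table properties back from it: plain tables have no index or
// filter blocks, each internal key is 8 user-key bytes plus the 8-byte
// sequence/type footer (hence raw_key_size of 16 * 26), and all entries land in
// a single data block.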
2640 TEST_F(PlainTableTest, BasicPlainTableProperties) {
2641 PlainTableOptions plain_table_options;
2642 plain_table_options.user_key_len = 8;
2643 plain_table_options.bloom_bits_per_key = 8;
2644 plain_table_options.hash_table_ratio = 0;
2645
2646 PlainTableFactory factory(plain_table_options);
2647 test::StringSink sink;
2648 std::unique_ptr<WritableFileWriter> file_writer(
2649 test::GetWritableFileWriter(new test::StringSink(), "" /* don't care */));
2650 Options options;
2651 const ImmutableCFOptions ioptions(options);
2652 const MutableCFOptions moptions(options);
2653 InternalKeyComparator ikc(options.comparator);
2654 std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
2655 int_tbl_prop_collector_factories;
2656 std::string column_family_name;
2657 int unknown_level = -1;
2658 std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
2659 TableBuilderOptions(
2660 ioptions, moptions, ikc, &int_tbl_prop_collector_factories,
2661 kNoCompression, 0 /* sample_for_compression */, CompressionOptions(),
2662 false /* skip_filters */, column_family_name, unknown_level),
2663 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
2664 file_writer.get()));
2665
2666 for (char c = 'a'; c <= 'z'; ++c) {
2667 std::string key(8, c);
2668 key.append("\1       ");  // PlainTable expects internal key structure
2669 std::string value(28, c + 42);
2670 builder->Add(key, value);
2671 }
2672 ASSERT_OK(builder->Finish());
2673 file_writer->Flush();
2674
2675 test::StringSink* ss =
2676 static_cast<test::StringSink*>(file_writer->writable_file());
2677 std::unique_ptr<RandomAccessFileReader> file_reader(
2678 test::GetRandomAccessFileReader(
2679 new test::StringSource(ss->contents(), 72242, true)));
2680
2681 TableProperties* props = nullptr;
2682 auto s = ReadTableProperties(file_reader.get(), ss->contents().size(),
2683 kPlainTableMagicNumber, ioptions,
2684 &props, true /* compression_type_missing */);
2685 std::unique_ptr<TableProperties> props_guard(props);
2686 ASSERT_OK(s);
2687
2688 ASSERT_EQ(0ul, props->index_size);
2689 ASSERT_EQ(0ul, props->filter_size);
2690 ASSERT_EQ(16ul * 26, props->raw_key_size);
2691 ASSERT_EQ(28ul * 26, props->raw_value_size);
2692 ASSERT_EQ(26ul, props->num_entries);
2693 ASSERT_EQ(1ul, props->num_data_blocks);
2694 }
2695 #endif // !ROCKSDB_LITE
2696
2697 TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) {
2698 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2699 c.Add("k01", "hello");
2700 c.Add("k02", "hello2");
2701 c.Add("k03", std::string(10000, 'x'));
2702 c.Add("k04", std::string(200000, 'x'));
2703 c.Add("k05", std::string(300000, 'x'));
2704 c.Add("k06", "hello3");
2705 c.Add("k07", std::string(100000, 'x'));
2706 std::vector<std::string> keys;
2707 stl_wrappers::KVMap kvmap;
2708 Options options;
2709 test::PlainInternalKeyComparator internal_comparator(options.comparator);
2710 options.compression = kNoCompression;
2711 BlockBasedTableOptions table_options;
2712 table_options.block_size = 1024;
2713 const ImmutableCFOptions ioptions(options);
2714 const MutableCFOptions moptions(options);
2715 c.Finish(options, ioptions, moptions, table_options, internal_comparator,
2716 &keys, &kvmap);
2717
2718 ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0));
2719 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0));
2720 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"), 0, 0));
2721 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0));
2722 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 0, 0));
2723 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 10000, 11000));
2724 // k04 and k05 will be in two consecutive blocks, the index is
2725 // an arbitrary slice between k04 and k05, either before or after k04a
2726 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 10000, 211000));
2727 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000));
2728 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000));
2729 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000));
2730 ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000));
2731 c.ResetTableReader();
2732 }
2733
2734 static void DoCompressionTest(CompressionType comp) {
2735 Random rnd(301);
2736 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2737 std::string tmp;
2738 c.Add("k01", "hello");
2739 c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
2740 c.Add("k03", "hello3");
2741 c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
2742 std::vector<std::string> keys;
2743 stl_wrappers::KVMap kvmap;
2744 Options options;
2745 test::PlainInternalKeyComparator ikc(options.comparator);
2746 options.compression = comp;
2747 BlockBasedTableOptions table_options;
2748 table_options.block_size = 1024;
2749 const ImmutableCFOptions ioptions(options);
2750 const MutableCFOptions moptions(options);
2751 c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap);
2752
2753 ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0));
2754 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0));
2755 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0));
2756 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3500));
2757 ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3500));
2758 ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6500));
2759 c.ResetTableReader();
2760 }
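// DoCompressionTest relies on the two CompressibleString values (10000 bytes with
// a compressible fraction of 0.25) shrinking to roughly a quarter of their size,
// which is why the approximate offsets above fall in the 2000-3500 and 4000-6500
// ranges.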
2761
2762 TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) {
2763 std::vector<CompressionType> compression_state;
2764 if (!Snappy_Supported()) {
2765 fprintf(stderr, "skipping snappy compression tests\n");
2766 } else {
2767 compression_state.push_back(kSnappyCompression);
2768 }
2769
2770 if (!Zlib_Supported()) {
2771 fprintf(stderr, "skipping zlib compression tests\n");
2772 } else {
2773 compression_state.push_back(kZlibCompression);
2774 }
2775
2776 // TODO(kailiu) DoCompressionTest() doesn't work with BZip2.
2777 /*
2778 if (!BZip2_Supported()) {
2779 fprintf(stderr, "skipping bzip2 compression tests\n");
2780 } else {
2781 compression_state.push_back(kBZip2Compression);
2782 }
2783 */
2784
2785 if (!LZ4_Supported()) {
2786 fprintf(stderr, "skipping lz4 and lz4hc compression tests\n");
2787 } else {
2788 compression_state.push_back(kLZ4Compression);
2789 compression_state.push_back(kLZ4HCCompression);
2790 }
2791
2792 if (!XPRESS_Supported()) {
2793 fprintf(stderr, "skipping xpress compression tests\n");
2794 }
2795 else {
2796 compression_state.push_back(kXpressCompression);
2797 }
2798
2799 for (auto state : compression_state) {
2800 DoCompressionTest(state);
2801 }
2802 }
2803
2804 #ifndef ROCKSDB_VALGRIND_RUN
2805 // RandomizedHarnessTest is very slow for certain combinations of arguments.
2806 // Split into 8 pieces to reduce the time individual tests take.
2807 TEST_F(HarnessTest, Randomized1) {
2808 // part 1 out of 8
2809 const size_t part = 1;
2810 const size_t total = 8;
2811 RandomizedHarnessTest(part, total);
2812 }
2813
2814 TEST_F(HarnessTest, Randomized2) {
2815 // part 2 out of 8
2816 const size_t part = 2;
2817 const size_t total = 8;
2818 RandomizedHarnessTest(part, total);
2819 }
2820
2821 TEST_F(HarnessTest, Randomized3) {
2822 // part 3 out of 8
2823 const size_t part = 3;
2824 const size_t total = 8;
2825 RandomizedHarnessTest(part, total);
2826 }
2827
2828 TEST_F(HarnessTest, Randomized4) {
2829 // part 4 out of 8
2830 const size_t part = 4;
2831 const size_t total = 8;
2832 RandomizedHarnessTest(part, total);
2833 }
2834
2835 TEST_F(HarnessTest, Randomized5) {
2836 // part 5 out of 8
2837 const size_t part = 5;
2838 const size_t total = 8;
2839 RandomizedHarnessTest(part, total);
2840 }
2841
2842 TEST_F(HarnessTest, Randomized6) {
2843 // part 6 out of 8
2844 const size_t part = 6;
2845 const size_t total = 8;
2846 RandomizedHarnessTest(part, total);
2847 }
2848
2849 TEST_F(HarnessTest, Randomized7) {
2850 // part 7 out of 8
2851 const size_t part = 7;
2852 const size_t total = 8;
2853 RandomizedHarnessTest(part, total);
2854 }
2855
2856 TEST_F(HarnessTest, Randomized8) {
2857 // part 8 out of 8
2858 const size_t part = 8;
2859 const size_t total = 8;
2860 RandomizedHarnessTest(part, total);
2861 }
2862
2863 #ifndef ROCKSDB_LITE
2864 TEST_F(HarnessTest, RandomizedLongDB) {
2865 Random rnd(test::RandomSeed());
2866 TestArgs args = {DB_TEST, false, 16, kNoCompression, 0, false};
2867 Init(args);
2868 int num_entries = 100000;
2869 for (int e = 0; e < num_entries; e++) {
2870 std::string v;
2871 Add(test::RandomKey(&rnd, rnd.Skewed(4)),
2872 test::RandomString(&rnd, rnd.Skewed(5), &v).ToString());
2873 }
2874 Test(&rnd);
2875
2876 // We must have created enough data to force merging
2877 int files = 0;
2878 for (int level = 0; level < db()->NumberLevels(); level++) {
2879 std::string value;
2880 char name[100];
2881 snprintf(name, sizeof(name), "rocksdb.num-files-at-level%d", level);
2882 ASSERT_TRUE(db()->GetProperty(name, &value));
2883 files += atoi(value.c_str());
2884 }
2885 ASSERT_GT(files, 0);
2886 }
2887 #endif // ROCKSDB_LITE
2888 #endif // ROCKSDB_VALGRIND_RUN
2889
2890 class MemTableTest : public testing::Test {};
2891
2892 TEST_F(MemTableTest, Simple) {
2893 InternalKeyComparator cmp(BytewiseComparator());
2894 auto table_factory = std::make_shared<SkipListFactory>();
2895 Options options;
2896 options.memtable_factory = table_factory;
2897 ImmutableCFOptions ioptions(options);
2898 WriteBufferManager wb(options.db_write_buffer_size);
2899 MemTable* memtable =
2900 new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
2901 kMaxSequenceNumber, 0 /* column_family_id */);
2902 memtable->Ref();
2903 WriteBatch batch;
2904 WriteBatchInternal::SetSequence(&batch, 100);
2905 batch.Put(std::string("k1"), std::string("v1"));
2906 batch.Put(std::string("k2"), std::string("v2"));
2907 batch.Put(std::string("k3"), std::string("v3"));
2908 batch.Put(std::string("largekey"), std::string("vlarge"));
2909 batch.DeleteRange(std::string("chi"), std::string("xigua"));
2910 batch.DeleteRange(std::string("begin"), std::string("end"));
2911 ColumnFamilyMemTablesDefault cf_mems_default(memtable);
2912 ASSERT_TRUE(
2913 WriteBatchInternal::InsertInto(&batch, &cf_mems_default, nullptr).ok());
2914
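// The first pass iterates the memtable contents; the second pass iterates the
// range tombstones added by the DeleteRange calls above.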
2915 for (int i = 0; i < 2; ++i) {
2916 Arena arena;
2917 ScopedArenaIterator arena_iter_guard;
2918 std::unique_ptr<InternalIterator> iter_guard;
2919 InternalIterator* iter;
2920 if (i == 0) {
2921 iter = memtable->NewIterator(ReadOptions(), &arena);
2922 arena_iter_guard.set(iter);
2923 } else {
2924 iter = memtable->NewRangeTombstoneIterator(
2925 ReadOptions(), kMaxSequenceNumber /* read_seq */);
2926 iter_guard.reset(iter);
2927 }
2928 if (iter == nullptr) {
2929 continue;
2930 }
2931 iter->SeekToFirst();
2932 while (iter->Valid()) {
2933 fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(),
2934 iter->value().ToString().c_str());
2935 iter->Next();
2936 }
2937 }
2938
2939 delete memtable->Unref();
2940 }
2941
2942 // Test the empty key
2943 TEST_F(HarnessTest, SimpleEmptyKey) {
2944 auto args = GenerateArgList();
2945 for (const auto& arg : args) {
2946 Init(arg);
2947 Random rnd(test::RandomSeed() + 1);
2948 Add("", "v");
2949 Test(&rnd);
2950 }
2951 }
2952
2953 TEST_F(HarnessTest, SimpleSingle) {
2954 auto args = GenerateArgList();
2955 for (const auto& arg : args) {
2956 Init(arg);
2957 Random rnd(test::RandomSeed() + 2);
2958 Add("abc", "v");
2959 Test(&rnd);
2960 }
2961 }
2962
2963 TEST_F(HarnessTest, SimpleMulti) {
2964 auto args = GenerateArgList();
2965 for (const auto& arg : args) {
2966 Init(arg);
2967 Random rnd(test::RandomSeed() + 3);
2968 Add("abc", "v");
2969 Add("abcd", "v");
2970 Add("ac", "v2");
2971 Test(&rnd);
2972 }
2973 }
2974
2975 TEST_F(HarnessTest, SimpleSpecialKey) {
2976 auto args = GenerateArgList();
2977 for (const auto& arg : args) {
2978 Init(arg);
2979 Random rnd(test::RandomSeed() + 4);
2980 Add("\xff\xff", "v3");
2981 Test(&rnd);
2982 }
2983 }
2984
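// Footer round-trip tests: encode a Footer and decode it again, checking that
// legacy magic numbers are upconverted to their current equivalents and that the
// checksum type, block handles, and format version survive the round trip.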
2985 TEST_F(HarnessTest, FooterTests) {
2986 {
2987 // upconvert legacy block based
2988 std::string encoded;
2989 Footer footer(kLegacyBlockBasedTableMagicNumber, 0);
2990 BlockHandle meta_index(10, 5), index(20, 15);
2991 footer.set_metaindex_handle(meta_index);
2992 footer.set_index_handle(index);
2993 footer.EncodeTo(&encoded);
2994 Footer decoded_footer;
2995 Slice encoded_slice(encoded);
2996 decoded_footer.DecodeFrom(&encoded_slice);
2997 ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
2998 ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
2999 ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3000 ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3001 ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3002 ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3003 ASSERT_EQ(decoded_footer.version(), 0U);
3004 }
3005 {
3006 // xxhash block based
3007 std::string encoded;
3008 Footer footer(kBlockBasedTableMagicNumber, 1);
3009 BlockHandle meta_index(10, 5), index(20, 15);
3010 footer.set_metaindex_handle(meta_index);
3011 footer.set_index_handle(index);
3012 footer.set_checksum(kxxHash);
3013 footer.EncodeTo(&encoded);
3014 Footer decoded_footer;
3015 Slice encoded_slice(encoded);
3016 decoded_footer.DecodeFrom(&encoded_slice);
3017 ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
3018 ASSERT_EQ(decoded_footer.checksum(), kxxHash);
3019 ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3020 ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3021 ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3022 ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3023 ASSERT_EQ(decoded_footer.version(), 1U);
3024 }
3025 {
3026 // xxhash64 block based
3027 std::string encoded;
3028 Footer footer(kBlockBasedTableMagicNumber, 1);
3029 BlockHandle meta_index(10, 5), index(20, 15);
3030 footer.set_metaindex_handle(meta_index);
3031 footer.set_index_handle(index);
3032 footer.set_checksum(kxxHash64);
3033 footer.EncodeTo(&encoded);
3034 Footer decoded_footer;
3035 Slice encoded_slice(encoded);
3036 decoded_footer.DecodeFrom(&encoded_slice);
3037 ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
3038 ASSERT_EQ(decoded_footer.checksum(), kxxHash64);
3039 ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3040 ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3041 ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3042 ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3043 ASSERT_EQ(decoded_footer.version(), 1U);
3044 }
3045 // Plain table is not supported in ROCKSDB_LITE
3046 #ifndef ROCKSDB_LITE
3047 {
3048 // upconvert legacy plain table
3049 std::string encoded;
3050 Footer footer(kLegacyPlainTableMagicNumber, 0);
3051 BlockHandle meta_index(10, 5), index(20, 15);
3052 footer.set_metaindex_handle(meta_index);
3053 footer.set_index_handle(index);
3054 footer.EncodeTo(&encoded);
3055 Footer decoded_footer;
3056 Slice encoded_slice(encoded);
3057 decoded_footer.DecodeFrom(&encoded_slice);
3058 ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
3059 ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
3060 ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3061 ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3062 ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3063 ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3064 ASSERT_EQ(decoded_footer.version(), 0U);
3065 }
3066 {
3067 // xxhash plain table
3068 std::string encoded;
3069 Footer footer(kPlainTableMagicNumber, 1);
3070 BlockHandle meta_index(10, 5), index(20, 15);
3071 footer.set_metaindex_handle(meta_index);
3072 footer.set_index_handle(index);
3073 footer.set_checksum(kxxHash);
3074 footer.EncodeTo(&encoded);
3075 Footer decoded_footer;
3076 Slice encoded_slice(encoded);
3077 decoded_footer.DecodeFrom(&encoded_slice);
3078 ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
3079 ASSERT_EQ(decoded_footer.checksum(), kxxHash);
3080 ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3081 ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3082 ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3083 ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3084 ASSERT_EQ(decoded_footer.version(), 1U);
3085 }
3086 #endif // !ROCKSDB_LITE
3087 {
3088 // version == 2
3089 std::string encoded;
3090 Footer footer(kBlockBasedTableMagicNumber, 2);
3091 BlockHandle meta_index(10, 5), index(20, 15);
3092 footer.set_metaindex_handle(meta_index);
3093 footer.set_index_handle(index);
3094 footer.EncodeTo(&encoded);
3095 Footer decoded_footer;
3096 Slice encoded_slice(encoded);
3097 decoded_footer.DecodeFrom(&encoded_slice);
3098 ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
3099 ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
3100 ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3101 ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3102 ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3103 ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3104 ASSERT_EQ(decoded_footer.version(), 2U);
3105 }
3106 }
3107
3108 class IndexBlockRestartIntervalTest
3109 : public TableTest,
3110 public ::testing::WithParamInterface<std::pair<int, bool>> {
3111 public:
3112 static std::vector<std::pair<int, bool>> GetRestartValues() {
3113 return {{-1, false}, {0, false}, {1, false}, {8, false},
3114 {16, false}, {32, false}, {-1, true}, {0, true},
3115 {1, true}, {8, true}, {16, true}, {32, true}};
3116 }
3117 };
3118
3119 INSTANTIATE_TEST_CASE_P(
3120 IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest,
3121 ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues()));
3122
3123 TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) {
3124 const int kKeysInTable = 10000;
3125 const int kKeySize = 100;
3126 const int kValSize = 500;
3127
3128 const int index_block_restart_interval = std::get<0>(GetParam());
3129 const bool value_delta_encoding = std::get<1>(GetParam());
3130
3131 Options options;
3132 BlockBasedTableOptions table_options;
3133 table_options.block_size = 64; // small block size to get big index block
3134 table_options.index_block_restart_interval = index_block_restart_interval;
3135 if (value_delta_encoding) {
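// format_version 4 enables delta encoding of index block values.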
3136 table_options.format_version = 4;
3137 }
3138 options.table_factory.reset(new BlockBasedTableFactory(table_options));
3139
3140 TableConstructor c(BytewiseComparator());
3141 static Random rnd(301);
3142 for (int i = 0; i < kKeysInTable; i++) {
3143 InternalKey k(RandomString(&rnd, kKeySize), 0, kTypeValue);
3144 c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize));
3145 }
3146
3147 std::vector<std::string> keys;
3148 stl_wrappers::KVMap kvmap;
3149 std::unique_ptr<InternalKeyComparator> comparator(
3150 new InternalKeyComparator(BytewiseComparator()));
3151 const ImmutableCFOptions ioptions(options);
3152 const MutableCFOptions moptions(options);
3153 c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
3154 &kvmap);
3155 auto reader = c.GetTableReader();
3156
3157 std::unique_ptr<InternalIterator> db_iter(
3158 reader->NewIterator(ReadOptions(), moptions.prefix_extractor.get()));
3159
3160 // Test point lookup
3161 for (auto& kv : kvmap) {
3162 db_iter->Seek(kv.first);
3163
3164 ASSERT_TRUE(db_iter->Valid());
3165 ASSERT_OK(db_iter->status());
3166 ASSERT_EQ(db_iter->key(), kv.first);
3167 ASSERT_EQ(db_iter->value(), kv.second);
3168 }
3169
3170 // Test iterating
3171 auto kv_iter = kvmap.begin();
3172 for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
3173 ASSERT_EQ(db_iter->key(), kv_iter->first);
3174 ASSERT_EQ(db_iter->value(), kv_iter->second);
3175 kv_iter++;
3176 }
3177 ASSERT_EQ(kv_iter, kvmap.end());
3178 c.ResetTableReader();
3179 }
3180
3181 class PrefixTest : public testing::Test {
3182 public:
3183 PrefixTest() : testing::Test() {}
3184 ~PrefixTest() override {}
3185 };
3186
3187 namespace {
3188 // A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest
3189 class TestPrefixExtractor : public rocksdb::SliceTransform {
3190 public:
3191 ~TestPrefixExtractor() override {}
3192 const char* Name() const override { return "TestPrefixExtractor"; }
3193
3194 rocksdb::Slice Transform(const rocksdb::Slice& src) const override {
3195 assert(IsValid(src));
3196 return rocksdb::Slice(src.data(), 3);
3197 }
3198
3199 bool InDomain(const rocksdb::Slice& src) const override {
3200 assert(IsValid(src));
3201 return true;
3202 }
3203
3204 bool InRange(const rocksdb::Slice& /*dst*/) const override { return true; }
3205
3206 bool IsValid(const rocksdb::Slice& src) const {
3207 if (src.size() != 4) {
3208 return false;
3209 }
3210 if (src[0] != '[') {
3211 return false;
3212 }
3213 if (src[1] < '0' || src[1] > '9') {
3214 return false;
3215 }
3216 if (src[2] != ']') {
3217 return false;
3218 }
3219 if (src[3] < '0' || src[3] > '9') {
3220 return false;
3221 }
3222 return true;
3223 }
3224 };
3225 } // namespace
3226
3227 TEST_F(PrefixTest, PrefixAndWholeKeyTest) {
3228 rocksdb::Options options;
3229 options.compaction_style = rocksdb::kCompactionStyleUniversal;
3230 options.num_levels = 20;
3231 options.create_if_missing = true;
3232 options.optimize_filters_for_hits = false;
3233 options.target_file_size_base = 268435456;
3234 options.prefix_extractor = std::make_shared<TestPrefixExtractor>();
3235 rocksdb::BlockBasedTableOptions bbto;
3236 bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10));
3237 bbto.block_size = 262144;
3238 bbto.whole_key_filtering = true;
3239
3240 const std::string kDBPath = test::PerThreadDBPath("table_prefix_test");
3241 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3242 DestroyDB(kDBPath, options);
3243 rocksdb::DB* db;
3244 ASSERT_OK(rocksdb::DB::Open(options, kDBPath, &db));
3245
3246 // Create a bunch of keys with 10 filters.
3247 for (int i = 0; i < 10; i++) {
3248 std::string prefix = "[" + std::to_string(i) + "]";
3249 for (int j = 0; j < 10; j++) {
3250 std::string key = prefix + std::to_string(j);
3251 db->Put(rocksdb::WriteOptions(), key, "1");
3252 }
3253 }
3254
3255 // Trigger compaction.
3256 db->CompactRange(CompactRangeOptions(), nullptr, nullptr);
3257 delete db;
3258 // In the second round, turn whole_key_filtering off and expect
3259 // RocksDB to still work.
3260 }
3261
3262 /*
3263 * Disable TableWithGlobalSeqno since RocksDB does not store global_seqno in
3264 * the SST file any more. Instead, RocksDB deduces global_seqno from the
3265 * MANIFEST while reading from an SST. Therefore, it's not possible to test the
3266 * functionality of global_seqno in a single, isolated unit test without the
3267 * involvement of Version, VersionSet, etc.
3268 */
3269 TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
3270 BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
3271 test::StringSink* sink = new test::StringSink();
3272 std::unique_ptr<WritableFileWriter> file_writer(
3273 test::GetWritableFileWriter(sink, "" /* don't care */));
3274 Options options;
3275 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3276 const ImmutableCFOptions ioptions(options);
3277 const MutableCFOptions moptions(options);
3278 InternalKeyComparator ikc(options.comparator);
3279 std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
3280 int_tbl_prop_collector_factories;
3281 int_tbl_prop_collector_factories.emplace_back(
3282 new SstFileWriterPropertiesCollectorFactory(2 /* version */,
3283 0 /* global_seqno*/));
3284 std::string column_family_name;
3285 std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
3286 TableBuilderOptions(ioptions, moptions, ikc,
3287 &int_tbl_prop_collector_factories, kNoCompression,
3288 0 /* sample_for_compression */, CompressionOptions(),
3289 false /* skip_filters */, column_family_name, -1),
3290 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
3291 file_writer.get()));
3292
3293 for (char c = 'a'; c <= 'z'; ++c) {
3294 std::string key(8, c);
3295 std::string value = key;
3296 InternalKey ik(key, 0, kTypeValue);
3297
3298 builder->Add(ik.Encode(), value);
3299 }
3300 ASSERT_OK(builder->Finish());
3301 file_writer->Flush();
3302
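// Wrap the sink in a read-write sink so the global_seqno bytes can be
// patched in place below.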
3303 test::RandomRWStringSink ss_rw(sink);
3304 uint32_t version;
3305 uint64_t global_seqno;
3306 uint64_t global_seqno_offset;
3307
3308 // Helper function to get version, global_seqno, global_seqno_offset
3309 std::function<void()> GetVersionAndGlobalSeqno = [&]() {
3310 std::unique_ptr<RandomAccessFileReader> file_reader(
3311 test::GetRandomAccessFileReader(
3312 new test::StringSource(ss_rw.contents(), 73342, true)));
3313
3314 TableProperties* props = nullptr;
3315 ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
3316 kBlockBasedTableMagicNumber, ioptions,
3317 &props, true /* compression_type_missing */));
3318
3319 UserCollectedProperties user_props = props->user_collected_properties;
3320 version = DecodeFixed32(
3321 user_props[ExternalSstFilePropertyNames::kVersion].c_str());
3322 global_seqno = DecodeFixed64(
3323 user_props[ExternalSstFilePropertyNames::kGlobalSeqno].c_str());
3324 global_seqno_offset =
3325 props->properties_offsets[ExternalSstFilePropertyNames::kGlobalSeqno];
3326
3327 delete props;
3328 };
3329
3330 // Helper function to update the value of the global seqno in the file
3331 std::function<void(uint64_t)> SetGlobalSeqno = [&](uint64_t val) {
3332 std::string new_global_seqno;
3333 PutFixed64(&new_global_seqno, val);
3334
3335 ASSERT_OK(ss_rw.Write(global_seqno_offset, new_global_seqno));
3336 };
3337
3338 // Helper function to open a table reader and return an InternalIterator over its contents
3339 std::unique_ptr<TableReader> table_reader;
3340 std::function<InternalIterator*()> GetTableInternalIter = [&]() {
3341 std::unique_ptr<RandomAccessFileReader> file_reader(
3342 test::GetRandomAccessFileReader(
3343 new test::StringSource(ss_rw.contents(), 73342, true)));
3344
3345 options.table_factory->NewTableReader(
3346 TableReaderOptions(ioptions, moptions.prefix_extractor.get(),
3347 EnvOptions(), ikc),
3348 std::move(file_reader), ss_rw.contents().size(), &table_reader);
3349
3350 return table_reader->NewIterator(ReadOptions(),
3351 moptions.prefix_extractor.get());
3352 };
3353
3354 GetVersionAndGlobalSeqno();
3355 ASSERT_EQ(2, version);
3356 ASSERT_EQ(0, global_seqno);
3357
3358 InternalIterator* iter = GetTableInternalIter();
3359 char current_c = 'a';
3360 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
3361 ParsedInternalKey pik;
3362 ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
3363
3364 ASSERT_EQ(pik.type, ValueType::kTypeValue);
3365 ASSERT_EQ(pik.sequence, 0);
3366 ASSERT_EQ(pik.user_key, iter->value());
3367 ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
3368 current_c++;
3369 }
3370 ASSERT_EQ(current_c, 'z' + 1);
3371 delete iter;
3372
3373 // Update global sequence number to 10
3374 SetGlobalSeqno(10);
3375 GetVersionAndGlobalSeqno();
3376 ASSERT_EQ(2, version);
3377 ASSERT_EQ(10, global_seqno);
3378
3379 iter = GetTableInternalIter();
3380 current_c = 'a';
3381 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
3382 ParsedInternalKey pik;
3383 ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
3384
3385 ASSERT_EQ(pik.type, ValueType::kTypeValue);
3386 ASSERT_EQ(pik.sequence, 10);
3387 ASSERT_EQ(pik.user_key, iter->value());
3388 ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
3389 current_c++;
3390 }
3391 ASSERT_EQ(current_c, 'z' + 1);
3392
3393 // Verify Seek
3394 for (char c = 'a'; c <= 'z'; c++) {
3395 std::string k = std::string(8, c);
3396 InternalKey ik(k, 10, kValueTypeForSeek);
3397 iter->Seek(ik.Encode());
3398 ASSERT_TRUE(iter->Valid());
3399
3400 ParsedInternalKey pik;
3401 ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
3402
3403 ASSERT_EQ(pik.type, ValueType::kTypeValue);
3404 ASSERT_EQ(pik.sequence, 10);
3405 ASSERT_EQ(pik.user_key.ToString(), k);
3406 ASSERT_EQ(iter->value().ToString(), k);
3407 }
3408 delete iter;
3409
3410 // Update global sequence number to 3
3411 SetGlobalSeqno(3);
3412 GetVersionAndGlobalSeqno();
3413 ASSERT_EQ(2, version);
3414 ASSERT_EQ(3, global_seqno);
3415
3416 iter = GetTableInternalIter();
3417 current_c = 'a';
3418 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
3419 ParsedInternalKey pik;
3420 ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
3421
3422 ASSERT_EQ(pik.type, ValueType::kTypeValue);
3423 ASSERT_EQ(pik.sequence, 3);
3424 ASSERT_EQ(pik.user_key, iter->value());
3425 ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
3426 current_c++;
3427 }
3428 ASSERT_EQ(current_c, 'z' + 1);
3429
3430 // Verify Seek
3431 for (char c = 'a'; c <= 'z'; c++) {
3432 std::string k = std::string(8, c);
3433 // In internal key ordering, (k, seqno=4) sorts before (k, seqno=3), so we should still get our key
3434 InternalKey ik(k, 4, kValueTypeForSeek);
3435 iter->Seek(ik.Encode());
3436 ASSERT_TRUE(iter->Valid());
3437
3438 ParsedInternalKey pik;
3439 ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
3440
3441 ASSERT_EQ(pik.type, ValueType::kTypeValue);
3442 ASSERT_EQ(pik.sequence, 3);
3443 ASSERT_EQ(pik.user_key.ToString(), k);
3444 ASSERT_EQ(iter->value().ToString(), k);
3445 }
3446
3447 delete iter;
3448 }
3449
3450 TEST_P(BlockBasedTableTest, BlockAlignTest) {
3451 BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
3452 bbto.block_align = true;
3453 test::StringSink* sink = new test::StringSink();
3454 std::unique_ptr<WritableFileWriter> file_writer(
3455 test::GetWritableFileWriter(sink, "" /* don't care */));
3456 Options options;
3457 options.compression = kNoCompression;
3458 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3459 const ImmutableCFOptions ioptions(options);
3460 const MutableCFOptions moptions(options);
3461 InternalKeyComparator ikc(options.comparator);
3462 std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
3463 int_tbl_prop_collector_factories;
3464 std::string column_family_name;
3465 std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
3466 TableBuilderOptions(ioptions, moptions, ikc,
3467 &int_tbl_prop_collector_factories, kNoCompression,
3468 0 /* sample_for_compression */, CompressionOptions(),
3469 false /* skip_filters */, column_family_name, -1),
3470 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
3471 file_writer.get()));
3472
3473 for (int i = 1; i <= 10000; ++i) {
3474 std::ostringstream ostr;
3475 ostr << std::setfill('0') << std::setw(5) << i;
3476 std::string key = ostr.str();
3477 std::string value = "val";
3478 InternalKey ik(key, 0, kTypeValue);
3479
3480 builder->Add(ik.Encode(), value);
3481 }
3482 ASSERT_OK(builder->Finish());
3483 file_writer->Flush();
3484
3485 test::RandomRWStringSink ss_rw(sink);
3486 std::unique_ptr<RandomAccessFileReader> file_reader(
3487 test::GetRandomAccessFileReader(
3488 new test::StringSource(ss_rw.contents(), 73342, true)));
3489
3490 // Helper function to verify that data blocks are padded to the 4096-byte block alignment
3491 std::function<void()> VerifyBlockAlignment = [&]() {
3492 TableProperties* props = nullptr;
3493 ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
3494 kBlockBasedTableMagicNumber, ioptions,
3495 &props, true /* compression_type_missing */));
3496
3497 uint64_t data_block_size = props->data_size / props->num_data_blocks;
3498 ASSERT_EQ(data_block_size, 4096);
3499 ASSERT_EQ(props->data_size, data_block_size * props->num_data_blocks);
3500 delete props;
3501 };
3502
3503 VerifyBlockAlignment();
3504
3505 // The block of code below verifies that we can read back the keys. Set
3506 // block_align to false when creating the reader to ensure we can flip between
3507 // the two modes without any issues.
3508 std::unique_ptr<TableReader> table_reader;
3509 bbto.block_align = false;
3510 Options options2;
3511 options2.table_factory.reset(NewBlockBasedTableFactory(bbto));
3512 ImmutableCFOptions ioptions2(options2);
3513 const MutableCFOptions moptions2(options2);
3514
3515 ASSERT_OK(ioptions.table_factory->NewTableReader(
3516 TableReaderOptions(ioptions2, moptions2.prefix_extractor.get(),
3517 EnvOptions(),
3518 GetPlainInternalComparator(options2.comparator)),
3519 std::move(file_reader), ss_rw.contents().size(), &table_reader));
3520
3521 std::unique_ptr<InternalIterator> db_iter(table_reader->NewIterator(
3522 ReadOptions(), moptions2.prefix_extractor.get()));
3523
3524 int expected_key = 1;
3525 for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
3526 std::ostringstream ostr;
3527 ostr << std::setfill('0') << std::setw(5) << expected_key++;
3528 std::string key = ostr.str();
3529 std::string value = "val";
3530
3531 ASSERT_OK(db_iter->status());
3532 ASSERT_EQ(ExtractUserKey(db_iter->key()).ToString(), key);
3533 ASSERT_EQ(db_iter->value().ToString(), value);
3534 }
3535 expected_key--;
3536 ASSERT_EQ(expected_key, 10000);
3537 table_reader.reset();
3538 }
3539
3540 TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
3541 BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
3542 bbto.block_align = true;
3543 test::StringSink* sink = new test::StringSink();
3544 std::unique_ptr<WritableFileWriter> file_writer(
3545 test::GetWritableFileWriter(sink, "" /* don't care */));
3546
3547 Options options;
3548 options.compression = kNoCompression;
3549 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3550
3551 const ImmutableCFOptions ioptions(options);
3552 const MutableCFOptions moptions(options);
3553 InternalKeyComparator ikc(options.comparator);
3554 std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
3555 int_tbl_prop_collector_factories;
3556 std::string column_family_name;
3557
3558 std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
3559 TableBuilderOptions(ioptions, moptions, ikc,
3560 &int_tbl_prop_collector_factories, kNoCompression,
3561 0 /* sample_for_compression */, CompressionOptions(),
3562 false /* skip_filters */, column_family_name, -1),
3563 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
3564 file_writer.get()));
3565
3566 for (int i = 1; i <= 10000; ++i) {
3567 std::ostringstream ostr;
3568 ostr << std::setfill('0') << std::setw(5) << i;
3569 std::string key = ostr.str();
3570 std::string value = "val";
3571 InternalKey ik(key, 0, kTypeValue);
3572
3573 builder->Add(ik.Encode(), value);
3574 }
3575 ASSERT_OK(builder->Finish());
3576 file_writer->Flush();
3577
3578 test::RandomRWStringSink ss_rw(sink);
3579 std::unique_ptr<RandomAccessFileReader> file_reader(
3580 test::GetRandomAccessFileReader(
3581 new test::StringSource(ss_rw.contents(), 73342, true)));
3582
3583 {
3584 RandomAccessFileReader* file = file_reader.get();
3585 uint64_t file_size = ss_rw.contents().size();
3586
3587 Footer footer;
3588 ASSERT_OK(ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
3589 &footer, kBlockBasedTableMagicNumber));
3590
3591 auto BlockFetchHelper = [&](const BlockHandle& handle,
3592 BlockContents* contents) {
3593 ReadOptions read_options;
3594 read_options.verify_checksums = false;
3595 PersistentCacheOptions cache_options;
3596
3597 BlockFetcher block_fetcher(
3598 file, nullptr /* prefetch_buffer */, footer, read_options, handle,
3599 contents, ioptions, false /* decompress */,
3600 false /*maybe_compressed*/, UncompressionDict::GetEmptyDict(),
3601 cache_options);
3602
3603 ASSERT_OK(block_fetcher.ReadBlockContents());
3604 };
3605
3606 // -- Read metaindex block
3607 auto metaindex_handle = footer.metaindex_handle();
3608 BlockContents metaindex_contents;
3609
3610 BlockFetchHelper(metaindex_handle, &metaindex_contents);
3611 Block metaindex_block(std::move(metaindex_contents),
3612 kDisableGlobalSequenceNumber);
3613
3614 std::unique_ptr<InternalIterator> meta_iter(
3615 metaindex_block.NewIterator<DataBlockIter>(BytewiseComparator(),
3616 BytewiseComparator()));
3617 bool found_properties_block = true;
3618 ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block));
3619 ASSERT_TRUE(found_properties_block);
3620
3621 // -- Read properties block
3622 Slice v = meta_iter->value();
3623 BlockHandle properties_handle;
3624 ASSERT_OK(properties_handle.DecodeFrom(&v));
3625 BlockContents properties_contents;
3626
3627 BlockFetchHelper(properties_handle, &properties_contents);
3628 Block properties_block(std::move(properties_contents),
3629 kDisableGlobalSequenceNumber);
3630
3631 ASSERT_EQ(properties_block.NumRestarts(), 1);
3632 }
3633 }
3634
3635 TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
3636 // The properties meta-block should come at the end since we always need to
3637 // read it when opening a file, unlike index/filter/other meta-blocks, which
3638 // are sometimes read depending on the user's configuration. This ordering
3639 // allows us to do a small readahead on the end of the file to read properties
3640 // and meta-index blocks with one I/O.
3641 TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
3642 c.Add("a1", "val1");
3643 c.Add("b2", "val2");
3644 c.Add("c3", "val3");
3645 c.Add("d4", "val4");
3646 c.Add("e5", "val5");
3647 c.Add("f6", "val6");
3648 c.Add("g7", "val7");
3649 c.Add("h8", "val8");
3650 c.Add("j9", "val9");
3651
3652 // write an SST file
3653 Options options;
3654 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3655 table_options.filter_policy.reset(NewBloomFilterPolicy(
3656 8 /* bits_per_key */, false /* use_block_based_filter */));
3657 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3658 ImmutableCFOptions ioptions(options);
3659 MutableCFOptions moptions(options);
3660 std::vector<std::string> keys;
3661 stl_wrappers::KVMap kvmap;
3662 c.Finish(options, ioptions, moptions, table_options,
3663 GetPlainInternalComparator(options.comparator), &keys, &kvmap);
3664
3665 // get file reader
3666 test::StringSink* table_sink = c.TEST_GetSink();
3667 std::unique_ptr<RandomAccessFileReader> table_reader{
3668 test::GetRandomAccessFileReader(
3669 new test::StringSource(table_sink->contents(), 0 /* unique_id */,
3670 false /* allow_mmap_reads */))};
3671 size_t table_size = table_sink->contents().size();
3672
3673 // read footer
3674 Footer footer;
3675 ASSERT_OK(ReadFooterFromFile(table_reader.get(),
3676 nullptr /* prefetch_buffer */, table_size,
3677 &footer, kBlockBasedTableMagicNumber));
3678
3679 // read metaindex
3680 auto metaindex_handle = footer.metaindex_handle();
3681 BlockContents metaindex_contents;
3682 PersistentCacheOptions pcache_opts;
3683 BlockFetcher block_fetcher(
3684 table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(),
3685 metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
3686 false /*maybe_compressed*/, UncompressionDict::GetEmptyDict(),
3687 pcache_opts, nullptr /*memory_allocator*/);
3688 ASSERT_OK(block_fetcher.ReadBlockContents());
3689 Block metaindex_block(std::move(metaindex_contents),
3690 kDisableGlobalSequenceNumber);
3691
3692 // verify properties block comes last
3693 std::unique_ptr<InternalIterator> metaindex_iter{
3694 metaindex_block.NewIterator<DataBlockIter>(options.comparator,
3695 options.comparator)};
3696 uint64_t max_offset = 0;
3697 std::string key_at_max_offset;
3698 for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid();
3699 metaindex_iter->Next()) {
3700 BlockHandle handle;
3701 Slice value = metaindex_iter->value();
3702 ASSERT_OK(handle.DecodeFrom(&value));
3703 if (handle.offset() > max_offset) {
3704 max_offset = handle.offset();
3705 key_at_max_offset = metaindex_iter->key().ToString();
3706 }
3707 }
3708 ASSERT_EQ(kPropertiesBlock, key_at_max_offset);
3709 // index handle is stored in footer rather than metaindex block, so we need
3710 // separate logic to verify it comes before the properties block.
3711 ASSERT_GT(max_offset, footer.index_handle().offset());
3712 c.ResetTableReader();
3713 }
3714
3715 TEST_P(BlockBasedTableTest, BadOptions) {
3716 rocksdb::Options options;
3717 options.compression = kNoCompression;
3718 BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
3719 bbto.block_size = 4000;
3720 bbto.block_align = true;
3721
3722 const std::string kDBPath =
3723 test::PerThreadDBPath("block_based_table_bad_options_test");
3724 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3725 DestroyDB(kDBPath, options);
3726 rocksdb::DB* db;
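// Expect Open to fail: block_align requires block_size to be a power of two
// (4000 is not).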
3727 ASSERT_NOK(rocksdb::DB::Open(options, kDBPath, &db));
3728
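// Expect Open to fail again: block_align is incompatible with compression.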
3729 bbto.block_size = 4096;
3730 options.compression = kSnappyCompression;
3731 options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3732 ASSERT_NOK(rocksdb::DB::Open(options, kDBPath, &db));
3733 }
3734
3735 TEST_F(BBTTailPrefetchTest, TestTailPrefetchStats) {
3736 TailPrefetchStats tpstats;
3737 ASSERT_EQ(0, tpstats.GetSuggestedPrefetchSize());
3738 tpstats.RecordEffectiveSize(size_t{1000});
3739 tpstats.RecordEffectiveSize(size_t{1005});
3740 tpstats.RecordEffectiveSize(size_t{1002});
3741 ASSERT_EQ(1005, tpstats.GetSuggestedPrefetchSize());
3742
3743 // A single very large value shouldn't have much influence
3744 tpstats.RecordEffectiveSize(size_t{1002000});
3745 tpstats.RecordEffectiveSize(size_t{999});
3746 ASSERT_LE(1005, tpstats.GetSuggestedPrefetchSize());
3747 ASSERT_GT(1200, tpstats.GetSuggestedPrefetchSize());
3748
3749 // Only history of 32 is kept
3750 for (int i = 0; i < 32; i++) {
3751 tpstats.RecordEffectiveSize(size_t{100});
3752 }
3753 ASSERT_EQ(100, tpstats.GetSuggestedPrefetchSize());
3754
3755 // 16 large values and 16 small values. The result should be closer
3756 // to the small values.
3757 for (int i = 0; i < 16; i++) {
3758 tpstats.RecordEffectiveSize(size_t{1000});
3759 }
3760 tpstats.RecordEffectiveSize(size_t{10});
3761 tpstats.RecordEffectiveSize(size_t{20});
3762 for (int i = 0; i < 6; i++) {
3763 tpstats.RecordEffectiveSize(size_t{100});
3764 }
3765 ASSERT_LE(80, tpstats.GetSuggestedPrefetchSize());
3766 ASSERT_GT(200, tpstats.GetSuggestedPrefetchSize());
3767 }
3768
3769 TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) {
3770 TailPrefetchStats tpstats;
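// No file is attached to the buffer; it only tracks the minimum offset
// requested via TryReadFromCache().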
3771 FilePrefetchBuffer buffer(nullptr, 0, 0, false, true);
3772 buffer.TryReadFromCache(500, 10, nullptr);
3773 buffer.TryReadFromCache(480, 10, nullptr);
3774 buffer.TryReadFromCache(490, 10, nullptr);
3775 ASSERT_EQ(480, buffer.min_offset_read());
3776 }
3777
3778 TEST_P(BlockBasedTableTest, DataBlockHashIndex) {
3779 const int kNumKeys = 500;
3780 const int kKeySize = 8;
3781 const int kValSize = 40;
3782
3783 BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3784 table_options.data_block_index_type =
3785 BlockBasedTableOptions::kDataBlockBinaryAndHash;
3786
3787 Options options;
3788 options.comparator = BytewiseComparator();
3789
3790 options.table_factory.reset(new BlockBasedTableFactory(table_options));
3791
3792 TableConstructor c(options.comparator);
3793
3794 static Random rnd(1048);
3795 for (int i = 0; i < kNumKeys; i++) {
3796 // padding one "1" to mark existent keys.
3797 std::string random_key(RandomString(&rnd, kKeySize - 1) + "1");
3798 InternalKey k(random_key, 0, kTypeValue);
3799 c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize));
3800 }
3801
3802 std::vector<std::string> keys;
3803 stl_wrappers::KVMap kvmap;
3804 const ImmutableCFOptions ioptions(options);
3805 const MutableCFOptions moptions(options);
3806 const InternalKeyComparator internal_comparator(options.comparator);
3807 c.Finish(options, ioptions, moptions, table_options, internal_comparator,
3808 &keys, &kvmap);
3809
3810 auto reader = c.GetTableReader();
3811
3812 std::unique_ptr<InternalIterator> seek_iter;
3813 seek_iter.reset(
3814 reader->NewIterator(ReadOptions(), moptions.prefix_extractor.get()));
3815 for (int i = 0; i < 2; ++i) {
3816 ReadOptions ro;
3817 // for every kv, we seek using two methods: Get() and Seek()
3818 // Get() will use the SuffixIndexHash in Block. For a non-existent key it
3819 // will invalidate the iterator
3820 // Seek() will use the default BinarySeek() in Block. So for a non-existent
3821 // key it will land at the closest key that is larger than the target.
3822
3823 // Search for existent keys
3824 for (auto& kv : kvmap) {
3825 if (i == 0) {
3826 // Search using Seek()
3827 seek_iter->Seek(kv.first);
3828 ASSERT_OK(seek_iter->status());
3829 ASSERT_TRUE(seek_iter->Valid());
3830 ASSERT_EQ(seek_iter->key(), kv.first);
3831 ASSERT_EQ(seek_iter->value(), kv.second);
3832 } else {
3833 // Search using Get()
3834 PinnableSlice value;
3835 std::string user_key = ExtractUserKey(kv.first).ToString();
3836 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
3837 GetContext::kNotFound, user_key, &value, nullptr,
3838 nullptr, nullptr, nullptr);
3839 ASSERT_OK(reader->Get(ro, kv.first, &get_context,
3840 moptions.prefix_extractor.get()));
3841 ASSERT_EQ(get_context.State(), GetContext::kFound);
3842 ASSERT_EQ(value, Slice(kv.second));
3843 value.Reset();
3844 }
3845 }
3846
3847 // Search for non-existent keys
3848 for (auto& kv : kvmap) {
3849 std::string user_key = ExtractUserKey(kv.first).ToString();
3850 user_key.back() = '0'; // make it non-existent key
3851 InternalKey internal_key(user_key, 0, kTypeValue);
3852 std::string encoded_key = internal_key.Encode().ToString();
3853 if (i == 0) { // Search using Seek()
3854 seek_iter->Seek(encoded_key);
3855 ASSERT_OK(seek_iter->status());
3856 if (seek_iter->Valid()) {
3857 ASSERT_TRUE(BytewiseComparator()->Compare(
3858 user_key, ExtractUserKey(seek_iter->key())) < 0);
3859 }
3860 } else { // Search using Get()
3861 PinnableSlice value;
3862 GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
3863 GetContext::kNotFound, user_key, &value, nullptr,
3864 nullptr, nullptr, nullptr);
3865 ASSERT_OK(reader->Get(ro, encoded_key, &get_context,
3866 moptions.prefix_extractor.get()));
3867 ASSERT_EQ(get_context.State(), GetContext::kNotFound);
3868 value.Reset();
3869 }
3870 }
3871 }
3872 }
3873
3874 } // namespace rocksdb
3875
3876 int main(int argc, char** argv) {
3877 ::testing::InitGoogleTest(&argc, argv);
3878 return RUN_ALL_TESTS();
3879 }