1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
10 #include "cache/compressed_secondary_cache.h"
11 #include "db/blob/blob_index.h"
12 #include "db/blob/blob_log_format.h"
13 #include "db/db_test_util.h"
14 #include "port/stack_trace.h"
15 #include "test_util/sync_point.h"
16 #include "utilities/fault_injection_env.h"
18 namespace ROCKSDB_NAMESPACE
{
20 class DBBlobBasicTest
: public DBTestBase
{
23 : DBTestBase("db_blob_basic_test", /* env_do_fsync */ false) {}
26 TEST_F(DBBlobBasicTest
, GetBlob
) {
27 Options options
= GetDefaultOptions();
28 options
.enable_blob_files
= true;
29 options
.min_blob_size
= 0;
33 constexpr char key
[] = "key";
34 constexpr char blob_value
[] = "blob_value";
36 ASSERT_OK(Put(key
, blob_value
));
40 ASSERT_EQ(Get(key
), blob_value
);
42 // Try again with no I/O allowed. The table and the necessary blocks should
43 // already be in their respective caches; however, the blob itself can only be
44 // read from the blob file, so the read should return Incomplete.
45 ReadOptions read_options
;
46 read_options
.read_tier
= kBlockCacheTier
;
49 ASSERT_TRUE(db_
->Get(read_options
, db_
->DefaultColumnFamily(), key
, &result
)
53 TEST_F(DBBlobBasicTest
, GetBlobFromCache
) {
54 Options options
= GetDefaultOptions();
57 co
.capacity
= 2 << 20; // 2MB
58 co
.num_shard_bits
= 2;
59 co
.metadata_charge_policy
= kDontChargeCacheMetadata
;
60 auto backing_cache
= NewLRUCache(co
);
62 options
.enable_blob_files
= true;
63 options
.blob_cache
= backing_cache
;
65 BlockBasedTableOptions block_based_options
;
66 block_based_options
.no_block_cache
= false;
67 block_based_options
.block_cache
= backing_cache
;
68 block_based_options
.cache_index_and_filter_blocks
= true;
69 options
.table_factory
.reset(NewBlockBasedTableFactory(block_based_options
));
73 constexpr char key
[] = "key";
74 constexpr char blob_value
[] = "blob_value";
76 ASSERT_OK(Put(key
, blob_value
));
80 ReadOptions read_options
;
82 read_options
.fill_cache
= false;
87 read_options
.read_tier
= kReadAllTier
;
88 ASSERT_OK(db_
->Get(read_options
, db_
->DefaultColumnFamily(), key
, &result
));
89 ASSERT_EQ(result
, blob_value
);
92 read_options
.read_tier
= kBlockCacheTier
;
// Try again with no I/O allowed. Since we didn't re-fill the cache, the
// blob itself can only be read from the blob file, so the read should
// return Incomplete.
97 ASSERT_TRUE(db_
->Get(read_options
, db_
->DefaultColumnFamily(), key
, &result
)
99 ASSERT_TRUE(result
.empty());
102 read_options
.fill_cache
= true;
105 PinnableSlice result
;
107 read_options
.read_tier
= kReadAllTier
;
108 ASSERT_OK(db_
->Get(read_options
, db_
->DefaultColumnFamily(), key
, &result
));
109 ASSERT_EQ(result
, blob_value
);
112 read_options
.read_tier
= kBlockCacheTier
;
114 // Try again with no I/O allowed. The table and the necessary blocks/blobs
115 // should already be in their respective caches.
116 ASSERT_OK(db_
->Get(read_options
, db_
->DefaultColumnFamily(), key
, &result
));
117 ASSERT_EQ(result
, blob_value
);
121 TEST_F(DBBlobBasicTest
, IterateBlobsFromCache
) {
122 Options options
= GetDefaultOptions();
125 co
.capacity
= 2 << 20; // 2MB
126 co
.num_shard_bits
= 2;
127 co
.metadata_charge_policy
= kDontChargeCacheMetadata
;
128 auto backing_cache
= NewLRUCache(co
);
130 options
.enable_blob_files
= true;
131 options
.blob_cache
= backing_cache
;
133 BlockBasedTableOptions block_based_options
;
134 block_based_options
.no_block_cache
= false;
135 block_based_options
.block_cache
= backing_cache
;
136 block_based_options
.cache_index_and_filter_blocks
= true;
137 options
.table_factory
.reset(NewBlockBasedTableFactory(block_based_options
));
139 options
.statistics
= CreateDBStatistics();
144 std::vector
<std::string
> keys
;
145 std::vector
<std::string
> blobs
;
147 for (int i
= 0; i
< num_blobs
; ++i
) {
148 keys
.push_back("key" + std::to_string(i
));
149 blobs
.push_back("blob" + std::to_string(i
));
150 ASSERT_OK(Put(keys
[i
], blobs
[i
]));
154 ReadOptions read_options
;
157 read_options
.fill_cache
= false;
158 read_options
.read_tier
= kReadAllTier
;
160 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_options
));
161 ASSERT_OK(iter
->status());
164 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
165 ASSERT_OK(iter
->status());
166 ASSERT_EQ(iter
->key().ToString(), keys
[i
]);
167 ASSERT_EQ(iter
->value().ToString(), blobs
[i
]);
170 ASSERT_EQ(i
, num_blobs
);
171 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_ADD
), 0);
175 read_options
.fill_cache
= false;
176 read_options
.read_tier
= kBlockCacheTier
;
178 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_options
));
179 ASSERT_OK(iter
->status());
// Try again with no I/O allowed. Since we didn't re-fill the cache,
// the blob itself can only be read from the blob file, so iter->Valid()
// should be false.
185 ASSERT_NOK(iter
->status());
186 ASSERT_FALSE(iter
->Valid());
187 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_ADD
), 0);
191 read_options
.fill_cache
= true;
192 read_options
.read_tier
= kReadAllTier
;
194 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_options
));
195 ASSERT_OK(iter
->status());
197 // Read blobs from the file and refill the cache.
199 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
200 ASSERT_OK(iter
->status());
201 ASSERT_EQ(iter
->key().ToString(), keys
[i
]);
202 ASSERT_EQ(iter
->value().ToString(), blobs
[i
]);
205 ASSERT_EQ(i
, num_blobs
);
206 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_ADD
),
211 read_options
.fill_cache
= false;
212 read_options
.read_tier
= kBlockCacheTier
;
214 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_options
));
215 ASSERT_OK(iter
->status());
217 // Try again with no I/O allowed. The table and the necessary blocks/blobs
218 // should already be in their respective caches.
220 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
221 ASSERT_OK(iter
->status());
222 ASSERT_EQ(iter
->key().ToString(), keys
[i
]);
223 ASSERT_EQ(iter
->value().ToString(), blobs
[i
]);
226 ASSERT_EQ(i
, num_blobs
);
227 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_ADD
), 0);
231 TEST_F(DBBlobBasicTest
, IterateBlobsFromCachePinning
) {
232 constexpr size_t min_blob_size
= 6;
234 Options options
= GetDefaultOptions();
236 LRUCacheOptions cache_options
;
237 cache_options
.capacity
= 2048;
238 cache_options
.num_shard_bits
= 0;
239 cache_options
.metadata_charge_policy
= kDontChargeCacheMetadata
;
241 options
.blob_cache
= NewLRUCache(cache_options
);
242 options
.enable_blob_files
= true;
243 options
.min_blob_size
= min_blob_size
;
247 // Put then iterate over three key-values. The second value is below the size
248 // limit and is thus stored inline; the other two are stored separately as
249 // blobs. We expect to have something pinned in the cache iff we are
250 // positioned on a blob.
252 constexpr char first_key
[] = "first_key";
253 constexpr char first_value
[] = "long_value";
254 static_assert(sizeof(first_value
) - 1 >= min_blob_size
,
255 "first_value too short to be stored as blob");
257 ASSERT_OK(Put(first_key
, first_value
));
259 constexpr char second_key
[] = "second_key";
260 constexpr char second_value
[] = "short";
261 static_assert(sizeof(second_value
) - 1 < min_blob_size
,
262 "second_value too long to be inlined");
264 ASSERT_OK(Put(second_key
, second_value
));
266 constexpr char third_key
[] = "third_key";
267 constexpr char third_value
[] = "other_long_value";
268 static_assert(sizeof(third_value
) - 1 >= min_blob_size
,
269 "third_value too short to be stored as blob");
271 ASSERT_OK(Put(third_key
, third_value
));
276 ReadOptions read_options
;
277 read_options
.fill_cache
= true;
279 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_options
));
282 ASSERT_TRUE(iter
->Valid());
283 ASSERT_OK(iter
->status());
284 ASSERT_EQ(iter
->key(), first_key
);
285 ASSERT_EQ(iter
->value(), first_value
);
288 ASSERT_TRUE(iter
->Valid());
289 ASSERT_OK(iter
->status());
290 ASSERT_EQ(iter
->key(), second_key
);
291 ASSERT_EQ(iter
->value(), second_value
);
294 ASSERT_TRUE(iter
->Valid());
295 ASSERT_OK(iter
->status());
296 ASSERT_EQ(iter
->key(), third_key
);
297 ASSERT_EQ(iter
->value(), third_value
);
300 ASSERT_FALSE(iter
->Valid());
301 ASSERT_OK(iter
->status());
305 ReadOptions read_options
;
306 read_options
.fill_cache
= false;
307 read_options
.read_tier
= kBlockCacheTier
;
309 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_options
));
312 ASSERT_TRUE(iter
->Valid());
313 ASSERT_OK(iter
->status());
314 ASSERT_EQ(iter
->key(), first_key
);
315 ASSERT_EQ(iter
->value(), first_value
);
316 ASSERT_GT(options
.blob_cache
->GetPinnedUsage(), 0);
319 ASSERT_TRUE(iter
->Valid());
320 ASSERT_OK(iter
->status());
321 ASSERT_EQ(iter
->key(), second_key
);
322 ASSERT_EQ(iter
->value(), second_value
);
323 ASSERT_EQ(options
.blob_cache
->GetPinnedUsage(), 0);
326 ASSERT_TRUE(iter
->Valid());
327 ASSERT_OK(iter
->status());
328 ASSERT_EQ(iter
->key(), third_key
);
329 ASSERT_EQ(iter
->value(), third_value
);
330 ASSERT_GT(options
.blob_cache
->GetPinnedUsage(), 0);
333 ASSERT_FALSE(iter
->Valid());
334 ASSERT_OK(iter
->status());
335 ASSERT_EQ(options
.blob_cache
->GetPinnedUsage(), 0);
339 ReadOptions read_options
;
340 read_options
.fill_cache
= false;
341 read_options
.read_tier
= kBlockCacheTier
;
343 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_options
));
346 ASSERT_TRUE(iter
->Valid());
347 ASSERT_OK(iter
->status());
348 ASSERT_EQ(iter
->key(), third_key
);
349 ASSERT_EQ(iter
->value(), third_value
);
350 ASSERT_GT(options
.blob_cache
->GetPinnedUsage(), 0);
353 ASSERT_TRUE(iter
->Valid());
354 ASSERT_OK(iter
->status());
355 ASSERT_EQ(iter
->key(), second_key
);
356 ASSERT_EQ(iter
->value(), second_value
);
357 ASSERT_EQ(options
.blob_cache
->GetPinnedUsage(), 0);
360 ASSERT_TRUE(iter
->Valid());
361 ASSERT_OK(iter
->status());
362 ASSERT_EQ(iter
->key(), first_key
);
363 ASSERT_EQ(iter
->value(), first_value
);
364 ASSERT_GT(options
.blob_cache
->GetPinnedUsage(), 0);
367 ASSERT_FALSE(iter
->Valid());
368 ASSERT_OK(iter
->status());
369 ASSERT_EQ(options
.blob_cache
->GetPinnedUsage(), 0);
373 TEST_F(DBBlobBasicTest
, MultiGetBlobs
) {
374 constexpr size_t min_blob_size
= 6;
376 Options options
= GetDefaultOptions();
377 options
.enable_blob_files
= true;
378 options
.min_blob_size
= min_blob_size
;
382 // Put then retrieve three key-values. The first value is below the size limit
383 // and is thus stored inline; the other two are stored separately as blobs.
384 constexpr size_t num_keys
= 3;
386 constexpr char first_key
[] = "first_key";
387 constexpr char first_value
[] = "short";
388 static_assert(sizeof(first_value
) - 1 < min_blob_size
,
389 "first_value too long to be inlined");
391 ASSERT_OK(Put(first_key
, first_value
));
393 constexpr char second_key
[] = "second_key";
394 constexpr char second_value
[] = "long_value";
395 static_assert(sizeof(second_value
) - 1 >= min_blob_size
,
396 "second_value too short to be stored as blob");
398 ASSERT_OK(Put(second_key
, second_value
));
400 constexpr char third_key
[] = "third_key";
401 constexpr char third_value
[] = "other_long_value";
402 static_assert(sizeof(third_value
) - 1 >= min_blob_size
,
403 "third_value too short to be stored as blob");
405 ASSERT_OK(Put(third_key
, third_value
));
409 ReadOptions read_options
;
411 std::array
<Slice
, num_keys
> keys
{{first_key
, second_key
, third_key
}};
414 std::array
<PinnableSlice
, num_keys
> values
;
415 std::array
<Status
, num_keys
> statuses
;
417 db_
->MultiGet(read_options
, db_
->DefaultColumnFamily(), num_keys
, &keys
[0],
418 &values
[0], &statuses
[0]);
420 ASSERT_OK(statuses
[0]);
421 ASSERT_EQ(values
[0], first_value
);
423 ASSERT_OK(statuses
[1]);
424 ASSERT_EQ(values
[1], second_value
);
426 ASSERT_OK(statuses
[2]);
427 ASSERT_EQ(values
[2], third_value
);
430 // Try again with no I/O allowed. The table and the necessary blocks should
431 // already be in their respective caches. The first (inlined) value should be
432 // successfully read; however, the two blob values could only be read from the
433 // blob file, so for those the read should return Incomplete.
434 read_options
.read_tier
= kBlockCacheTier
;
437 std::array
<PinnableSlice
, num_keys
> values
;
438 std::array
<Status
, num_keys
> statuses
;
440 db_
->MultiGet(read_options
, db_
->DefaultColumnFamily(), num_keys
, &keys
[0],
441 &values
[0], &statuses
[0]);
443 ASSERT_OK(statuses
[0]);
444 ASSERT_EQ(values
[0], first_value
);
446 ASSERT_TRUE(statuses
[1].IsIncomplete());
448 ASSERT_TRUE(statuses
[2].IsIncomplete());
452 TEST_F(DBBlobBasicTest
, MultiGetBlobsFromCache
) {
453 Options options
= GetDefaultOptions();
456 co
.capacity
= 2 << 20; // 2MB
457 co
.num_shard_bits
= 2;
458 co
.metadata_charge_policy
= kDontChargeCacheMetadata
;
459 auto backing_cache
= NewLRUCache(co
);
461 constexpr size_t min_blob_size
= 6;
462 options
.min_blob_size
= min_blob_size
;
463 options
.create_if_missing
= true;
464 options
.enable_blob_files
= true;
465 options
.blob_cache
= backing_cache
;
467 BlockBasedTableOptions block_based_options
;
468 block_based_options
.no_block_cache
= false;
469 block_based_options
.block_cache
= backing_cache
;
470 block_based_options
.cache_index_and_filter_blocks
= true;
471 options
.table_factory
.reset(NewBlockBasedTableFactory(block_based_options
));
473 DestroyAndReopen(options
);
475 // Put then retrieve three key-values. The first value is below the size limit
476 // and is thus stored inline; the other two are stored separately as blobs.
477 constexpr size_t num_keys
= 3;
479 constexpr char first_key
[] = "first_key";
480 constexpr char first_value
[] = "short";
481 static_assert(sizeof(first_value
) - 1 < min_blob_size
,
482 "first_value too long to be inlined");
484 ASSERT_OK(Put(first_key
, first_value
));
486 constexpr char second_key
[] = "second_key";
487 constexpr char second_value
[] = "long_value";
488 static_assert(sizeof(second_value
) - 1 >= min_blob_size
,
489 "second_value too short to be stored as blob");
491 ASSERT_OK(Put(second_key
, second_value
));
493 constexpr char third_key
[] = "third_key";
494 constexpr char third_value
[] = "other_long_value";
495 static_assert(sizeof(third_value
) - 1 >= min_blob_size
,
496 "third_value too short to be stored as blob");
498 ASSERT_OK(Put(third_key
, third_value
));
502 ReadOptions read_options
;
503 read_options
.fill_cache
= false;
505 std::array
<Slice
, num_keys
> keys
{{first_key
, second_key
, third_key
}};
508 std::array
<PinnableSlice
, num_keys
> values
;
509 std::array
<Status
, num_keys
> statuses
;
511 db_
->MultiGet(read_options
, db_
->DefaultColumnFamily(), num_keys
, &keys
[0],
512 &values
[0], &statuses
[0]);
514 ASSERT_OK(statuses
[0]);
515 ASSERT_EQ(values
[0], first_value
);
517 ASSERT_OK(statuses
[1]);
518 ASSERT_EQ(values
[1], second_value
);
520 ASSERT_OK(statuses
[2]);
521 ASSERT_EQ(values
[2], third_value
);
524 // Try again with no I/O allowed. The first (inlined) value should be
525 // successfully read; however, the two blob values could only be read from the
526 // blob file, so for those the read should return Incomplete.
527 read_options
.read_tier
= kBlockCacheTier
;
530 std::array
<PinnableSlice
, num_keys
> values
;
531 std::array
<Status
, num_keys
> statuses
;
533 db_
->MultiGet(read_options
, db_
->DefaultColumnFamily(), num_keys
, &keys
[0],
534 &values
[0], &statuses
[0]);
536 ASSERT_OK(statuses
[0]);
537 ASSERT_EQ(values
[0], first_value
);
539 ASSERT_TRUE(statuses
[1].IsIncomplete());
541 ASSERT_TRUE(statuses
[2].IsIncomplete());
544 // Fill the cache when reading blobs from the blob file.
545 read_options
.read_tier
= kReadAllTier
;
546 read_options
.fill_cache
= true;
549 std::array
<PinnableSlice
, num_keys
> values
;
550 std::array
<Status
, num_keys
> statuses
;
552 db_
->MultiGet(read_options
, db_
->DefaultColumnFamily(), num_keys
, &keys
[0],
553 &values
[0], &statuses
[0]);
555 ASSERT_OK(statuses
[0]);
556 ASSERT_EQ(values
[0], first_value
);
558 ASSERT_OK(statuses
[1]);
559 ASSERT_EQ(values
[1], second_value
);
561 ASSERT_OK(statuses
[2]);
562 ASSERT_EQ(values
[2], third_value
);
// Try again with no I/O allowed. All blobs should be successfully read from
// the cache.
567 read_options
.read_tier
= kBlockCacheTier
;
570 std::array
<PinnableSlice
, num_keys
> values
;
571 std::array
<Status
, num_keys
> statuses
;
573 db_
->MultiGet(read_options
, db_
->DefaultColumnFamily(), num_keys
, &keys
[0],
574 &values
[0], &statuses
[0]);
576 ASSERT_OK(statuses
[0]);
577 ASSERT_EQ(values
[0], first_value
);
579 ASSERT_OK(statuses
[1]);
580 ASSERT_EQ(values
[1], second_value
);
582 ASSERT_OK(statuses
[2]);
583 ASSERT_EQ(values
[2], third_value
);
588 TEST_F(DBBlobBasicTest
, MultiGetWithDirectIO
) {
589 Options options
= GetDefaultOptions();
591 // First, create an external SST file ["b"].
592 const std::string file_path
= dbname_
+ "/test.sst";
594 SstFileWriter
sst_file_writer(EnvOptions(), GetDefaultOptions());
595 Status s
= sst_file_writer
.Open(file_path
);
597 ASSERT_OK(sst_file_writer
.Put("b", "b_value"));
598 ASSERT_OK(sst_file_writer
.Finish());
601 options
.enable_blob_files
= true;
602 options
.min_blob_size
= 1000;
603 options
.use_direct_reads
= true;
604 options
.allow_ingest_behind
= true;
606 // Open DB with fixed-prefix sst-partitioner so that compaction will cut
607 // new table file when encountering a new key whose 1-byte prefix changes.
608 constexpr size_t key_len
= 1;
609 options
.sst_partitioner_factory
=
610 NewSstPartitionerFixedPrefixFactory(key_len
);
612 Status s
= TryReopen(options
);
613 if (s
.IsInvalidArgument()) {
614 ROCKSDB_GTEST_SKIP("This test requires direct IO support");
619 constexpr size_t num_keys
= 3;
620 constexpr size_t blob_size
= 3000;
622 constexpr char first_key
[] = "a";
623 const std::string
first_blob(blob_size
, 'a');
624 ASSERT_OK(Put(first_key
, first_blob
));
626 constexpr char second_key
[] = "b";
627 const std::string
second_blob(2 * blob_size
, 'b');
628 ASSERT_OK(Put(second_key
, second_blob
));
630 constexpr char third_key
[] = "d";
631 const std::string
third_blob(blob_size
, 'd');
632 ASSERT_OK(Put(third_key
, third_blob
));
634 // first_blob, second_blob and third_blob in the same blob file.
636 // L0 ["a", "b", "d"] |'aaaa', 'bbbb', 'dddd'|
639 // | | +---------|-------|--------+
640 // | +-----------------|-------+
641 // +-------------------------+
644 constexpr char fourth_key
[] = "c";
645 const std::string
fourth_blob(blob_size
, 'c');
646 ASSERT_OK(Put(fourth_key
, fourth_blob
));
647 // fourth_blob in another blob file.
648 // SST Blob file SST Blob file
649 // L0 ["a", "b", "d"] |'aaaa', 'bbbb', 'dddd'| ["c"] |'cccc'|
652 // | | +---------|-------|--------+ +-------+
653 // | +-----------------|-------+
654 // +-------------------------+
657 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
// Due to the above sst partitioner, we get 4 L1 files. The blob files are
// unchanged.
662 // |'aaaa', 'bbbb', 'dddd'| |'cccc'|
666 // L1 ["a"] ["b"] ["c"] | | ["d"] |
668 // | | +---------|-------|---------------+
669 // | +-----------------|-------+
670 // +-------------------------+
671 ASSERT_EQ(4, NumTableFilesAtLevel(/*level=*/1));
674 // Ingest the external SST file into bottommost level.
675 std::vector
<std::string
> ext_files
{file_path
};
676 IngestExternalFileOptions opts
;
677 opts
.ingest_behind
= true;
679 db_
->IngestExternalFile(db_
->DefaultColumnFamily(), ext_files
, opts
));
682 // Now the database becomes as follows.
683 // |'aaaa', 'bbbb', 'dddd'| |'cccc'|
687 // L1 ["a"] ["b"] ["c"] | | ["d"] |
689 // | | +---------|-------|---------------+
690 // | +-----------------|-------+
691 // +-------------------------+
696 // Compact ["b"] to bottommost level.
697 Slice begin
= Slice(second_key
);
698 Slice end
= Slice(second_key
);
699 CompactRangeOptions cro
;
700 cro
.bottommost_level_compaction
= BottommostLevelCompaction::kForce
;
701 ASSERT_OK(db_
->CompactRange(cro
, &begin
, &end
));
704 // |'aaaa', 'bbbb', 'dddd'| |'cccc'|
708 // L1 ["a"] ["c"] | | ["d"] |
710 // | +---------|-------|---------------+
711 // | +-----------------|-------+
712 // +-------|-----------------+
715 ASSERT_EQ(3, NumTableFilesAtLevel(/*level=*/1));
716 ASSERT_EQ(1, NumTableFilesAtLevel(/*level=*/6));
719 SyncPoint::GetInstance()->ClearAllCallBacks();
720 SyncPoint::GetInstance()->SetCallBack(
721 "RandomAccessFileReader::MultiRead:AlignedReqs", [&](void* arg
) {
722 auto* aligned_reqs
= static_cast<std::vector
<FSReadRequest
>*>(arg
);
723 assert(aligned_reqs
);
724 ASSERT_EQ(1, aligned_reqs
->size());
727 SyncPoint::GetInstance()->EnableProcessing();
729 std::array
<Slice
, num_keys
> keys
{{first_key
, third_key
, second_key
}};
732 std::array
<PinnableSlice
, num_keys
> values
;
733 std::array
<Status
, num_keys
> statuses
;
735 // The MultiGet(), when constructing the KeyContexts, will process the keys
736 // in such order: a, d, b. The reason is that ["a"] and ["d"] are in L1,
737 // while ["b"] resides in L6.
// Consequently, the original FSReadRequest list prepared by
// Version::MultiGetBlob() will be for "a", "d" and "b". It is unsorted as
// follows:
742 // ["a", offset=30, len=3033],
743 // ["d", offset=9096, len=3033],
744 // ["b", offset=3063, len=6033]
746 // If we do not sort them before calling MultiRead() in DirectIO, then the
747 // underlying IO merging logic will yield two requests.
749 // [offset=0, len=4096] (for "a")
750 // [offset=0, len=12288] (result of merging the request for "d" and "b")
752 // We need to sort them in Version::MultiGetBlob() so that the underlying
753 // IO merging logic in DirectIO mode works as expected. The correct
754 // behavior will be one aligned request:
756 // [offset=0, len=12288]
758 db_
->MultiGet(ReadOptions(), db_
->DefaultColumnFamily(), num_keys
, &keys
[0],
759 &values
[0], &statuses
[0]);
761 SyncPoint::GetInstance()->DisableProcessing();
762 SyncPoint::GetInstance()->ClearAllCallBacks();
766 ASSERT_OK(statuses
[0]);
767 ASSERT_EQ(values
[0], first_blob
);
769 ASSERT_OK(statuses
[1]);
770 ASSERT_EQ(values
[1], third_blob
);
772 ASSERT_OK(statuses
[2]);
773 ASSERT_EQ(values
[2], second_blob
);
776 #endif // !ROCKSDB_LITE
778 TEST_F(DBBlobBasicTest
, MultiGetBlobsFromMultipleFiles
) {
779 Options options
= GetDefaultOptions();
782 co
.capacity
= 2 << 20; // 2MB
783 co
.num_shard_bits
= 2;
784 co
.metadata_charge_policy
= kDontChargeCacheMetadata
;
785 auto backing_cache
= NewLRUCache(co
);
787 options
.min_blob_size
= 0;
788 options
.create_if_missing
= true;
789 options
.enable_blob_files
= true;
790 options
.blob_cache
= backing_cache
;
792 BlockBasedTableOptions block_based_options
;
793 block_based_options
.no_block_cache
= false;
794 block_based_options
.block_cache
= backing_cache
;
795 block_based_options
.cache_index_and_filter_blocks
= true;
796 options
.table_factory
.reset(NewBlockBasedTableFactory(block_based_options
));
800 constexpr size_t kNumBlobFiles
= 3;
801 constexpr size_t kNumBlobsPerFile
= 3;
802 constexpr size_t kNumKeys
= kNumBlobsPerFile
* kNumBlobFiles
;
804 std::vector
<std::string
> key_strs
;
805 std::vector
<std::string
> value_strs
;
806 for (size_t i
= 0; i
< kNumBlobFiles
; ++i
) {
807 for (size_t j
= 0; j
< kNumBlobsPerFile
; ++j
) {
808 std::string key
= "key" + std::to_string(i
) + "_" + std::to_string(j
);
810 "value_as_blob" + std::to_string(i
) + "_" + std::to_string(j
);
811 ASSERT_OK(Put(key
, value
));
812 key_strs
.push_back(key
);
813 value_strs
.push_back(value
);
817 assert(key_strs
.size() == kNumKeys
);
818 std::array
<Slice
, kNumKeys
> keys
;
819 for (size_t i
= 0; i
< keys
.size(); ++i
) {
820 keys
[i
] = key_strs
[i
];
823 ReadOptions read_options
;
824 read_options
.read_tier
= kReadAllTier
;
825 read_options
.fill_cache
= false;
828 std::array
<PinnableSlice
, kNumKeys
> values
;
829 std::array
<Status
, kNumKeys
> statuses
;
830 db_
->MultiGet(read_options
, db_
->DefaultColumnFamily(), kNumKeys
, &keys
[0],
831 &values
[0], &statuses
[0]);
833 for (size_t i
= 0; i
< kNumKeys
; ++i
) {
834 ASSERT_OK(statuses
[i
]);
835 ASSERT_EQ(value_strs
[i
], values
[i
]);
839 read_options
.read_tier
= kBlockCacheTier
;
842 std::array
<PinnableSlice
, kNumKeys
> values
;
843 std::array
<Status
, kNumKeys
> statuses
;
844 db_
->MultiGet(read_options
, db_
->DefaultColumnFamily(), kNumKeys
, &keys
[0],
845 &values
[0], &statuses
[0]);
847 for (size_t i
= 0; i
< kNumKeys
; ++i
) {
848 ASSERT_TRUE(statuses
[i
].IsIncomplete());
849 ASSERT_TRUE(values
[i
].empty());
853 read_options
.read_tier
= kReadAllTier
;
854 read_options
.fill_cache
= true;
857 std::array
<PinnableSlice
, kNumKeys
> values
;
858 std::array
<Status
, kNumKeys
> statuses
;
859 db_
->MultiGet(read_options
, db_
->DefaultColumnFamily(), kNumKeys
, &keys
[0],
860 &values
[0], &statuses
[0]);
862 for (size_t i
= 0; i
< kNumKeys
; ++i
) {
863 ASSERT_OK(statuses
[i
]);
864 ASSERT_EQ(value_strs
[i
], values
[i
]);
868 read_options
.read_tier
= kBlockCacheTier
;
871 std::array
<PinnableSlice
, kNumKeys
> values
;
872 std::array
<Status
, kNumKeys
> statuses
;
873 db_
->MultiGet(read_options
, db_
->DefaultColumnFamily(), kNumKeys
, &keys
[0],
874 &values
[0], &statuses
[0]);
876 for (size_t i
= 0; i
< kNumKeys
; ++i
) {
877 ASSERT_OK(statuses
[i
]);
878 ASSERT_EQ(value_strs
[i
], values
[i
]);
883 TEST_F(DBBlobBasicTest
, GetBlob_CorruptIndex
) {
884 Options options
= GetDefaultOptions();
885 options
.enable_blob_files
= true;
886 options
.min_blob_size
= 0;
890 constexpr char key
[] = "key";
891 constexpr char blob
[] = "blob";
893 ASSERT_OK(Put(key
, blob
));
896 SyncPoint::GetInstance()->SetCallBack(
897 "Version::Get::TamperWithBlobIndex", [](void* arg
) {
898 Slice
* const blob_index
= static_cast<Slice
*>(arg
);
900 assert(!blob_index
->empty());
901 blob_index
->remove_prefix(1);
903 SyncPoint::GetInstance()->EnableProcessing();
905 PinnableSlice result
;
906 ASSERT_TRUE(db_
->Get(ReadOptions(), db_
->DefaultColumnFamily(), key
, &result
)
909 SyncPoint::GetInstance()->DisableProcessing();
910 SyncPoint::GetInstance()->ClearAllCallBacks();
913 TEST_F(DBBlobBasicTest
, MultiGetBlob_CorruptIndex
) {
914 Options options
= GetDefaultOptions();
915 options
.enable_blob_files
= true;
916 options
.min_blob_size
= 0;
917 options
.create_if_missing
= true;
919 DestroyAndReopen(options
);
921 constexpr size_t kNumOfKeys
= 3;
922 std::array
<std::string
, kNumOfKeys
> key_strs
;
923 std::array
<std::string
, kNumOfKeys
> value_strs
;
924 std::array
<Slice
, kNumOfKeys
+ 1> keys
;
925 for (size_t i
= 0; i
< kNumOfKeys
; ++i
) {
926 key_strs
[i
] = "foo" + std::to_string(i
);
927 value_strs
[i
] = "blob_value" + std::to_string(i
);
928 ASSERT_OK(Put(key_strs
[i
], value_strs
[i
]));
929 keys
[i
] = key_strs
[i
];
932 constexpr char key
[] = "key";
933 constexpr char blob
[] = "blob";
934 ASSERT_OK(Put(key
, blob
));
935 keys
[kNumOfKeys
] = key
;
939 SyncPoint::GetInstance()->SetCallBack(
940 "Version::MultiGet::TamperWithBlobIndex", [&key
](void* arg
) {
941 KeyContext
* const key_context
= static_cast<KeyContext
*>(arg
);
943 assert(key_context
->key
);
945 if (*(key_context
->key
) == key
) {
946 Slice
* const blob_index
= key_context
->value
;
948 assert(!blob_index
->empty());
949 blob_index
->remove_prefix(1);
952 SyncPoint::GetInstance()->EnableProcessing();
954 std::array
<PinnableSlice
, kNumOfKeys
+ 1> values
;
955 std::array
<Status
, kNumOfKeys
+ 1> statuses
;
956 db_
->MultiGet(ReadOptions(), dbfull()->DefaultColumnFamily(), kNumOfKeys
+ 1,
957 keys
.data(), values
.data(), statuses
.data(),
958 /*sorted_input=*/false);
959 for (size_t i
= 0; i
< kNumOfKeys
+ 1; ++i
) {
960 if (i
!= kNumOfKeys
) {
961 ASSERT_OK(statuses
[i
]);
962 ASSERT_EQ("blob_value" + std::to_string(i
), values
[i
]);
964 ASSERT_TRUE(statuses
[i
].IsCorruption());
968 SyncPoint::GetInstance()->DisableProcessing();
969 SyncPoint::GetInstance()->ClearAllCallBacks();
972 TEST_F(DBBlobBasicTest
, MultiGetBlob_ExceedSoftLimit
) {
973 Options options
= GetDefaultOptions();
974 options
.enable_blob_files
= true;
975 options
.min_blob_size
= 0;
979 constexpr size_t kNumOfKeys
= 3;
980 std::array
<std::string
, kNumOfKeys
> key_bufs
;
981 std::array
<std::string
, kNumOfKeys
> value_bufs
;
982 std::array
<Slice
, kNumOfKeys
> keys
;
983 for (size_t i
= 0; i
< kNumOfKeys
; ++i
) {
984 key_bufs
[i
] = "foo" + std::to_string(i
);
985 value_bufs
[i
] = "blob_value" + std::to_string(i
);
986 ASSERT_OK(Put(key_bufs
[i
], value_bufs
[i
]));
987 keys
[i
] = key_bufs
[i
];
991 std::array
<PinnableSlice
, kNumOfKeys
> values
;
992 std::array
<Status
, kNumOfKeys
> statuses
;
993 ReadOptions read_opts
;
994 read_opts
.value_size_soft_limit
= 1;
995 db_
->MultiGet(read_opts
, dbfull()->DefaultColumnFamily(), kNumOfKeys
,
996 keys
.data(), values
.data(), statuses
.data(),
997 /*sorted_input=*/true);
998 for (const auto& s
: statuses
) {
999 ASSERT_TRUE(s
.IsAborted());
1003 TEST_F(DBBlobBasicTest
, GetBlob_InlinedTTLIndex
) {
1004 constexpr uint64_t min_blob_size
= 10;
1006 Options options
= GetDefaultOptions();
1007 options
.enable_blob_files
= true;
1008 options
.min_blob_size
= min_blob_size
;
1012 constexpr char key
[] = "key";
1013 constexpr char blob
[] = "short";
1014 static_assert(sizeof(short) - 1 < min_blob_size
,
1015 "Blob too long to be inlined");
1017 // Fake an inlined TTL blob index.
1018 std::string blob_index
;
1020 constexpr uint64_t expiration
= 1234567890;
1022 BlobIndex::EncodeInlinedTTL(&blob_index
, expiration
, blob
);
1025 ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch
, 0, key
, blob_index
));
1026 ASSERT_OK(db_
->Write(WriteOptions(), &batch
));
1030 PinnableSlice result
;
1031 ASSERT_TRUE(db_
->Get(ReadOptions(), db_
->DefaultColumnFamily(), key
, &result
)
1035 TEST_F(DBBlobBasicTest
, GetBlob_IndexWithInvalidFileNumber
) {
1036 Options options
= GetDefaultOptions();
1037 options
.enable_blob_files
= true;
1038 options
.min_blob_size
= 0;
1042 constexpr char key
[] = "key";
1044 // Fake a blob index referencing a non-existent blob file.
1045 std::string blob_index
;
1047 constexpr uint64_t blob_file_number
= 1000;
1048 constexpr uint64_t offset
= 1234;
1049 constexpr uint64_t size
= 5678;
1051 BlobIndex::EncodeBlob(&blob_index
, blob_file_number
, offset
, size
,
1055 ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch
, 0, key
, blob_index
));
1056 ASSERT_OK(db_
->Write(WriteOptions(), &batch
));
1060 PinnableSlice result
;
1061 ASSERT_TRUE(db_
->Get(ReadOptions(), db_
->DefaultColumnFamily(), key
, &result
)
1065 #ifndef ROCKSDB_LITE
1066 TEST_F(DBBlobBasicTest
, GenerateIOTracing
) {
1067 Options options
= GetDefaultOptions();
1068 options
.enable_blob_files
= true;
1069 options
.min_blob_size
= 0;
1070 std::string trace_file
= dbname_
+ "/io_trace_file";
1074 // Create IO trace file
1075 std::unique_ptr
<TraceWriter
> trace_writer
;
1077 NewFileTraceWriter(env_
, EnvOptions(), trace_file
, &trace_writer
));
1078 ASSERT_OK(db_
->StartIOTrace(TraceOptions(), std::move(trace_writer
)));
1080 constexpr char key
[] = "key";
1081 constexpr char blob_value
[] = "blob_value";
1083 ASSERT_OK(Put(key
, blob_value
));
1085 ASSERT_EQ(Get(key
), blob_value
);
1087 ASSERT_OK(db_
->EndIOTrace());
1088 ASSERT_OK(env_
->FileExists(trace_file
));
// Parse the trace file to check that file operations related to blob files
// are recorded.
1093 std::unique_ptr
<TraceReader
> trace_reader
;
1095 NewFileTraceReader(env_
, EnvOptions(), trace_file
, &trace_reader
));
1096 IOTraceReader
reader(std::move(trace_reader
));
1098 IOTraceHeader header
;
1099 ASSERT_OK(reader
.ReadHeader(&header
));
1100 ASSERT_EQ(kMajorVersion
, static_cast<int>(header
.rocksdb_major_version
));
1101 ASSERT_EQ(kMinorVersion
, static_cast<int>(header
.rocksdb_minor_version
));
1104 int blob_files_op_count
= 0;
1107 IOTraceRecord record
;
1108 status
= reader
.ReadIOOp(&record
);
1112 if (record
.file_name
.find("blob") != std::string::npos
) {
1113 blob_files_op_count
++;
1116 // Assuming blob files will have Append, Close and then Read operations.
1117 ASSERT_GT(blob_files_op_count
, 2);
1120 #endif // !ROCKSDB_LITE
1122 TEST_F(DBBlobBasicTest
, BestEffortsRecovery_MissingNewestBlobFile
) {
1123 Options options
= GetDefaultOptions();
1124 options
.enable_blob_files
= true;
1125 options
.min_blob_size
= 0;
1126 options
.create_if_missing
= true;
1129 ASSERT_OK(dbfull()->DisableFileDeletions());
1130 constexpr int kNumTableFiles
= 2;
1131 for (int i
= 0; i
< kNumTableFiles
; ++i
) {
1132 for (char ch
= 'a'; ch
!= 'c'; ++ch
) {
1133 std::string
key(1, ch
);
1134 ASSERT_OK(Put(key
, "value" + std::to_string(i
)));
1141 std::vector
<std::string
> files
;
1142 ASSERT_OK(env_
->GetChildren(dbname_
, &files
));
1143 std::string blob_file_path
;
1144 uint64_t max_blob_file_num
= kInvalidBlobFileNumber
;
1145 for (const auto& fname
: files
) {
1146 uint64_t file_num
= 0;
1148 if (ParseFileName(fname
, &file_num
, /*info_log_name_prefix=*/"", &type
) &&
1149 type
== kBlobFile
) {
1150 if (file_num
> max_blob_file_num
) {
1151 max_blob_file_num
= file_num
;
1152 blob_file_path
= dbname_
+ "/" + fname
;
1156 ASSERT_OK(env_
->DeleteFile(blob_file_path
));
1158 options
.best_efforts_recovery
= true;
1161 ASSERT_OK(db_
->Get(ReadOptions(), "a", &value
));
1162 ASSERT_EQ("value" + std::to_string(kNumTableFiles
- 2), value
);
1165 TEST_F(DBBlobBasicTest
, GetMergeBlobWithPut
) {
1166 Options options
= GetDefaultOptions();
1167 options
.merge_operator
= MergeOperators::CreateStringAppendOperator();
1168 options
.enable_blob_files
= true;
1169 options
.min_blob_size
= 0;
1173 ASSERT_OK(Put("Key1", "v1"));
1175 ASSERT_OK(Merge("Key1", "v2"));
1177 ASSERT_OK(Merge("Key1", "v3"));
1181 ASSERT_OK(db_
->Get(ReadOptions(), "Key1", &value
));
1182 ASSERT_EQ(Get("Key1"), "v1,v2,v3");
1185 TEST_F(DBBlobBasicTest
, MultiGetMergeBlobWithPut
) {
1186 constexpr size_t num_keys
= 3;
1188 Options options
= GetDefaultOptions();
1189 options
.merge_operator
= MergeOperators::CreateStringAppendOperator();
1190 options
.enable_blob_files
= true;
1191 options
.min_blob_size
= 0;
1195 ASSERT_OK(Put("Key0", "v0_0"));
1196 ASSERT_OK(Put("Key1", "v1_0"));
1197 ASSERT_OK(Put("Key2", "v2_0"));
1199 ASSERT_OK(Merge("Key0", "v0_1"));
1200 ASSERT_OK(Merge("Key1", "v1_1"));
1202 ASSERT_OK(Merge("Key0", "v0_2"));
1205 std::array
<Slice
, num_keys
> keys
{{"Key0", "Key1", "Key2"}};
1206 std::array
<PinnableSlice
, num_keys
> values
;
1207 std::array
<Status
, num_keys
> statuses
;
1209 db_
->MultiGet(ReadOptions(), db_
->DefaultColumnFamily(), num_keys
, &keys
[0],
1210 &values
[0], &statuses
[0]);
1212 ASSERT_OK(statuses
[0]);
1213 ASSERT_EQ(values
[0], "v0_0,v0_1,v0_2");
1215 ASSERT_OK(statuses
[1]);
1216 ASSERT_EQ(values
[1], "v1_0,v1_1");
1218 ASSERT_OK(statuses
[2]);
1219 ASSERT_EQ(values
[2], "v2_0");
1222 #ifndef ROCKSDB_LITE
1223 TEST_F(DBBlobBasicTest
, Properties
) {
1224 Options options
= GetDefaultOptions();
1225 options
.enable_blob_files
= true;
1226 options
.min_blob_size
= 0;
1230 constexpr char key1
[] = "key1";
1231 constexpr size_t key1_size
= sizeof(key1
) - 1;
1233 constexpr char key2
[] = "key2";
1234 constexpr size_t key2_size
= sizeof(key2
) - 1;
1236 constexpr char key3
[] = "key3";
1237 constexpr size_t key3_size
= sizeof(key3
) - 1;
1239 constexpr char blob
[] = "00000000000000";
1240 constexpr size_t blob_size
= sizeof(blob
) - 1;
1242 constexpr char longer_blob
[] = "00000000000000000000";
1243 constexpr size_t longer_blob_size
= sizeof(longer_blob
) - 1;
1245 ASSERT_OK(Put(key1
, blob
));
1246 ASSERT_OK(Put(key2
, longer_blob
));
1249 constexpr size_t first_blob_file_expected_size
=
1250 BlobLogHeader::kSize
+
1251 BlobLogRecord::CalculateAdjustmentForRecordHeader(key1_size
) + blob_size
+
1252 BlobLogRecord::CalculateAdjustmentForRecordHeader(key2_size
) +
1253 longer_blob_size
+ BlobLogFooter::kSize
;
1255 ASSERT_OK(Put(key3
, blob
));
1258 constexpr size_t second_blob_file_expected_size
=
1259 BlobLogHeader::kSize
+
1260 BlobLogRecord::CalculateAdjustmentForRecordHeader(key3_size
) + blob_size
+
1261 BlobLogFooter::kSize
;
1263 constexpr size_t total_expected_size
=
1264 first_blob_file_expected_size
+ second_blob_file_expected_size
;
1266 // Number of blob files
1267 uint64_t num_blob_files
= 0;
1269 db_
->GetIntProperty(DB::Properties::kNumBlobFiles
, &num_blob_files
));
1270 ASSERT_EQ(num_blob_files
, 2);
1272 // Total size of live blob files
1273 uint64_t live_blob_file_size
= 0;
1274 ASSERT_TRUE(db_
->GetIntProperty(DB::Properties::kLiveBlobFileSize
,
1275 &live_blob_file_size
));
1276 ASSERT_EQ(live_blob_file_size
, total_expected_size
);
1278 // Total amount of garbage in live blob files
1280 uint64_t live_blob_file_garbage_size
= 0;
1281 ASSERT_TRUE(db_
->GetIntProperty(DB::Properties::kLiveBlobFileGarbageSize
,
1282 &live_blob_file_garbage_size
));
1283 ASSERT_EQ(live_blob_file_garbage_size
, 0);
1286 // Total size of all blob files across all versions
1287 // Note: this should be the same as above since we only have one
1288 // version at this point.
1289 uint64_t total_blob_file_size
= 0;
1290 ASSERT_TRUE(db_
->GetIntProperty(DB::Properties::kTotalBlobFileSize
,
1291 &total_blob_file_size
));
1292 ASSERT_EQ(total_blob_file_size
, total_expected_size
);
1294 // Delete key2 to create some garbage
1295 ASSERT_OK(Delete(key2
));
1298 constexpr Slice
* begin
= nullptr;
1299 constexpr Slice
* end
= nullptr;
1300 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), begin
, end
));
1302 constexpr size_t expected_garbage_size
=
1303 BlobLogRecord::CalculateAdjustmentForRecordHeader(key2_size
) +
1306 constexpr double expected_space_amp
=
1307 static_cast<double>(total_expected_size
) /
1308 (total_expected_size
- expected_garbage_size
);
1311 std::string blob_stats
;
1312 ASSERT_TRUE(db_
->GetProperty(DB::Properties::kBlobStats
, &blob_stats
));
1314 std::ostringstream oss
;
1315 oss
<< "Number of blob files: 2\nTotal size of blob files: "
1316 << total_expected_size
1317 << "\nTotal size of garbage in blob files: " << expected_garbage_size
1318 << "\nBlob file space amplification: " << expected_space_amp
<< '\n';
1320 ASSERT_EQ(blob_stats
, oss
.str());
1322 // Total amount of garbage in live blob files
1324 uint64_t live_blob_file_garbage_size
= 0;
1325 ASSERT_TRUE(db_
->GetIntProperty(DB::Properties::kLiveBlobFileGarbageSize
,
1326 &live_blob_file_garbage_size
));
1327 ASSERT_EQ(live_blob_file_garbage_size
, expected_garbage_size
);
1331 TEST_F(DBBlobBasicTest
, PropertiesMultiVersion
) {
1332 Options options
= GetDefaultOptions();
1333 options
.enable_blob_files
= true;
1334 options
.min_blob_size
= 0;
1338 constexpr char key1
[] = "key1";
1339 constexpr char key2
[] = "key2";
1340 constexpr char key3
[] = "key3";
1342 constexpr size_t key_size
= sizeof(key1
) - 1;
1343 static_assert(sizeof(key2
) - 1 == key_size
, "unexpected size: key2");
1344 static_assert(sizeof(key3
) - 1 == key_size
, "unexpected size: key3");
1346 constexpr char blob
[] = "0000000000";
1347 constexpr size_t blob_size
= sizeof(blob
) - 1;
1349 ASSERT_OK(Put(key1
, blob
));
1352 ASSERT_OK(Put(key2
, blob
));
1355 // Create an iterator to keep the current version alive
1356 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(ReadOptions()));
1357 ASSERT_OK(iter
->status());
1359 // Note: the Delete and subsequent compaction results in the first blob file
1360 // not making it to the final version. (It is still part of the previous
1361 // version kept alive by the iterator though.) On the other hand, the Put
1362 // results in a third blob file.
1363 ASSERT_OK(Delete(key1
));
1364 ASSERT_OK(Put(key3
, blob
));
1367 constexpr Slice
* begin
= nullptr;
1368 constexpr Slice
* end
= nullptr;
1369 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), begin
, end
));
1371 // Total size of all blob files across all versions: between the two versions,
1372 // we should have three blob files of the same size with one blob each.
1373 // The version kept alive by the iterator contains the first and the second
1374 // blob file, while the final version contains the second and the third blob
1375 // file. (The second blob file is thus shared by the two versions but should
1376 // be counted only once.)
1377 uint64_t total_blob_file_size
= 0;
1378 ASSERT_TRUE(db_
->GetIntProperty(DB::Properties::kTotalBlobFileSize
,
1379 &total_blob_file_size
));
1380 ASSERT_EQ(total_blob_file_size
,
1381 3 * (BlobLogHeader::kSize
+
1382 BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size
) +
1383 blob_size
+ BlobLogFooter::kSize
));
1385 #endif // !ROCKSDB_LITE
1387 class DBBlobBasicIOErrorTest
: public DBBlobBasicTest
,
1388 public testing::WithParamInterface
<std::string
> {
1390 DBBlobBasicIOErrorTest() : sync_point_(GetParam()) {
1391 fault_injection_env_
.reset(new FaultInjectionTestEnv(env_
));
1393 ~DBBlobBasicIOErrorTest() { Close(); }
1395 std::unique_ptr
<FaultInjectionTestEnv
> fault_injection_env_
;
1396 std::string sync_point_
;
1399 class DBBlobBasicIOErrorMultiGetTest
: public DBBlobBasicIOErrorTest
{
1401 DBBlobBasicIOErrorMultiGetTest() : DBBlobBasicIOErrorTest() {}
1404 INSTANTIATE_TEST_CASE_P(DBBlobBasicTest
, DBBlobBasicIOErrorTest
,
1405 ::testing::ValuesIn(std::vector
<std::string
>{
1406 "BlobFileReader::OpenFile:NewRandomAccessFile",
1407 "BlobFileReader::GetBlob:ReadFromFile"}));
1409 INSTANTIATE_TEST_CASE_P(DBBlobBasicTest
, DBBlobBasicIOErrorMultiGetTest
,
1410 ::testing::ValuesIn(std::vector
<std::string
>{
1411 "BlobFileReader::OpenFile:NewRandomAccessFile",
1412 "BlobFileReader::MultiGetBlob:ReadFromFile"}));
1414 TEST_P(DBBlobBasicIOErrorTest
, GetBlob_IOError
) {
1416 options
.env
= fault_injection_env_
.get();
1417 options
.enable_blob_files
= true;
1418 options
.min_blob_size
= 0;
1422 constexpr char key
[] = "key";
1423 constexpr char blob_value
[] = "blob_value";
1425 ASSERT_OK(Put(key
, blob_value
));
1429 SyncPoint::GetInstance()->SetCallBack(sync_point_
, [this](void* /* arg */) {
1430 fault_injection_env_
->SetFilesystemActive(false,
1431 Status::IOError(sync_point_
));
1433 SyncPoint::GetInstance()->EnableProcessing();
1435 PinnableSlice result
;
1436 ASSERT_TRUE(db_
->Get(ReadOptions(), db_
->DefaultColumnFamily(), key
, &result
)
1439 SyncPoint::GetInstance()->DisableProcessing();
1440 SyncPoint::GetInstance()->ClearAllCallBacks();
1443 TEST_P(DBBlobBasicIOErrorMultiGetTest
, MultiGetBlobs_IOError
) {
1444 Options options
= GetDefaultOptions();
1445 options
.env
= fault_injection_env_
.get();
1446 options
.enable_blob_files
= true;
1447 options
.min_blob_size
= 0;
1451 constexpr size_t num_keys
= 2;
1453 constexpr char first_key
[] = "first_key";
1454 constexpr char first_value
[] = "first_value";
1456 ASSERT_OK(Put(first_key
, first_value
));
1458 constexpr char second_key
[] = "second_key";
1459 constexpr char second_value
[] = "second_value";
1461 ASSERT_OK(Put(second_key
, second_value
));
1465 std::array
<Slice
, num_keys
> keys
{{first_key
, second_key
}};
1466 std::array
<PinnableSlice
, num_keys
> values
;
1467 std::array
<Status
, num_keys
> statuses
;
1469 SyncPoint::GetInstance()->SetCallBack(sync_point_
, [this](void* /* arg */) {
1470 fault_injection_env_
->SetFilesystemActive(false,
1471 Status::IOError(sync_point_
));
1473 SyncPoint::GetInstance()->EnableProcessing();
1475 db_
->MultiGet(ReadOptions(), db_
->DefaultColumnFamily(), num_keys
, &keys
[0],
1476 &values
[0], &statuses
[0]);
1478 SyncPoint::GetInstance()->DisableProcessing();
1479 SyncPoint::GetInstance()->ClearAllCallBacks();
1481 ASSERT_TRUE(statuses
[0].IsIOError());
1482 ASSERT_TRUE(statuses
[1].IsIOError());
1485 TEST_P(DBBlobBasicIOErrorMultiGetTest
, MultipleBlobFiles
) {
1486 Options options
= GetDefaultOptions();
1487 options
.env
= fault_injection_env_
.get();
1488 options
.enable_blob_files
= true;
1489 options
.min_blob_size
= 0;
1493 constexpr size_t num_keys
= 2;
1495 constexpr char key1
[] = "key1";
1496 constexpr char value1
[] = "blob1";
1498 ASSERT_OK(Put(key1
, value1
));
1501 constexpr char key2
[] = "key2";
1502 constexpr char value2
[] = "blob2";
1504 ASSERT_OK(Put(key2
, value2
));
1507 std::array
<Slice
, num_keys
> keys
{{key1
, key2
}};
1508 std::array
<PinnableSlice
, num_keys
> values
;
1509 std::array
<Status
, num_keys
> statuses
;
1511 bool first_blob_file
= true;
1512 SyncPoint::GetInstance()->SetCallBack(
1513 sync_point_
, [&first_blob_file
, this](void* /* arg */) {
1514 if (first_blob_file
) {
1515 first_blob_file
= false;
1518 fault_injection_env_
->SetFilesystemActive(false,
1519 Status::IOError(sync_point_
));
1521 SyncPoint::GetInstance()->EnableProcessing();
1523 db_
->MultiGet(ReadOptions(), db_
->DefaultColumnFamily(), num_keys
,
1524 keys
.data(), values
.data(), statuses
.data());
1525 SyncPoint::GetInstance()->DisableProcessing();
1526 SyncPoint::GetInstance()->ClearAllCallBacks();
1527 ASSERT_OK(statuses
[0]);
1528 ASSERT_EQ(value1
, values
[0]);
1529 ASSERT_TRUE(statuses
[1].IsIOError());
1534 class ReadBlobCompactionFilter
: public CompactionFilter
{
1536 ReadBlobCompactionFilter() = default;
1537 const char* Name() const override
{
1538 return "rocksdb.compaction.filter.read.blob";
1540 CompactionFilter::Decision
FilterV2(
1541 int /*level*/, const Slice
& /*key*/, ValueType value_type
,
1542 const Slice
& existing_value
, std::string
* new_value
,
1543 std::string
* /*skip_until*/) const override
{
1544 if (value_type
!= CompactionFilter::ValueType::kValue
) {
1545 return CompactionFilter::Decision::kKeep
;
1548 new_value
->assign(existing_value
.data(), existing_value
.size());
1549 return CompactionFilter::Decision::kChangeValue
;
1553 } // anonymous namespace
1555 TEST_P(DBBlobBasicIOErrorTest
, CompactionFilterReadBlob_IOError
) {
1556 Options options
= GetDefaultOptions();
1557 options
.env
= fault_injection_env_
.get();
1558 options
.enable_blob_files
= true;
1559 options
.min_blob_size
= 0;
1560 options
.create_if_missing
= true;
1561 std::unique_ptr
<CompactionFilter
> compaction_filter_guard(
1562 new ReadBlobCompactionFilter
);
1563 options
.compaction_filter
= compaction_filter_guard
.get();
1565 DestroyAndReopen(options
);
1566 constexpr char key
[] = "foo";
1567 constexpr char blob_value
[] = "foo_blob_value";
1568 ASSERT_OK(Put(key
, blob_value
));
1571 SyncPoint::GetInstance()->SetCallBack(sync_point_
, [this](void* /* arg */) {
1572 fault_injection_env_
->SetFilesystemActive(false,
1573 Status::IOError(sync_point_
));
1575 SyncPoint::GetInstance()->EnableProcessing();
1577 ASSERT_TRUE(db_
->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
1581 SyncPoint::GetInstance()->DisableProcessing();
1582 SyncPoint::GetInstance()->ClearAllCallBacks();
1585 TEST_F(DBBlobBasicTest
, WarmCacheWithBlobsDuringFlush
) {
1586 Options options
= GetDefaultOptions();
1589 co
.capacity
= 1 << 25;
1590 co
.num_shard_bits
= 2;
1591 co
.metadata_charge_policy
= kDontChargeCacheMetadata
;
1592 auto backing_cache
= NewLRUCache(co
);
1594 options
.blob_cache
= backing_cache
;
1596 BlockBasedTableOptions block_based_options
;
1597 block_based_options
.no_block_cache
= false;
1598 block_based_options
.block_cache
= backing_cache
;
1599 block_based_options
.cache_index_and_filter_blocks
= true;
1600 options
.table_factory
.reset(NewBlockBasedTableFactory(block_based_options
));
1602 options
.enable_blob_files
= true;
1603 options
.create_if_missing
= true;
1604 options
.disable_auto_compactions
= true;
1605 options
.enable_blob_garbage_collection
= true;
1606 options
.blob_garbage_collection_age_cutoff
= 1.0;
1607 options
.prepopulate_blob_cache
= PrepopulateBlobCache::kFlushOnly
;
1608 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
1610 DestroyAndReopen(options
);
1612 constexpr size_t kNumBlobs
= 10;
1613 constexpr size_t kValueSize
= 100;
1615 std::string
value(kValueSize
, 'a');
1617 for (size_t i
= 1; i
<= kNumBlobs
; i
++) {
1618 ASSERT_OK(Put(std::to_string(i
), value
));
1619 ASSERT_OK(Put(std::to_string(i
+ kNumBlobs
), value
)); // Add some overlap
1621 ASSERT_EQ(i
* 2, options
.statistics
->getTickerCount(BLOB_DB_CACHE_ADD
));
1622 ASSERT_EQ(value
, Get(std::to_string(i
)));
1623 ASSERT_EQ(value
, Get(std::to_string(i
+ kNumBlobs
)));
1624 ASSERT_EQ(0, options
.statistics
->getTickerCount(BLOB_DB_CACHE_MISS
));
1625 ASSERT_EQ(i
* 2, options
.statistics
->getTickerCount(BLOB_DB_CACHE_HIT
));
1628 // Verify compaction not counted
1629 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
1631 EXPECT_EQ(kNumBlobs
* 2,
1632 options
.statistics
->getTickerCount(BLOB_DB_CACHE_ADD
));
1635 #ifndef ROCKSDB_LITE
1636 TEST_F(DBBlobBasicTest
, DynamicallyWarmCacheDuringFlush
) {
1637 Options options
= GetDefaultOptions();
1640 co
.capacity
= 1 << 25;
1641 co
.num_shard_bits
= 2;
1642 co
.metadata_charge_policy
= kDontChargeCacheMetadata
;
1643 auto backing_cache
= NewLRUCache(co
);
1645 options
.blob_cache
= backing_cache
;
1647 BlockBasedTableOptions block_based_options
;
1648 block_based_options
.no_block_cache
= false;
1649 block_based_options
.block_cache
= backing_cache
;
1650 block_based_options
.cache_index_and_filter_blocks
= true;
1651 options
.table_factory
.reset(NewBlockBasedTableFactory(block_based_options
));
1653 options
.enable_blob_files
= true;
1654 options
.create_if_missing
= true;
1655 options
.disable_auto_compactions
= true;
1656 options
.enable_blob_garbage_collection
= true;
1657 options
.blob_garbage_collection_age_cutoff
= 1.0;
1658 options
.prepopulate_blob_cache
= PrepopulateBlobCache::kFlushOnly
;
1659 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
1661 DestroyAndReopen(options
);
1663 constexpr size_t kNumBlobs
= 10;
1664 constexpr size_t kValueSize
= 100;
1666 std::string
value(kValueSize
, 'a');
1668 for (size_t i
= 1; i
<= 5; i
++) {
1669 ASSERT_OK(Put(std::to_string(i
), value
));
1670 ASSERT_OK(Put(std::to_string(i
+ kNumBlobs
), value
)); // Add some overlap
1672 ASSERT_EQ(2, options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_ADD
));
1674 ASSERT_EQ(value
, Get(std::to_string(i
)));
1675 ASSERT_EQ(value
, Get(std::to_string(i
+ kNumBlobs
)));
1676 ASSERT_EQ(0, options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_ADD
));
1678 options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_MISS
));
1679 ASSERT_EQ(2, options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_HIT
));
1682 ASSERT_OK(dbfull()->SetOptions({{"prepopulate_blob_cache", "kDisable"}}));
1684 for (size_t i
= 6; i
<= kNumBlobs
; i
++) {
1685 ASSERT_OK(Put(std::to_string(i
), value
));
1686 ASSERT_OK(Put(std::to_string(i
+ kNumBlobs
), value
)); // Add some overlap
1688 ASSERT_EQ(0, options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_ADD
));
1690 ASSERT_EQ(value
, Get(std::to_string(i
)));
1691 ASSERT_EQ(value
, Get(std::to_string(i
+ kNumBlobs
)));
1692 ASSERT_EQ(2, options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_ADD
));
1694 options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_MISS
));
1695 ASSERT_EQ(0, options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_HIT
));
1698 // Verify compaction not counted
1699 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
1701 EXPECT_EQ(0, options
.statistics
->getTickerCount(BLOB_DB_CACHE_ADD
));
1703 #endif // !ROCKSDB_LITE
1705 TEST_F(DBBlobBasicTest
, WarmCacheWithBlobsSecondary
) {
1706 CompressedSecondaryCacheOptions secondary_cache_opts
;
1707 secondary_cache_opts
.capacity
= 1 << 20;
1708 secondary_cache_opts
.num_shard_bits
= 0;
1709 secondary_cache_opts
.metadata_charge_policy
= kDontChargeCacheMetadata
;
1710 secondary_cache_opts
.compression_type
= kNoCompression
;
1712 LRUCacheOptions primary_cache_opts
;
1713 primary_cache_opts
.capacity
= 1024;
1714 primary_cache_opts
.num_shard_bits
= 0;
1715 primary_cache_opts
.metadata_charge_policy
= kDontChargeCacheMetadata
;
1716 primary_cache_opts
.secondary_cache
=
1717 NewCompressedSecondaryCache(secondary_cache_opts
);
1719 Options options
= GetDefaultOptions();
1720 options
.create_if_missing
= true;
1721 options
.statistics
= CreateDBStatistics();
1722 options
.enable_blob_files
= true;
1723 options
.blob_cache
= NewLRUCache(primary_cache_opts
);
1724 options
.prepopulate_blob_cache
= PrepopulateBlobCache::kFlushOnly
;
1726 DestroyAndReopen(options
);
1728 // Note: only one of the two blobs fit in the primary cache at any given time.
1729 constexpr char first_key
[] = "foo";
1730 constexpr size_t first_blob_size
= 512;
1731 const std::string
first_blob(first_blob_size
, 'a');
1733 constexpr char second_key
[] = "bar";
1734 constexpr size_t second_blob_size
= 768;
1735 const std::string
second_blob(second_blob_size
, 'b');
1737 // First blob is inserted into primary cache during flush.
1738 ASSERT_OK(Put(first_key
, first_blob
));
1740 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_ADD
), 1);
1742 // Second blob is inserted into primary cache during flush,
1743 // First blob is evicted but only a dummy handle is inserted into secondary
1745 ASSERT_OK(Put(second_key
, second_blob
));
1747 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_ADD
), 1);
1749 // First blob is inserted into primary cache.
1750 // Second blob is evicted but only a dummy handle is inserted into secondary
1752 ASSERT_EQ(Get(first_key
), first_blob
);
1753 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_MISS
), 1);
1754 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_HIT
), 0);
1755 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(SECONDARY_CACHE_HITS
),
1757 // Second blob is inserted into primary cache,
1758 // First blob is evicted and is inserted into secondary cache.
1759 ASSERT_EQ(Get(second_key
), second_blob
);
1760 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_MISS
), 1);
1761 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_HIT
), 0);
1762 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(SECONDARY_CACHE_HITS
),
1765 // First blob's dummy item is inserted into primary cache b/c of lookup.
1766 // Second blob is still in primary cache.
1767 ASSERT_EQ(Get(first_key
), first_blob
);
1768 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_MISS
), 0);
1769 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_HIT
), 1);
1770 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(SECONDARY_CACHE_HITS
),
1773 // First blob's item is inserted into primary cache b/c of lookup.
1774 // Second blob is evicted and inserted into secondary cache.
1775 ASSERT_EQ(Get(first_key
), first_blob
);
1776 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_MISS
), 0);
1777 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(BLOB_DB_CACHE_HIT
), 1);
1778 ASSERT_EQ(options
.statistics
->getAndResetTickerCount(SECONDARY_CACHE_HITS
),
1782 } // namespace ROCKSDB_NAMESPACE
1784 int main(int argc
, char** argv
) {
1785 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
1786 ::testing::InitGoogleTest(&argc
, argv
);
1787 RegisterCustomObjects(argc
, argv
);
1788 return RUN_ALL_TESTS();