// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "util/testutil.h"
#include "utilities/merge_operators.h"
13 class DBRangeDelTest
: public DBTestBase
{
15 DBRangeDelTest() : DBTestBase("/db_range_del_test") {}
17 std::string
GetNumericStr(int key
) {
18 uint64_t uint64_key
= static_cast<uint64_t>(key
);
21 memcpy(&str
[0], static_cast<void*>(&uint64_key
), 8);
// PlainTableFactory and NumTableFilesAtLevel() are not supported in
// ROCKSDB_LITE
29 TEST_F(DBRangeDelTest
, NonBlockBasedTableNotSupported
) {
30 Options opts
= CurrentOptions();
31 opts
.table_factory
.reset(new PlainTableFactory());
32 opts
.prefix_extractor
.reset(NewNoopTransform());
33 opts
.allow_mmap_reads
= true;
34 opts
.max_sequential_skip_in_iterations
= 999999;
38 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "dr1", "dr1")
42 TEST_F(DBRangeDelTest
, FlushOutputHasOnlyRangeTombstones
) {
43 ASSERT_OK(db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "dr1",
45 ASSERT_OK(db_
->Flush(FlushOptions()));
46 ASSERT_EQ(1, NumTableFilesAtLevel(0));
49 TEST_F(DBRangeDelTest
, CompactionOutputHasOnlyRangeTombstone
) {
50 Options opts
= CurrentOptions();
51 opts
.disable_auto_compactions
= true;
52 opts
.statistics
= CreateDBStatistics();
55 // snapshot protects range tombstone from dropping due to becoming obsolete.
56 const Snapshot
* snapshot
= db_
->GetSnapshot();
57 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z");
58 db_
->Flush(FlushOptions());
60 ASSERT_EQ(1, NumTableFilesAtLevel(0));
61 ASSERT_EQ(0, NumTableFilesAtLevel(1));
62 dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
63 true /* disallow_trivial_move */);
64 ASSERT_EQ(0, NumTableFilesAtLevel(0));
65 ASSERT_EQ(1, NumTableFilesAtLevel(1));
66 ASSERT_EQ(0, TestGetTickerCount(opts
, COMPACTION_RANGE_DEL_DROP_OBSOLETE
));
67 db_
->ReleaseSnapshot(snapshot
);
70 TEST_F(DBRangeDelTest
, CompactionOutputFilesExactlyFilled
) {
71 // regression test for exactly filled compaction output files. Previously
72 // another file would be generated containing all range deletions, which
73 // could invalidate the non-overlapping file boundary invariant.
74 const int kNumPerFile
= 4, kNumFiles
= 2, kFileBytes
= 9 << 10;
75 Options options
= CurrentOptions();
76 options
.disable_auto_compactions
= true;
77 options
.level0_file_num_compaction_trigger
= kNumFiles
;
78 options
.memtable_factory
.reset(new SpecialSkipListFactory(kNumPerFile
));
79 options
.num_levels
= 2;
80 options
.target_file_size_base
= kFileBytes
;
81 BlockBasedTableOptions table_options
;
82 table_options
.block_size_deviation
= 50; // each block holds two keys
83 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
86 // snapshot protects range tombstone from dropping due to becoming obsolete.
87 const Snapshot
* snapshot
= db_
->GetSnapshot();
88 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), Key(0), Key(1));
91 for (int i
= 0; i
< kNumFiles
; ++i
) {
92 std::vector
<std::string
> values
;
93 // Write 12K (4 values, each 3K)
94 for (int j
= 0; j
< kNumPerFile
; j
++) {
95 values
.push_back(RandomString(&rnd
, 3 << 10));
96 ASSERT_OK(Put(Key(i
* kNumPerFile
+ j
), values
[j
]));
97 if (j
== 0 && i
> 0) {
98 dbfull()->TEST_WaitForFlushMemTable();
102 // put extra key to trigger final flush
103 ASSERT_OK(Put("", ""));
104 dbfull()->TEST_WaitForFlushMemTable();
105 ASSERT_EQ(kNumFiles
, NumTableFilesAtLevel(0));
106 ASSERT_EQ(0, NumTableFilesAtLevel(1));
108 dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
109 true /* disallow_trivial_move */);
110 ASSERT_EQ(0, NumTableFilesAtLevel(0));
111 ASSERT_EQ(2, NumTableFilesAtLevel(1));
112 db_
->ReleaseSnapshot(snapshot
);
115 TEST_F(DBRangeDelTest
, MaxCompactionBytesCutsOutputFiles
) {
116 // Ensures range deletion spanning multiple compaction output files that are
117 // cut by max_compaction_bytes will have non-overlapping key-ranges.
118 // https://github.com/facebook/rocksdb/issues/1778
119 const int kNumFiles
= 2, kNumPerFile
= 1 << 8, kBytesPerVal
= 1 << 12;
120 Options opts
= CurrentOptions();
121 opts
.comparator
= test::Uint64Comparator();
122 opts
.disable_auto_compactions
= true;
123 opts
.level0_file_num_compaction_trigger
= kNumFiles
;
124 opts
.max_compaction_bytes
= kNumPerFile
* kBytesPerVal
;
125 opts
.memtable_factory
.reset(new SpecialSkipListFactory(kNumPerFile
));
126 // Want max_compaction_bytes to trigger the end of compaction output file, not
127 // target_file_size_base, so make the latter much bigger
128 opts
.target_file_size_base
= 100 * opts
.max_compaction_bytes
;
131 // snapshot protects range tombstone from dropping due to becoming obsolete.
132 const Snapshot
* snapshot
= db_
->GetSnapshot();
134 // It spans the whole key-range, thus will be included in all output files
135 ASSERT_OK(db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(),
137 GetNumericStr(kNumFiles
* kNumPerFile
- 1)));
139 for (int i
= 0; i
< kNumFiles
; ++i
) {
140 std::vector
<std::string
> values
;
141 // Write 1MB (256 values, each 4K)
142 for (int j
= 0; j
< kNumPerFile
; j
++) {
143 values
.push_back(RandomString(&rnd
, kBytesPerVal
));
144 ASSERT_OK(Put(GetNumericStr(kNumPerFile
* i
+ j
), values
[j
]));
146 // extra entry to trigger SpecialSkipListFactory's flush
147 ASSERT_OK(Put(GetNumericStr(kNumPerFile
), ""));
148 dbfull()->TEST_WaitForFlushMemTable();
149 ASSERT_EQ(i
+ 1, NumTableFilesAtLevel(0));
152 dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
153 true /* disallow_trivial_move */);
154 ASSERT_EQ(0, NumTableFilesAtLevel(0));
155 ASSERT_GE(NumTableFilesAtLevel(1), 2);
157 std::vector
<std::vector
<FileMetaData
>> files
;
158 dbfull()->TEST_GetFilesMetaData(db_
->DefaultColumnFamily(), &files
);
160 for (size_t i
= 0; i
< files
[1].size() - 1; ++i
) {
161 ASSERT_TRUE(InternalKeyComparator(opts
.comparator
)
162 .Compare(files
[1][i
].largest
, files
[1][i
+ 1].smallest
) <
165 db_
->ReleaseSnapshot(snapshot
);
168 TEST_F(DBRangeDelTest
, SentinelsOmittedFromOutputFile
) {
169 // Regression test for bug where sentinel range deletions (i.e., ones with
170 // sequence number of zero) were included in output files.
171 // snapshot protects range tombstone from dropping due to becoming obsolete.
172 const Snapshot
* snapshot
= db_
->GetSnapshot();
174 // gaps between ranges creates sentinels in our internal representation
175 std::vector
<std::pair
<std::string
, std::string
>> range_dels
= {{"a", "b"}, {"c", "d"}, {"e", "f"}};
176 for (const auto& range_del
: range_dels
) {
177 ASSERT_OK(db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(),
178 range_del
.first
, range_del
.second
));
180 ASSERT_OK(db_
->Flush(FlushOptions()));
181 ASSERT_EQ(1, NumTableFilesAtLevel(0));
183 std::vector
<std::vector
<FileMetaData
>> files
;
184 dbfull()->TEST_GetFilesMetaData(db_
->DefaultColumnFamily(), &files
);
185 ASSERT_GT(files
[0][0].smallest_seqno
, 0);
187 db_
->ReleaseSnapshot(snapshot
);
190 TEST_F(DBRangeDelTest
, FlushRangeDelsSameStartKey
) {
191 db_
->Put(WriteOptions(), "b1", "val");
193 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "c"));
194 db_
->Put(WriteOptions(), "b2", "val");
196 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "b"));
197 // first iteration verifies query correctness in memtable, second verifies
198 // query correctness for a single SST file
199 for (int i
= 0; i
< 2; ++i
) {
201 ASSERT_OK(db_
->Flush(FlushOptions()));
202 ASSERT_EQ(1, NumTableFilesAtLevel(0));
205 ASSERT_TRUE(db_
->Get(ReadOptions(), "b1", &value
).IsNotFound());
206 ASSERT_OK(db_
->Get(ReadOptions(), "b2", &value
));
210 TEST_F(DBRangeDelTest
, CompactRangeDelsSameStartKey
) {
211 db_
->Put(WriteOptions(), "unused", "val"); // prevents empty after compaction
212 db_
->Put(WriteOptions(), "b1", "val");
213 ASSERT_OK(db_
->Flush(FlushOptions()));
215 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "c"));
216 ASSERT_OK(db_
->Flush(FlushOptions()));
218 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "b"));
219 ASSERT_OK(db_
->Flush(FlushOptions()));
220 ASSERT_EQ(3, NumTableFilesAtLevel(0));
222 for (int i
= 0; i
< 2; ++i
) {
224 dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
225 true /* disallow_trivial_move */);
226 ASSERT_EQ(0, NumTableFilesAtLevel(0));
227 ASSERT_EQ(1, NumTableFilesAtLevel(1));
230 ASSERT_TRUE(db_
->Get(ReadOptions(), "b1", &value
).IsNotFound());
#endif  // ROCKSDB_LITE
235 TEST_F(DBRangeDelTest
, FlushRemovesCoveredKeys
) {
236 const int kNum
= 300, kRangeBegin
= 50, kRangeEnd
= 250;
237 Options opts
= CurrentOptions();
238 opts
.comparator
= test::Uint64Comparator();
241 // Write a third before snapshot, a third between snapshot and tombstone, and
242 // a third after the tombstone. Keys older than snapshot or newer than the
243 // tombstone should be preserved.
244 const Snapshot
* snapshot
= nullptr;
245 for (int i
= 0; i
< kNum
; ++i
) {
247 snapshot
= db_
->GetSnapshot();
248 } else if (i
== 2 * kNum
/ 3) {
249 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(),
250 GetNumericStr(kRangeBegin
), GetNumericStr(kRangeEnd
));
252 db_
->Put(WriteOptions(), GetNumericStr(i
), "val");
254 db_
->Flush(FlushOptions());
256 for (int i
= 0; i
< kNum
; ++i
) {
257 ReadOptions read_opts
;
258 read_opts
.ignore_range_deletions
= true;
260 if (i
< kRangeBegin
|| i
> kRangeEnd
|| i
< kNum
/ 3 || i
>= 2 * kNum
/ 3) {
261 ASSERT_OK(db_
->Get(read_opts
, GetNumericStr(i
), &value
));
263 ASSERT_TRUE(db_
->Get(read_opts
, GetNumericStr(i
), &value
).IsNotFound());
266 db_
->ReleaseSnapshot(snapshot
);
// NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
271 TEST_F(DBRangeDelTest
, CompactionRemovesCoveredKeys
) {
272 const int kNumPerFile
= 100, kNumFiles
= 4;
273 Options opts
= CurrentOptions();
274 opts
.comparator
= test::Uint64Comparator();
275 opts
.disable_auto_compactions
= true;
276 opts
.memtable_factory
.reset(new SpecialSkipListFactory(kNumPerFile
));
278 opts
.statistics
= CreateDBStatistics();
281 for (int i
= 0; i
< kNumFiles
; ++i
) {
283 // range tombstone covers first half of the previous file
284 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(),
285 GetNumericStr((i
- 1) * kNumPerFile
),
286 GetNumericStr((i
- 1) * kNumPerFile
+ kNumPerFile
/ 2));
288 // Make sure a given key appears in each file so compaction won't be able to
289 // use trivial move, which would happen if the ranges were non-overlapping.
290 // Also, we need an extra element since flush is only triggered when the
291 // number of keys is one greater than SpecialSkipListFactory's limit.
292 // We choose a key outside the key-range used by the test to avoid conflict.
293 db_
->Put(WriteOptions(), GetNumericStr(kNumPerFile
* kNumFiles
), "val");
295 for (int j
= 0; j
< kNumPerFile
; ++j
) {
296 db_
->Put(WriteOptions(), GetNumericStr(i
* kNumPerFile
+ j
), "val");
298 dbfull()->TEST_WaitForFlushMemTable();
299 ASSERT_EQ(i
+ 1, NumTableFilesAtLevel(0));
301 db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr);
302 ASSERT_EQ(0, NumTableFilesAtLevel(0));
303 ASSERT_GT(NumTableFilesAtLevel(1), 0);
304 ASSERT_EQ((kNumFiles
- 1) * kNumPerFile
/ 2,
305 TestGetTickerCount(opts
, COMPACTION_KEY_DROP_RANGE_DEL
));
307 for (int i
= 0; i
< kNumFiles
; ++i
) {
308 for (int j
= 0; j
< kNumPerFile
; ++j
) {
309 ReadOptions read_opts
;
310 read_opts
.ignore_range_deletions
= true;
312 if (i
== kNumFiles
- 1 || j
>= kNumPerFile
/ 2) {
314 db_
->Get(read_opts
, GetNumericStr(i
* kNumPerFile
+ j
), &value
));
317 db_
->Get(read_opts
, GetNumericStr(i
* kNumPerFile
+ j
), &value
)
324 TEST_F(DBRangeDelTest
, ValidLevelSubcompactionBoundaries
) {
325 const int kNumPerFile
= 100, kNumFiles
= 4, kFileBytes
= 100 << 10;
326 Options options
= CurrentOptions();
327 options
.disable_auto_compactions
= true;
328 options
.level0_file_num_compaction_trigger
= kNumFiles
;
329 options
.max_bytes_for_level_base
= 2 * kFileBytes
;
330 options
.max_subcompactions
= 4;
331 options
.memtable_factory
.reset(new SpecialSkipListFactory(kNumPerFile
));
332 options
.num_levels
= 3;
333 options
.target_file_size_base
= kFileBytes
;
334 options
.target_file_size_multiplier
= 1;
338 for (int i
= 0; i
< 2; ++i
) {
339 for (int j
= 0; j
< kNumFiles
; ++j
) {
341 // delete [95,105) in two files, [295,305) in next two
342 int mid
= (j
+ (1 - j
% 2)) * kNumPerFile
;
343 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(),
344 Key(mid
- 5), Key(mid
+ 5));
346 std::vector
<std::string
> values
;
347 // Write 100KB (100 values, each 1K)
348 for (int k
= 0; k
< kNumPerFile
; k
++) {
349 values
.push_back(RandomString(&rnd
, 990));
350 ASSERT_OK(Put(Key(j
* kNumPerFile
+ k
), values
[k
]));
352 // put extra key to trigger flush
353 ASSERT_OK(Put("", ""));
354 dbfull()->TEST_WaitForFlushMemTable();
355 if (j
< kNumFiles
- 1) {
356 // background compaction may happen early for kNumFiles'th file
357 ASSERT_EQ(NumTableFilesAtLevel(0), j
+ 1);
359 if (j
== options
.level0_file_num_compaction_trigger
- 1) {
360 // When i == 1, compaction will output some files to L1, at which point
361 // L1 is not bottommost so range deletions cannot be compacted away. The
362 // new L1 files must be generated with non-overlapping key ranges even
363 // though multiple subcompactions see the same ranges deleted, else an
364 // assertion will fail.
366 // Only enable auto-compactions when we're ready; otherwise, the
367 // oversized L0 (relative to base_level) causes the compaction to run
369 ASSERT_OK(db_
->EnableAutoCompaction({db_
->DefaultColumnFamily()}));
370 dbfull()->TEST_WaitForCompact();
371 ASSERT_OK(db_
->SetOptions(db_
->DefaultColumnFamily(),
372 {{"disable_auto_compactions", "true"}}));
373 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
374 ASSERT_GT(NumTableFilesAtLevel(1), 0);
375 ASSERT_GT(NumTableFilesAtLevel(2), 0);
381 TEST_F(DBRangeDelTest
, ValidUniversalSubcompactionBoundaries
) {
382 const int kNumPerFile
= 100, kFilesPerLevel
= 4, kNumLevels
= 4;
383 Options options
= CurrentOptions();
384 options
.compaction_options_universal
.min_merge_width
= kFilesPerLevel
;
385 options
.compaction_options_universal
.max_merge_width
= kFilesPerLevel
;
386 options
.compaction_options_universal
.size_ratio
= 10;
387 options
.compaction_style
= kCompactionStyleUniversal
;
388 options
.level0_file_num_compaction_trigger
= kFilesPerLevel
;
389 options
.max_subcompactions
= 4;
390 options
.memtable_factory
.reset(new SpecialSkipListFactory(kNumPerFile
));
391 options
.num_levels
= kNumLevels
;
392 options
.target_file_size_base
= kNumPerFile
<< 10;
393 options
.target_file_size_multiplier
= 1;
397 for (int i
= 0; i
< kNumLevels
- 1; ++i
) {
398 for (int j
= 0; j
< kFilesPerLevel
; ++j
) {
399 if (i
== kNumLevels
- 2) {
400 // insert range deletions [95,105) in two files, [295,305) in next two
401 // to prepare L1 for later manual compaction.
402 int mid
= (j
+ (1 - j
% 2)) * kNumPerFile
;
403 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(),
404 Key(mid
- 5), Key(mid
+ 5));
406 std::vector
<std::string
> values
;
407 // Write 100KB (100 values, each 1K)
408 for (int k
= 0; k
< kNumPerFile
; k
++) {
409 values
.push_back(RandomString(&rnd
, 990));
410 ASSERT_OK(Put(Key(j
* kNumPerFile
+ k
), values
[k
]));
412 // put extra key to trigger flush
413 ASSERT_OK(Put("", ""));
414 dbfull()->TEST_WaitForFlushMemTable();
415 if (j
< kFilesPerLevel
- 1) {
416 // background compaction may happen early for kFilesPerLevel'th file
417 ASSERT_EQ(NumTableFilesAtLevel(0), j
+ 1);
420 dbfull()->TEST_WaitForCompact();
421 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
422 ASSERT_GT(NumTableFilesAtLevel(kNumLevels
- 1 - i
), kFilesPerLevel
- 1);
424 // Now L1-L3 are full, when we compact L1->L2 we should see (1) subcompactions
425 // happen since input level > 0; (2) range deletions are not dropped since
426 // output level is not bottommost. If no file boundary assertion fails, that
427 // probably means universal compaction + subcompaction + range deletion are
429 ASSERT_OK(dbfull()->RunManualCompaction(
430 reinterpret_cast<ColumnFamilyHandleImpl
*>(db_
->DefaultColumnFamily())
432 1 /* input_level */, 2 /* output_level */, 0 /* output_path_id */,
433 nullptr /* begin */, nullptr /* end */, true /* exclusive */,
434 true /* disallow_trivial_move */));
#endif  // ROCKSDB_LITE
438 TEST_F(DBRangeDelTest
, CompactionRemovesCoveredMergeOperands
) {
439 const int kNumPerFile
= 3, kNumFiles
= 3;
440 Options opts
= CurrentOptions();
441 opts
.disable_auto_compactions
= true;
442 opts
.memtable_factory
.reset(new SpecialSkipListFactory(2 * kNumPerFile
));
443 opts
.merge_operator
= MergeOperators::CreateUInt64AddOperator();
447 // Iterates kNumFiles * kNumPerFile + 1 times since flushing the last file
448 // requires an extra entry.
449 for (int i
= 0; i
<= kNumFiles
* kNumPerFile
; ++i
) {
450 if (i
% kNumPerFile
== 0 && i
/ kNumPerFile
== kNumFiles
- 1) {
451 // Delete merge operands from all but the last file
452 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "key",
457 db_
->Merge(WriteOptions(), "key", val
);
458 // we need to prevent trivial move using Puts so compaction will actually
459 // process the merge operands.
460 db_
->Put(WriteOptions(), "prevent_trivial_move", "");
461 if (i
> 0 && i
% kNumPerFile
== 0) {
462 dbfull()->TEST_WaitForFlushMemTable();
466 ReadOptions read_opts
;
467 read_opts
.ignore_range_deletions
= true;
468 std::string expected
, actual
;
469 ASSERT_OK(db_
->Get(read_opts
, "key", &actual
));
470 PutFixed64(&expected
, 45); // 1+2+...+9
471 ASSERT_EQ(expected
, actual
);
473 db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr);
476 ASSERT_OK(db_
->Get(read_opts
, "key", &actual
));
479 GetFixed64(&tmp2
, &tmp
);
480 PutFixed64(&expected
, 30); // 6+7+8+9 (earlier operands covered by tombstone)
481 ASSERT_EQ(expected
, actual
);
// NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
486 TEST_F(DBRangeDelTest
, ObsoleteTombstoneCleanup
) {
487 // During compaction to bottommost level, verify range tombstones older than
488 // the oldest snapshot are removed, while others are preserved.
489 Options opts
= CurrentOptions();
490 opts
.disable_auto_compactions
= true;
492 opts
.statistics
= CreateDBStatistics();
495 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "dr1",
496 "dr1"); // obsolete after compaction
497 db_
->Put(WriteOptions(), "key", "val");
498 db_
->Flush(FlushOptions());
499 const Snapshot
* snapshot
= db_
->GetSnapshot();
500 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "dr2",
501 "dr2"); // protected by snapshot
502 db_
->Put(WriteOptions(), "key", "val");
503 db_
->Flush(FlushOptions());
505 ASSERT_EQ(2, NumTableFilesAtLevel(0));
506 ASSERT_EQ(0, NumTableFilesAtLevel(1));
507 db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr);
508 ASSERT_EQ(0, NumTableFilesAtLevel(0));
509 ASSERT_EQ(1, NumTableFilesAtLevel(1));
510 ASSERT_EQ(1, TestGetTickerCount(opts
, COMPACTION_RANGE_DEL_DROP_OBSOLETE
));
512 db_
->ReleaseSnapshot(snapshot
);
515 TEST_F(DBRangeDelTest
, TableEvictedDuringScan
) {
516 // The RangeDelAggregator holds pointers into range deletion blocks created by
517 // table readers. This test ensures the aggregator can still access those
518 // blocks even if it outlives the table readers that created them.
520 // DBIter always keeps readers open for L0 files. So, in order to test
521 // aggregator outliving reader, we need to have deletions in L1 files, which
522 // are opened/closed on-demand during the scan. This is accomplished by
523 // setting kNumRanges > level0_stop_writes_trigger, which prevents deletions
524 // from all lingering in L0 (there is at most one range deletion per L0 file).
526 // The first L1 file will contain a range deletion since its begin key is 0.
527 // SeekToFirst() references that table's reader and adds its range tombstone
528 // to the aggregator. Upon advancing beyond that table's key-range via Next(),
529 // the table reader will be unreferenced by the iterator. Since we manually
530 // call Evict() on all readers before the full scan, this unreference causes
531 // the reader's refcount to drop to zero and thus be destroyed.
533 // When it is destroyed, we do not remove its range deletions from the
534 // aggregator. So, subsequent calls to Next() must be able to use these
535 // deletions to decide whether a key is covered. This will work as long as
536 // the aggregator properly references the range deletion block.
537 const int kNum
= 25, kRangeBegin
= 0, kRangeEnd
= 7, kNumRanges
= 5;
538 Options opts
= CurrentOptions();
539 opts
.comparator
= test::Uint64Comparator();
540 opts
.level0_file_num_compaction_trigger
= 4;
541 opts
.level0_stop_writes_trigger
= 4;
542 opts
.memtable_factory
.reset(new SpecialSkipListFactory(1));
544 BlockBasedTableOptions bbto
;
545 bbto
.cache_index_and_filter_blocks
= true;
546 bbto
.block_cache
= NewLRUCache(8 << 20);
547 opts
.table_factory
.reset(NewBlockBasedTableFactory(bbto
));
550 // Hold a snapshot so range deletions can't become obsolete during compaction
551 // to bottommost level (i.e., L1).
552 const Snapshot
* snapshot
= db_
->GetSnapshot();
553 for (int i
= 0; i
< kNum
; ++i
) {
554 db_
->Put(WriteOptions(), GetNumericStr(i
), "val");
556 dbfull()->TEST_WaitForFlushMemTable();
558 if (i
>= kNum
/ 2 && i
< kNum
/ 2 + kNumRanges
) {
559 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(),
560 GetNumericStr(kRangeBegin
), GetNumericStr(kRangeEnd
));
563 // Must be > 1 so the first L1 file can be closed before scan finishes
564 dbfull()->TEST_WaitForCompact();
565 ASSERT_GT(NumTableFilesAtLevel(1), 1);
566 std::vector
<uint64_t> file_numbers
= ListTableFiles(env_
, dbname_
);
568 ReadOptions read_opts
;
569 auto* iter
= db_
->NewIterator(read_opts
);
570 int expected
= kRangeEnd
;
572 for (auto file_number
: file_numbers
) {
573 // This puts table caches in the state of being externally referenced only
574 // so they are destroyed immediately upon iterator unreferencing.
575 TableCache::Evict(dbfull()->TEST_table_cache(), file_number
);
577 for (; iter
->Valid(); iter
->Next()) {
578 ASSERT_EQ(GetNumericStr(expected
), iter
->key());
580 // Keep clearing block cache's LRU so range deletion block can be freed as
581 // soon as its refcount drops to zero.
582 bbto
.block_cache
->EraseUnRefEntries();
584 ASSERT_EQ(kNum
, expected
);
586 db_
->ReleaseSnapshot(snapshot
);
589 TEST_F(DBRangeDelTest
, GetCoveredKeyFromMutableMemtable
) {
590 db_
->Put(WriteOptions(), "key", "val");
592 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z"));
594 ReadOptions read_opts
;
596 ASSERT_TRUE(db_
->Get(read_opts
, "key", &value
).IsNotFound());
599 TEST_F(DBRangeDelTest
, GetCoveredKeyFromImmutableMemtable
) {
600 Options opts
= CurrentOptions();
601 opts
.max_write_buffer_number
= 3;
602 opts
.min_write_buffer_number_to_merge
= 2;
603 // SpecialSkipListFactory lets us specify maximum number of elements the
604 // memtable can hold. It switches the active memtable to immutable (flush is
605 // prevented by the above options) upon inserting an element that would
606 // overflow the memtable.
607 opts
.memtable_factory
.reset(new SpecialSkipListFactory(1));
610 db_
->Put(WriteOptions(), "key", "val");
612 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z"));
613 db_
->Put(WriteOptions(), "blah", "val");
615 ReadOptions read_opts
;
617 ASSERT_TRUE(db_
->Get(read_opts
, "key", &value
).IsNotFound());
620 TEST_F(DBRangeDelTest
, GetCoveredKeyFromSst
) {
621 db_
->Put(WriteOptions(), "key", "val");
622 // snapshot prevents key from being deleted during flush
623 const Snapshot
* snapshot
= db_
->GetSnapshot();
625 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z"));
626 ASSERT_OK(db_
->Flush(FlushOptions()));
628 ReadOptions read_opts
;
630 ASSERT_TRUE(db_
->Get(read_opts
, "key", &value
).IsNotFound());
631 db_
->ReleaseSnapshot(snapshot
);
634 TEST_F(DBRangeDelTest
, GetCoveredMergeOperandFromMemtable
) {
635 const int kNumMergeOps
= 10;
636 Options opts
= CurrentOptions();
637 opts
.merge_operator
= MergeOperators::CreateUInt64AddOperator();
640 for (int i
= 0; i
< kNumMergeOps
; ++i
) {
643 db_
->Merge(WriteOptions(), "key", val
);
644 if (i
== kNumMergeOps
/ 2) {
646 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "key",
651 ReadOptions read_opts
;
652 std::string expected
, actual
;
653 ASSERT_OK(db_
->Get(read_opts
, "key", &actual
));
654 PutFixed64(&expected
, 30); // 6+7+8+9
655 ASSERT_EQ(expected
, actual
);
658 read_opts
.ignore_range_deletions
= true;
659 ASSERT_OK(db_
->Get(read_opts
, "key", &actual
));
660 PutFixed64(&expected
, 45); // 0+1+2+...+9
661 ASSERT_EQ(expected
, actual
);
664 TEST_F(DBRangeDelTest
, GetIgnoresRangeDeletions
) {
665 Options opts
= CurrentOptions();
666 opts
.max_write_buffer_number
= 4;
667 opts
.min_write_buffer_number_to_merge
= 3;
668 opts
.memtable_factory
.reset(new SpecialSkipListFactory(1));
671 db_
->Put(WriteOptions(), "sst_key", "val");
672 // snapshot prevents key from being deleted during flush
673 const Snapshot
* snapshot
= db_
->GetSnapshot();
675 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z"));
676 ASSERT_OK(db_
->Flush(FlushOptions()));
677 db_
->Put(WriteOptions(), "imm_key", "val");
679 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z"));
680 db_
->Put(WriteOptions(), "mem_key", "val");
682 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z"));
684 ReadOptions read_opts
;
685 read_opts
.ignore_range_deletions
= true;
686 for (std::string key
: {"sst_key", "imm_key", "mem_key"}) {
688 ASSERT_OK(db_
->Get(read_opts
, key
, &value
));
690 db_
->ReleaseSnapshot(snapshot
);
693 TEST_F(DBRangeDelTest
, IteratorRemovesCoveredKeys
) {
694 const int kNum
= 200, kRangeBegin
= 50, kRangeEnd
= 150, kNumPerFile
= 25;
695 Options opts
= CurrentOptions();
696 opts
.comparator
= test::Uint64Comparator();
697 opts
.memtable_factory
.reset(new SpecialSkipListFactory(kNumPerFile
));
700 // Write half of the keys before the tombstone and half after the tombstone.
701 // Only covered keys (i.e., within the range and older than the tombstone)
702 // should be deleted.
703 for (int i
= 0; i
< kNum
; ++i
) {
705 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(),
706 GetNumericStr(kRangeBegin
), GetNumericStr(kRangeEnd
));
708 db_
->Put(WriteOptions(), GetNumericStr(i
), "val");
710 ReadOptions read_opts
;
711 auto* iter
= db_
->NewIterator(read_opts
);
714 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
715 ASSERT_EQ(GetNumericStr(expected
), iter
->key());
716 if (expected
== kRangeBegin
- 1) {
722 ASSERT_EQ(kNum
, expected
);
726 TEST_F(DBRangeDelTest
, IteratorOverUserSnapshot
) {
727 const int kNum
= 200, kRangeBegin
= 50, kRangeEnd
= 150, kNumPerFile
= 25;
728 Options opts
= CurrentOptions();
729 opts
.comparator
= test::Uint64Comparator();
730 opts
.memtable_factory
.reset(new SpecialSkipListFactory(kNumPerFile
));
733 const Snapshot
* snapshot
= nullptr;
734 // Put a snapshot before the range tombstone, verify an iterator using that
735 // snapshot sees all inserted keys.
736 for (int i
= 0; i
< kNum
; ++i
) {
738 snapshot
= db_
->GetSnapshot();
739 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(),
740 GetNumericStr(kRangeBegin
), GetNumericStr(kRangeEnd
));
742 db_
->Put(WriteOptions(), GetNumericStr(i
), "val");
744 ReadOptions read_opts
;
745 read_opts
.snapshot
= snapshot
;
746 auto* iter
= db_
->NewIterator(read_opts
);
749 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
750 ASSERT_EQ(GetNumericStr(expected
), iter
->key());
753 ASSERT_EQ(kNum
/ 2, expected
);
755 db_
->ReleaseSnapshot(snapshot
);
758 TEST_F(DBRangeDelTest
, IteratorIgnoresRangeDeletions
) {
759 Options opts
= CurrentOptions();
760 opts
.max_write_buffer_number
= 4;
761 opts
.min_write_buffer_number_to_merge
= 3;
762 opts
.memtable_factory
.reset(new SpecialSkipListFactory(1));
765 db_
->Put(WriteOptions(), "sst_key", "val");
766 // snapshot prevents key from being deleted during flush
767 const Snapshot
* snapshot
= db_
->GetSnapshot();
769 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z"));
770 ASSERT_OK(db_
->Flush(FlushOptions()));
771 db_
->Put(WriteOptions(), "imm_key", "val");
773 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z"));
774 db_
->Put(WriteOptions(), "mem_key", "val");
776 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z"));
778 ReadOptions read_opts
;
779 read_opts
.ignore_range_deletions
= true;
780 auto* iter
= db_
->NewIterator(read_opts
);
782 std::string expected
[] = {"imm_key", "mem_key", "sst_key"};
783 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next(), ++i
) {
785 ASSERT_EQ(expected
[i
], iter
->key());
789 db_
->ReleaseSnapshot(snapshot
);
792 TEST_F(DBRangeDelTest
, TailingIteratorRangeTombstoneUnsupported
) {
793 db_
->Put(WriteOptions(), "key", "val");
794 // snapshot prevents key from being deleted during flush
795 const Snapshot
* snapshot
= db_
->GetSnapshot();
797 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z"));
799 // iterations check unsupported in memtable, l0, and then l1
800 for (int i
= 0; i
< 3; ++i
) {
801 ReadOptions read_opts
;
802 read_opts
.tailing
= true;
803 auto* iter
= db_
->NewIterator(read_opts
);
805 // For L1+, iterators over files are created on-demand, so need seek
808 ASSERT_TRUE(iter
->status().IsNotSupported());
811 ASSERT_OK(db_
->Flush(FlushOptions()));
816 db_
->ReleaseSnapshot(snapshot
);
#endif  // ROCKSDB_LITE

}  // namespace rocksdb
822 int main(int argc
, char** argv
) {
823 rocksdb::port::InstallStackTraceHandler();
824 ::testing::InitGoogleTest(&argc
, argv
);
825 return RUN_ALL_TESTS();