// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "db/db_test_util.h"
#include "db/version_set.h"
#include "port/stack_trace.h"
#include "rocksdb/utilities/write_batch_with_index.h"
#include "test_util/testutil.h"
#include "util/random.h"
#include "utilities/merge_operators.h"

namespace ROCKSDB_NAMESPACE {

// TODO(cbi): parameterize the test to cover user-defined timestamp cases
class DBRangeDelTest : public DBTestBase {
 public:
  DBRangeDelTest() : DBTestBase("db_range_del_test", /*env_do_fsync=*/false) {}

  std::string GetNumericStr(int key) {
    uint64_t uint64_key = static_cast<uint64_t>(key);
    std::string str;
    str.resize(8);
    memcpy(&str[0], static_cast<void*>(&uint64_key), 8);
    return str;
  }
};

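// Note: GetNumericStr() produces fixed-width 8-byte keys; the tests below
// that set `opts.comparator = test::Uint64Comparator()` rely on this encoding
// so keys compare in numeric rather than lexicographic order.
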
// PlainTableFactory, WriteBatchWithIndex, and NumTableFilesAtLevel() are not
// supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE

TEST_F(DBRangeDelTest, NonBlockBasedTableNotSupported) {
  // TODO: figure out why MmapReads trips the iterator pinning assertion in
  // RangeDelAggregator. Ideally it would be supported; otherwise it should at
  // least be explicitly unsupported.
  for (auto config : {kPlainTableAllBytesPrefix, /* kWalDirAndMmapReads */}) {
    option_config_ = config;
    DestroyAndReopen(CurrentOptions());
    ASSERT_TRUE(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 "dr1", "dr2")
                    .IsNotSupported());
  }
}

TEST_F(DBRangeDelTest, WriteBatchWithIndexNotSupported) {
  WriteBatchWithIndex indexedBatch{};
  ASSERT_TRUE(indexedBatch.DeleteRange(db_->DefaultColumnFamily(), "dr1", "dr1")
                  .IsNotSupported());
  ASSERT_TRUE(indexedBatch.DeleteRange("dr1", "dr1").IsNotSupported());
}

TEST_F(DBRangeDelTest, EndSameAsStartCoversNothing) {
  ASSERT_OK(db_->Put(WriteOptions(), "b", "val"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "b"));
  ASSERT_EQ("val", Get("b"));
}

TEST_F(DBRangeDelTest, EndComesBeforeStartInvalidArgument) {
  ASSERT_OK(db_->Put(WriteOptions(), "b", "val"));
  ASSERT_TRUE(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "a")
          .IsInvalidArgument());
  ASSERT_EQ("val", Get("b"));
}

TEST_F(DBRangeDelTest, FlushOutputHasOnlyRangeTombstones) {
  do {
    DestroyAndReopen(CurrentOptions());
    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                               "dr1", "dr2"));
    ASSERT_OK(db_->Flush(FlushOptions()));
    ASSERT_EQ(1, NumTableFilesAtLevel(0));
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

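// ChangeOptions() cycles option_config_ through the remaining option
// configurations, skipping those named in kRangeDelSkipConfigs (e.g., plain
// table), so do/while loops like the one above rerun the test body once per
// configuration that supports DeleteRange.
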
TEST_F(DBRangeDelTest, DictionaryCompressionWithOnlyRangeTombstones) {
  Options opts = CurrentOptions();
  opts.compression_opts.max_dict_bytes = 16384;
  Reopen(opts);
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr1",
                             "dr2"));
  ASSERT_OK(db_->Flush(FlushOptions()));
}

TEST_F(DBRangeDelTest, CompactionOutputHasOnlyRangeTombstone) {
  do {
    Options opts = CurrentOptions();
    opts.disable_auto_compactions = true;
    opts.statistics = CreateDBStatistics();
    DestroyAndReopen(opts);

    // snapshot protects range tombstone from dropping due to becoming obsolete.
    const Snapshot* snapshot = db_->GetSnapshot();
    ASSERT_OK(
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
    ASSERT_OK(db_->Flush(FlushOptions()));

    ASSERT_EQ(1, NumTableFilesAtLevel(0));
    ASSERT_EQ(0, NumTableFilesAtLevel(1));
    ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                          true /* disallow_trivial_move */));
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_EQ(1, NumTableFilesAtLevel(1));
    ASSERT_EQ(0, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE));
    db_->ReleaseSnapshot(snapshot);
    // Skip cuckoo memtables, which do not support snapshots. Skip non-leveled
    // compactions as the above assertions about the number of files in a level
    // do not hold for them.
  } while (ChangeOptions(kRangeDelSkipConfigs | kSkipUniversalCompaction |
                         kSkipFIFOCompaction));
}

TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) {
  // regression test for exactly filled compaction output files. Previously
  // another file would be generated containing all range deletions, which
  // could invalidate the non-overlapping file boundary invariant.
  const int kNumPerFile = 4, kNumFiles = 2, kFileBytes = 9 << 10;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = kNumFiles;
  options.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile));
  options.num_levels = 2;
  options.target_file_size_base = kFileBytes;
  BlockBasedTableOptions table_options;
  table_options.block_size_deviation = 50;  // each block holds two keys
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(1)));

  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(rnd.RandomString(3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
      if (j == 0 && i > 0) {
        ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
      }
    }
  }
  // put extra key to trigger final flush
  ASSERT_OK(Put("", ""));
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));

  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                        true /* disallow_trivial_move */));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(2, NumTableFilesAtLevel(1));
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, MaxCompactionBytesCutsOutputFiles) {
  // Ensures range deletion spanning multiple compaction output files that are
  // cut by max_compaction_bytes will have non-overlapping key-ranges.
  // https://github.com/facebook/rocksdb/issues/1778
  const int kNumFiles = 2, kNumPerFile = 1 << 8, kBytesPerVal = 1 << 12;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.disable_auto_compactions = true;
  opts.level0_file_num_compaction_trigger = kNumFiles;
  opts.max_compaction_bytes = kNumPerFile * kBytesPerVal;
  opts.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile));
  // Want max_compaction_bytes to trigger the end of compaction output file,
  // not target_file_size_base, so make the latter much bigger
  // opts.target_file_size_base = 100 * opts.max_compaction_bytes;
  opts.target_file_size_base = 1;
  DestroyAndReopen(opts);

  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();

  Random rnd(301);

  ASSERT_OK(Put(GetNumericStr(0), rnd.RandomString(kBytesPerVal)));
  ASSERT_OK(
      Put(GetNumericStr(kNumPerFile - 1), rnd.RandomString(kBytesPerVal)));
  ASSERT_OK(Flush());
  ASSERT_OK(Put(GetNumericStr(kNumPerFile), rnd.RandomString(kBytesPerVal)));
  ASSERT_OK(
      Put(GetNumericStr(kNumPerFile * 2 - 1), rnd.RandomString(kBytesPerVal)));
  ASSERT_OK(Flush());
  MoveFilesToLevel(2);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(NumTableFilesAtLevel(2), 2);

  ASSERT_OK(
      db_->SetOptions(db_->DefaultColumnFamily(),
                      {{"target_file_size_base",
                        std::to_string(100 * opts.max_compaction_bytes)}}));

  // It spans the whole key-range, thus will be included in all output files
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             GetNumericStr(0),
                             GetNumericStr(kNumFiles * kNumPerFile - 1)));

  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 1MB (256 values, each 4K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(rnd.RandomString(kBytesPerVal));
      ASSERT_OK(Put(GetNumericStr(kNumPerFile * i + j), values[j]));
    }
    // extra entry to trigger SpecialSkipListFactory's flush
    ASSERT_OK(Put(GetNumericStr(kNumPerFile), ""));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
  }

  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr,
                                        /*column_family=*/nullptr,
                                        /*disallow_trivial_move=*/true));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GE(NumTableFilesAtLevel(1), 2);
  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);

  for (size_t i = 0; i + 1 < files[1].size(); ++i) {
    ASSERT_TRUE(InternalKeyComparator(opts.comparator)
                    .Compare(files[1][i].largest, files[1][i + 1].smallest) <
                0);
  }
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, SentinelsOmittedFromOutputFile) {
  // Regression test for bug where sentinel range deletions (i.e., ones with
  // sequence number of zero) were included in output files.
  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();

  // gaps between ranges create sentinels in our internal representation
  std::vector<std::pair<std::string, std::string>> range_dels = {
      {"a", "b"}, {"c", "d"}, {"e", "f"}};
  for (const auto& range_del : range_dels) {
    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                               range_del.first, range_del.second));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
  ASSERT_GT(files[0][0].fd.smallest_seqno, 0);

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, FlushRangeDelsSameStartKey) {
  ASSERT_OK(db_->Put(WriteOptions(), "b1", "val"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
  ASSERT_OK(db_->Put(WriteOptions(), "b2", "val"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
  // first iteration verifies query correctness in memtable, second verifies
  // query correctness for a single SST file
  for (int i = 0; i < 2; ++i) {
    if (i > 0) {
      ASSERT_OK(db_->Flush(FlushOptions()));
      ASSERT_EQ(1, NumTableFilesAtLevel(0));
    }
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound());
    ASSERT_OK(db_->Get(ReadOptions(), "b2", &value));
  }
}

TEST_F(DBRangeDelTest, CompactRangeDelsSameStartKey) {
  ASSERT_OK(db_->Put(WriteOptions(), "unused",
                     "val"));  // prevents empty after compaction
  ASSERT_OK(db_->Put(WriteOptions(), "b1", "val"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(3, NumTableFilesAtLevel(0));

  for (int i = 0; i < 2; ++i) {
    if (i > 0) {
      ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                            true /* disallow_trivial_move */));
      ASSERT_EQ(0, NumTableFilesAtLevel(0));
      ASSERT_EQ(1, NumTableFilesAtLevel(1));
    }
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound());
  }
}

#endif  // ROCKSDB_LITE

TEST_F(DBRangeDelTest, FlushRemovesCoveredKeys) {
  const int kNum = 300, kRangeBegin = 50, kRangeEnd = 250;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  DestroyAndReopen(opts);

  // Write a third before snapshot, a third between snapshot and tombstone, and
  // a third after the tombstone. Keys older than snapshot or newer than the
  // tombstone should be preserved.
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNum; ++i) {
    if (i == kNum / 3) {
      snapshot = db_->GetSnapshot();
    } else if (i == 2 * kNum / 3) {
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 GetNumericStr(kRangeBegin),
                                 GetNumericStr(kRangeEnd)));
    }
    ASSERT_OK(db_->Put(WriteOptions(), GetNumericStr(i), "val"));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));

  for (int i = 0; i < kNum; ++i) {
    ReadOptions read_opts;
    read_opts.ignore_range_deletions = true;
    std::string value;
    if (i < kRangeBegin || i > kRangeEnd || i < kNum / 3 || i >= 2 * kNum / 3) {
      ASSERT_OK(db_->Get(read_opts, GetNumericStr(i), &value));
    } else {
      ASSERT_TRUE(db_->Get(read_opts, GetNumericStr(i), &value).IsNotFound());
    }
  }
  db_->ReleaseSnapshot(snapshot);
}

// NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE

TEST_F(DBRangeDelTest, CompactionRemovesCoveredKeys) {
  const int kNumPerFile = 100, kNumFiles = 4;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.disable_auto_compactions = true;
  opts.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile));
  opts.num_levels = 2;
  opts.statistics = CreateDBStatistics();
  DestroyAndReopen(opts);

  for (int i = 0; i < kNumFiles; ++i) {
    if (i > 0) {
      // range tombstone covers first half of the previous file
      ASSERT_OK(db_->DeleteRange(
          WriteOptions(), db_->DefaultColumnFamily(),
          GetNumericStr((i - 1) * kNumPerFile),
          GetNumericStr((i - 1) * kNumPerFile + kNumPerFile / 2)));
    }
    // Make sure a given key appears in each file so compaction won't be able to
    // use trivial move, which would happen if the ranges were non-overlapping.
    // Also, we need an extra element since flush is only triggered when the
    // number of keys is one greater than SpecialSkipListFactory's limit.
    // We choose a key outside the key-range used by the test to avoid conflict.
    ASSERT_OK(db_->Put(WriteOptions(), GetNumericStr(kNumPerFile * kNumFiles),
                       "val"));

    for (int j = 0; j < kNumPerFile; ++j) {
      ASSERT_OK(
          db_->Put(WriteOptions(), GetNumericStr(i * kNumPerFile + j), "val"));
    }
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
  }
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GT(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ((kNumFiles - 1) * kNumPerFile / 2,
            TestGetTickerCount(opts, COMPACTION_KEY_DROP_RANGE_DEL));

  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kNumPerFile; ++j) {
      ReadOptions read_opts;
      read_opts.ignore_range_deletions = true;
      std::string value;
      if (i == kNumFiles - 1 || j >= kNumPerFile / 2) {
        ASSERT_OK(
            db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value));
      } else {
        ASSERT_TRUE(
            db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value)
                .IsNotFound());
      }
    }
  }
}

TEST_F(DBRangeDelTest, ValidLevelSubcompactionBoundaries) {
  const int kNumPerFile = 100, kNumFiles = 4, kFileBytes = 100 << 10;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = kNumFiles;
  options.max_bytes_for_level_base = 2 * kFileBytes;
  options.max_subcompactions = 4;
  options.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile));
  options.num_levels = 3;
  options.target_file_size_base = kFileBytes;
  options.target_file_size_multiplier = 1;
  options.max_compaction_bytes = 1500;
  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < kNumFiles; ++j) {
      if (i > 0) {
        // delete [95,105) in two files, [295,305) in next two
        int mid = (j + (1 - j % 2)) * kNumPerFile;
        ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                   Key(mid - 5), Key(mid + 5)));
      }
      std::vector<std::string> values;
      // Write 100KB (100 values, each 1K)
      for (int k = 0; k < kNumPerFile; k++) {
        values.push_back(rnd.RandomString(990));
        ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put("", ""));
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
      if (j < kNumFiles - 1) {
        // background compaction may happen early for kNumFiles'th file
        ASSERT_EQ(NumTableFilesAtLevel(0), j + 1);
      }
      if (j == options.level0_file_num_compaction_trigger - 1) {
        // When i == 1, compaction will output some files to L1, at which point
        // L1 is not bottommost so range deletions cannot be compacted away. The
        // new L1 files must be generated with non-overlapping key ranges even
        // though multiple subcompactions see the same ranges deleted, else an
        // assertion will fail.
        //
        // Only enable auto-compactions when we're ready; otherwise, the
        // oversized L0 (relative to base_level) causes the compaction to run
        // earlier.
        ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()}));
        ASSERT_OK(dbfull()->TEST_WaitForCompact());
        ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(),
                                  {{"disable_auto_compactions", "true"}}));
        ASSERT_EQ(NumTableFilesAtLevel(0), 0);
        ASSERT_GT(NumTableFilesAtLevel(1), 0);
        ASSERT_GT(NumTableFilesAtLevel(2), 0);
      }
    }
  }
}

TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) {
  const int kNumPerFile = 100, kFilesPerLevel = 4, kNumLevels = 4;
  Options options = CurrentOptions();
  options.compaction_options_universal.min_merge_width = kFilesPerLevel;
  options.compaction_options_universal.max_merge_width = kFilesPerLevel;
  options.compaction_options_universal.size_ratio = 10;
  options.compaction_style = kCompactionStyleUniversal;
  options.level0_file_num_compaction_trigger = kFilesPerLevel;
  options.max_subcompactions = 4;
  options.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile));
  options.num_levels = kNumLevels;
  options.target_file_size_base = kNumPerFile << 10;
  options.target_file_size_multiplier = 1;
  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < kNumLevels - 1; ++i) {
    for (int j = 0; j < kFilesPerLevel; ++j) {
      if (i == kNumLevels - 2) {
        // insert range deletions [95,105) in two files, [295,305) in next two
        // to prepare L1 for later manual compaction.
        int mid = (j + (1 - j % 2)) * kNumPerFile;
        ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                   Key(mid - 5), Key(mid + 5)));
      }
      std::vector<std::string> values;
      // Write 100KB (100 values, each 1K)
      for (int k = 0; k < kNumPerFile; k++) {
        values.push_back(rnd.RandomString(990));
        ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put("", ""));
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
      if (j < kFilesPerLevel - 1) {
        // background compaction may happen early for kFilesPerLevel'th file
        ASSERT_EQ(NumTableFilesAtLevel(0), j + 1);
      }
    }
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(NumTableFilesAtLevel(0), 0);
    ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1);
  }
  // Now L1-L3 are full, when we compact L1->L2 we should see (1) subcompactions
  // happen since input level > 0; (2) range deletions are not dropped since
  // output level is not bottommost. If no file boundary assertion fails, that
  // probably means universal compaction + subcompaction + range deletion are
  // compatible.
  ASSERT_OK(dbfull()->RunManualCompaction(
      static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
          ->cfd(),
      1 /* input_level */, 2 /* output_level */, CompactRangeOptions(),
      nullptr /* begin */, nullptr /* end */, true /* exclusive */,
      true /* disallow_trivial_move */,
      std::numeric_limits<uint64_t>::max() /* max_file_num_to_ignore */,
      "" /* trim_ts */));
}

#endif  // ROCKSDB_LITE

TEST_F(DBRangeDelTest, CompactionRemovesCoveredMergeOperands) {
  const int kNumPerFile = 3, kNumFiles = 3;
  Options opts = CurrentOptions();
  opts.disable_auto_compactions = true;
  opts.memtable_factory.reset(test::NewSpecialSkipListFactory(2 * kNumPerFile));
  opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  opts.num_levels = 2;
  Reopen(opts);

  // Iterates kNumFiles * kNumPerFile + 1 times since flushing the last file
  // requires an extra entry.
  for (int i = 0; i <= kNumFiles * kNumPerFile; ++i) {
    if (i % kNumPerFile == 0 && i / kNumPerFile == kNumFiles - 1) {
      // Delete merge operands from all but the last file
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 "key", "key_"));
    }
    std::string val;
    PutFixed64(&val, i);
    ASSERT_OK(db_->Merge(WriteOptions(), "key", val));
    // we need to prevent trivial move using Puts so compaction will actually
    // process the merge operands.
    ASSERT_OK(db_->Put(WriteOptions(), "prevent_trivial_move", ""));
    if (i > 0 && i % kNumPerFile == 0) {
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    }
  }

  ReadOptions read_opts;
  read_opts.ignore_range_deletions = true;
  std::string expected, actual;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 45);  // 1+2+...+9
  ASSERT_EQ(expected, actual);

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  expected.clear();
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  uint64_t tmp;
  Slice tmp2(actual);
  GetFixed64(&tmp2, &tmp);
  PutFixed64(&expected, 30);  // 6+7+8+9 (earlier operands covered by tombstone)
  ASSERT_EQ(expected, actual);
}

TEST_F(DBRangeDelTest, PutDeleteRangeMergeFlush) {
  // Test the sequence of operations: (1) Put, (2) DeleteRange, (3) Merge, (4)
  // Flush. The `CompactionIterator` previously had a bug where we forgot to
  // check for covering range tombstones when processing the (1) Put, causing
  // it to reappear after the flush.
  Options opts = CurrentOptions();
  opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  Reopen(opts);

  std::string val;
  PutFixed64(&val, 1);
  ASSERT_OK(db_->Put(WriteOptions(), "key", val));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key",
                             "key_"));
  ASSERT_OK(db_->Merge(WriteOptions(), "key", val));
  ASSERT_OK(db_->Flush(FlushOptions()));

  ReadOptions read_opts;
  std::string expected, actual;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 1);
  ASSERT_EQ(expected, actual);
}

// NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE

TEST_F(DBRangeDelTest, ObsoleteTombstoneCleanup) {
  // During compaction to bottommost level, verify range tombstones older than
  // the oldest snapshot are removed, while others are preserved.
  Options opts = CurrentOptions();
  opts.disable_auto_compactions = true;
  opts.num_levels = 2;
  opts.statistics = CreateDBStatistics();
  Reopen(opts);

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr1",
                             "dr10"));  // obsolete after compaction
  ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr2",
                             "dr20"));  // protected by snapshot
  ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  ASSERT_EQ(2, NumTableFilesAtLevel(0));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
  ASSERT_EQ(1, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE));

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, TableEvictedDuringScan) {
  // The RangeDelAggregator holds pointers into range deletion blocks created by
  // table readers. This test ensures the aggregator can still access those
  // blocks even if it outlives the table readers that created them.
  //
  // DBIter always keeps readers open for L0 files. So, in order to test
  // aggregator outliving reader, we need to have deletions in L1 files, which
  // are opened/closed on-demand during the scan. This is accomplished by
  // setting kNumRanges > level0_stop_writes_trigger, which prevents deletions
  // from all lingering in L0 (there is at most one range deletion per L0 file).
  //
  // The first L1 file will contain a range deletion since its begin key is 0.
  // SeekToFirst() references that table's reader and adds its range tombstone
  // to the aggregator. Upon advancing beyond that table's key-range via Next(),
  // the table reader will be unreferenced by the iterator. Since we manually
  // call Evict() on all readers before the full scan, this unreference causes
  // the reader's refcount to drop to zero and thus be destroyed.
  //
  // When it is destroyed, we do not remove its range deletions from the
  // aggregator. So, subsequent calls to Next() must be able to use these
  // deletions to decide whether a key is covered. This will work as long as
  // the aggregator properly references the range deletion block.
  const int kNum = 25, kRangeBegin = 0, kRangeEnd = 7, kNumRanges = 5;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.level0_file_num_compaction_trigger = 4;
  opts.level0_stop_writes_trigger = 4;
  opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1));
  opts.num_levels = 2;
  BlockBasedTableOptions bbto;
  bbto.cache_index_and_filter_blocks = true;
  bbto.block_cache = NewLRUCache(8 << 20);
  opts.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyAndReopen(opts);

  // Hold a snapshot so range deletions can't become obsolete during compaction
  // to bottommost level (i.e., L1).
  const Snapshot* snapshot = db_->GetSnapshot();
  for (int i = 0; i < kNum; ++i) {
    ASSERT_OK(db_->Put(WriteOptions(), GetNumericStr(i), "val"));
    if (i > 0) {
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    }
    if (i >= kNum / 2 && i < kNum / 2 + kNumRanges) {
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 GetNumericStr(kRangeBegin),
                                 GetNumericStr(kRangeEnd)));
    }
  }
  // Must be > 1 so the first L1 file can be closed before scan finishes
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_GT(NumTableFilesAtLevel(1), 1);
  std::vector<uint64_t> file_numbers = ListTableFiles(env_, dbname_);

  ReadOptions read_opts;
  auto* iter = db_->NewIterator(read_opts);
  ASSERT_OK(iter->status());
  int expected = kRangeEnd;
  iter->SeekToFirst();
  for (auto file_number : file_numbers) {
    // This puts table caches in the state of being externally referenced only
    // so they are destroyed immediately upon iterator unreferencing.
    TableCache::Evict(dbfull()->TEST_table_cache(), file_number);
  }
  for (; iter->Valid(); iter->Next()) {
    ASSERT_EQ(GetNumericStr(expected), iter->key());
    ++expected;
    // Keep clearing block cache's LRU so range deletion block can be freed as
    // soon as its refcount drops to zero.
    bbto.block_cache->EraseUnRefEntries();
  }
  ASSERT_EQ(kNum, expected);
  delete iter;
  db_->ReleaseSnapshot(snapshot);

  // Also test proper cache handling in GetRangeTombstoneIterator,
  // via TablesRangeTombstoneSummary. (This once triggered memory leak
  // report with ASAN.)
  opts.max_open_files = 1;
  Reopen(opts);
  std::string str;
  ASSERT_OK(dbfull()->TablesRangeTombstoneSummary(db_->DefaultColumnFamily(),
                                                  100, &str));
}

TEST_F(DBRangeDelTest, GetCoveredKeyFromMutableMemtable) {
  do {
    DestroyAndReopen(CurrentOptions());
    ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));
    ASSERT_OK(
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

    ReadOptions read_opts;
    std::string value;
    ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

TEST_F(DBRangeDelTest, GetCoveredKeyFromImmutableMemtable) {
  do {
    Options opts = CurrentOptions();
    opts.max_write_buffer_number = 3;
    opts.min_write_buffer_number_to_merge = 2;
    // SpecialSkipListFactory lets us specify maximum number of elements the
    // memtable can hold. It switches the active memtable to immutable (flush is
    // prevented by the above options) upon inserting an element that would
    // overflow the memtable.
    opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1));
    DestroyAndReopen(opts);

    ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));
    ASSERT_OK(
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
    ASSERT_OK(db_->Put(WriteOptions(), "blah", "val"));

    ReadOptions read_opts;
    std::string value;
    ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

TEST_F(DBRangeDelTest, GetCoveredKeyFromSst) {
  do {
    DestroyAndReopen(CurrentOptions());
    ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));
    // snapshot prevents key from being deleted during flush
    const Snapshot* snapshot = db_->GetSnapshot();
    ASSERT_OK(
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
    ASSERT_OK(db_->Flush(FlushOptions()));

    ReadOptions read_opts;
    std::string value;
    ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
    db_->ReleaseSnapshot(snapshot);
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

TEST_F(DBRangeDelTest, GetCoveredMergeOperandFromMemtable) {
  const int kNumMergeOps = 10;
  Options opts = CurrentOptions();
  opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  Reopen(opts);

  for (int i = 0; i < kNumMergeOps; ++i) {
    std::string val;
    PutFixed64(&val, i);
    ASSERT_OK(db_->Merge(WriteOptions(), "key", val));
    if (i == kNumMergeOps / 2) {
      // deletes the merge operands written so far
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 "key", "key_"));
    }
  }

  ReadOptions read_opts;
  std::string expected, actual;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 30);  // 6+7+8+9
  ASSERT_EQ(expected, actual);

  expected.clear();
  read_opts.ignore_range_deletions = true;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 45);  // 0+1+2+...+9
  ASSERT_EQ(expected, actual);
}

TEST_F(DBRangeDelTest, GetIgnoresRangeDeletions) {
  Options opts = CurrentOptions();
  opts.max_write_buffer_number = 4;
  opts.min_write_buffer_number_to_merge = 3;
  opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1));
  Reopen(opts);

  ASSERT_OK(db_->Put(WriteOptions(), "sst_key", "val"));
  // snapshot prevents key from being deleted during flush
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(db_->Put(WriteOptions(), "imm_key", "val"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  ASSERT_OK(db_->Put(WriteOptions(), "mem_key", "val"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

  ReadOptions read_opts;
  read_opts.ignore_range_deletions = true;
  for (std::string key : {"sst_key", "imm_key", "mem_key"}) {
    std::string value;
    ASSERT_OK(db_->Get(read_opts, key, &value));
  }
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, IteratorRemovesCoveredKeys) {
  const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile));
  DestroyAndReopen(opts);

  // Write half of the keys before the tombstone and half after the tombstone.
  // Only covered keys (i.e., within the range and older than the tombstone)
  // should be deleted.
  for (int i = 0; i < kNum; ++i) {
    if (i == kNum / 2) {
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 GetNumericStr(kRangeBegin),
                                 GetNumericStr(kRangeEnd)));
    }
    ASSERT_OK(db_->Put(WriteOptions(), GetNumericStr(i), "val"));
  }
  ReadOptions read_opts;
  auto* iter = db_->NewIterator(read_opts);
  ASSERT_OK(iter->status());

  int expected = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ASSERT_EQ(GetNumericStr(expected), iter->key());
    if (expected == kRangeBegin - 1) {
      expected = kNum / 2;
    } else {
      ++expected;
    }
  }
  ASSERT_EQ(kNum, expected);
  delete iter;
}

TEST_F(DBRangeDelTest, IteratorOverUserSnapshot) {
  const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile));
  DestroyAndReopen(opts);

  const Snapshot* snapshot = nullptr;
  // Put a snapshot before the range tombstone, verify an iterator using that
  // snapshot sees all inserted keys.
  for (int i = 0; i < kNum; ++i) {
    if (i == kNum / 2) {
      snapshot = db_->GetSnapshot();
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 GetNumericStr(kRangeBegin),
                                 GetNumericStr(kRangeEnd)));
    }
    ASSERT_OK(db_->Put(WriteOptions(), GetNumericStr(i), "val"));
  }
  ReadOptions read_opts;
  read_opts.snapshot = snapshot;
  auto* iter = db_->NewIterator(read_opts);
  ASSERT_OK(iter->status());

  int expected = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ASSERT_EQ(GetNumericStr(expected), iter->key());
    ++expected;
  }
  ASSERT_EQ(kNum / 2, expected);
  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, IteratorIgnoresRangeDeletions) {
  Options opts = CurrentOptions();
  opts.max_write_buffer_number = 4;
  opts.min_write_buffer_number_to_merge = 3;
  opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1));
  Reopen(opts);

  ASSERT_OK(db_->Put(WriteOptions(), "sst_key", "val"));
  // snapshot prevents key from being deleted during flush
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(db_->Put(WriteOptions(), "imm_key", "val"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  ASSERT_OK(db_->Put(WriteOptions(), "mem_key", "val"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

  ReadOptions read_opts;
  read_opts.ignore_range_deletions = true;
  auto* iter = db_->NewIterator(read_opts);
  ASSERT_OK(iter->status());
  int i = 0;
  std::string expected[] = {"imm_key", "mem_key", "sst_key"};
  for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++i) {
    ASSERT_EQ(expected[i], iter->key());
  }
  ASSERT_EQ(3, i);
  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

#ifndef ROCKSDB_UBSAN_RUN
TEST_F(DBRangeDelTest, TailingIteratorRangeTombstoneUnsupported) {
  ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));
  // snapshot prevents key from being deleted during flush
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

  // iterations check unsupported in memtable, l0, and then l1
  for (int i = 0; i < 3; ++i) {
    ReadOptions read_opts;
    read_opts.tailing = true;
    auto* iter = db_->NewIterator(read_opts);
    if (i == 2) {
      // For L1+, iterators over files are created on-demand, so need seek
      iter->SeekToFirst();
    }
    ASSERT_TRUE(iter->status().IsNotSupported());
    delete iter;

    if (i == 0) {
      ASSERT_OK(db_->Flush(FlushOptions()));
    } else if (i == 1) {
      MoveFilesToLevel(1);
    }
  }
  db_->ReleaseSnapshot(snapshot);
}

#endif  // !ROCKSDB_UBSAN_RUN

TEST_F(DBRangeDelTest, SubcompactionHasEmptyDedicatedRangeDelFile) {
  const int kNumFiles = 2, kNumKeysPerFile = 4;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = kNumFiles;
  options.max_subcompactions = 2;
  options.num_levels = 2;
  options.target_file_size_base = 4096;
  Reopen(options);

  // need a L1 file for subcompaction to be triggered
  ASSERT_OK(
      db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(0), "val"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);

  // put enough keys to fill up the first subcompaction, and later range-delete
  // them so that the first subcompaction outputs no key-values. In that case
  // it'll consider making an SST file dedicated to range deletions.
  for (int i = 0; i < kNumKeysPerFile; ++i) {
    ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i),
                       std::string(1024, 'a')));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(kNumKeysPerFile)));

  // the above range tombstone can be dropped, so that one alone won't cause a
  // dedicated file to be opened. We can make one protected by snapshot that
  // must be considered. Make its range outside the first subcompaction's range
  // to exercise the tricky part of the code.
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             Key(kNumKeysPerFile + 1),
                             Key(kNumKeysPerFile + 2)));
  ASSERT_OK(db_->Flush(FlushOptions()));

  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()}));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, MemtableBloomFilter) {
  // regression test for #2743. the range delete tombstones in memtable should
  // be added even when Get() skips searching due to its prefix bloom filter
  const int kMemtableSize = 1 << 20;              // 1MB
  const int kMemtablePrefixFilterSize = 1 << 13;  // 8KB
  const int kNumKeys = 1000;
  const int kPrefixLen = 8;
  Options options = CurrentOptions();
  options.memtable_prefix_bloom_size_ratio =
      static_cast<double>(kMemtablePrefixFilterSize) / kMemtableSize;
  options.prefix_extractor.reset(
      ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen));
  options.write_buffer_size = kMemtableSize;
  Reopen(options);

  for (int i = 0; i < kNumKeys; ++i) {
    ASSERT_OK(Put(Key(i), "val"));
  }
  ASSERT_OK(Flush());
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(kNumKeys)));
  for (int i = 0; i < kNumKeys; ++i) {
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &value).IsNotFound());
  }
}

TEST_F(DBRangeDelTest, CompactionTreatsSplitInputLevelDeletionAtomically) {
  // This test originally verified that compaction treated files containing a
  // split range deletion in the input level as an atomic unit. I.e.,
  // compacting any input-level file(s) containing a portion of the range
  // deletion causes all other input-level files containing portions of that
  // same range deletion to be included in the compaction. Range deletion
  // tombstones are now truncated to sstable boundaries which removed the need
  // for that behavior (which could lead to excessively large
  // compactions).
  const int kNumFilesPerLevel = 4, kValueBytes = 4 << 10;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(2 /* num_entries_flush */));
  // max file size could be 2x of target file size, so set it to half of that
  options.target_file_size_base = kValueBytes / 2;
  // disable dynamic_file_size, as it will cut L1 files into more files (than
  // kNumFilesPerLevel).
  options.level_compaction_dynamic_file_size = false;
  options.max_compaction_bytes = 1500;
  // i == 0: CompactFiles
  // i == 1: CompactRange
  // i == 2: automatic compaction
  for (int i = 0; i < 3; ++i) {
    DestroyAndReopen(options);

    ASSERT_OK(Put(Key(0), ""));
    ASSERT_OK(db_->Flush(FlushOptions()));
    MoveFilesToLevel(2);
    ASSERT_EQ(1, NumTableFilesAtLevel(2));

    // snapshot protects range tombstone from dropping due to becoming obsolete.
    const Snapshot* snapshot = db_->GetSnapshot();
    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                               Key(0), Key(2 * kNumFilesPerLevel)));

    Random rnd(301);
    std::string value = rnd.RandomString(kValueBytes);
    for (int j = 0; j < kNumFilesPerLevel; ++j) {
      // give files overlapping key-ranges to prevent trivial move
      ASSERT_OK(Put(Key(j), value));
      ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
      if (j > 0) {
        ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
        ASSERT_EQ(j, NumTableFilesAtLevel(0));
      }
    }
    // put extra key to trigger final flush
    ASSERT_OK(Put("", ""));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_EQ(kNumFilesPerLevel, NumTableFilesAtLevel(1));

    ColumnFamilyMetaData meta;
    db_->GetColumnFamilyMetaData(&meta);
    if (i == 0) {
      ASSERT_OK(db_->CompactFiles(
          CompactionOptions(), {meta.levels[1].files[0].name}, 2 /* level */));
      ASSERT_EQ(0, NumTableFilesAtLevel(1));
    } else if (i == 1) {
      auto begin_str = Key(0), end_str = Key(1);
      Slice begin = begin_str, end = end_str;
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin, &end));
      ASSERT_EQ(3, NumTableFilesAtLevel(1));
    } else if (i == 2) {
      ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(),
                                {{"max_bytes_for_level_base", "10000"}}));
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      ASSERT_EQ(1, NumTableFilesAtLevel(1));
    }
    ASSERT_GT(NumTableFilesAtLevel(2), 0);

    db_->ReleaseSnapshot(snapshot);
  }
}

TEST_F(DBRangeDelTest, RangeTombstoneEndKeyAsSstableUpperBound) {
  // Test the handling of the range-tombstone end-key as the
  // upper-bound for an sstable.

  const int kNumFilesPerLevel = 2, kValueBytes = 4 << 10;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(2 /* num_entries_flush */));
  options.target_file_size_base = kValueBytes;
  options.disable_auto_compactions = true;
  // disable it for now, otherwise the L1 files are going to be cut before data
  // 1: because the grandparent file is between [0]->[1] and its size is more
  // than 1/8 of target size (4k).
  options.level_compaction_dynamic_file_size = false;

  DestroyAndReopen(options);

  // Create an initial sstable at L2:
  //   [key000000#1,1, key000000#1,1]
  ASSERT_OK(Put(Key(0), ""));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // A snapshot protects the range tombstone from dropping due to
  // becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(2 * kNumFilesPerLevel)));

  // Create 2 additional sstables in L0. Note that the first sstable
  // contains the range tombstone.
  //   [key000000#3,1, key000004#72057594037927935,15]
  //   [key000001#5,1, key000002#6,1]
  Random rnd(301);
  std::string value = rnd.RandomString(kValueBytes);
  for (int j = 0; j < kNumFilesPerLevel; ++j) {
    // Give files overlapping key-ranges to prevent a trivial move when we
    // compact from L0 to L1.
    ASSERT_OK(Put(Key(j), value));
    ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
    ASSERT_OK(db_->Flush(FlushOptions()));
    ASSERT_EQ(j + 1, NumTableFilesAtLevel(0));
  }
  // Compact the 2 L0 sstables to L1, resulting in the following LSM. There
  // are 2 sstables generated in L1 due to the target_file_size_base setting.
  //   L1:
  //     [key000000#3,1, key000002#72057594037927935,15]
  //     [key000002#6,1, key000004#72057594037927935,15]
  //   L2:
  //     [key000000#1,1, key000000#1,1]
  MoveFilesToLevel(1);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  {
    // Compact the second sstable in L1:
    //   L1:
    //     [key000000#3,1, key000002#72057594037927935,15]
    //   L2:
    //     [key000000#1,1, key000000#1,1]
    //     [key000002#6,1, key000004#72057594037927935,15]
    //
    // At the same time, verify the compaction does not cause the key at the
    // endpoint (key000002#6,1) to disappear.
    ASSERT_EQ(value, Get(Key(2)));
    auto begin_str = Key(3);
    const ROCKSDB_NAMESPACE::Slice begin = begin_str;
    ASSERT_OK(dbfull()->TEST_CompactRange(1, &begin, nullptr));
    ASSERT_EQ(1, NumTableFilesAtLevel(1));
    ASSERT_EQ(2, NumTableFilesAtLevel(2));
    ASSERT_EQ(value, Get(Key(2)));
  }

  {
    // Compact the first sstable in L1. This should be copacetic, but
    // was previously resulting in overlapping sstables in L2 due to
    // mishandling of the range tombstone end-key when used as the
    // largest key for an sstable. The resulting LSM structure should
    // be:
    //
    //   L2:
    //     [key000000#1,1, key000001#72057594037927935,15]
    //     [key000001#5,1, key000002#72057594037927935,15]
    //     [key000002#6,1, key000004#72057594037927935,15]
    auto begin_str = Key(0);
    const ROCKSDB_NAMESPACE::Slice begin = begin_str;
    ASSERT_OK(dbfull()->TEST_CompactRange(1, &begin, &begin));
    ASSERT_EQ(0, NumTableFilesAtLevel(1));
    ASSERT_EQ(3, NumTableFilesAtLevel(2));
  }

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, UnorderedTombstones) {
  // Regression test for #2752. Range delete tombstones between
  // different snapshot stripes are not stored in order, so the first
  // tombstone of each snapshot stripe should be checked as a smallest
  // candidate.
  Options options = CurrentOptions();
  DestroyAndReopen(options);

  auto cf = db_->DefaultColumnFamily();

  ASSERT_OK(db_->Put(WriteOptions(), cf, "a", "a"));
  ASSERT_OK(db_->Flush(FlushOptions(), cf));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "b", "c"));
  // Hold a snapshot to separate these two delete ranges.
  auto snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "a", "b"));
  ASSERT_OK(db_->Flush(FlushOptions(), cf));
  db_->ReleaseSnapshot(snapshot);

  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_GetFilesMetaData(cf, &files);
  ASSERT_EQ(1, files[0].size());
  ASSERT_EQ("a", files[0][0].smallest.user_key());
  ASSERT_EQ("c", files[0][0].largest.user_key());

  std::string v;
  auto s = db_->Get(ReadOptions(), "a", &v);
  ASSERT_TRUE(s.IsNotFound());
}

class MockMergeOperator : public MergeOperator {
  // Mock non-associative operator. Non-associativity is expressed by lack of
  // implementation for any `PartialMerge*` functions.
 public:
  bool FullMergeV2(const MergeOperationInput& merge_in,
                   MergeOperationOutput* merge_out) const override {
    assert(merge_out != nullptr);
    merge_out->new_value = merge_in.operand_list.back().ToString();
    return true;
  }

  const char* Name() const override { return "MockMergeOperator"; }
};

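// Because MockMergeOperator implements no PartialMerge*, compaction cannot
// combine adjacent merge operands; the tests below lean on this (together with
// snapshots) to make compaction emit adjacent output files whose boundary keys
// share the same user key.
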
TEST_F(DBRangeDelTest, KeyAtOverlappingEndpointReappears) {
  // This test uses a non-associative merge operator since that is a convenient
  // way to get compaction to write out files with overlapping user-keys at the
  // endpoints. Note, however, overlapping endpoints can also occur with other
  // value types (Put, etc.), assuming the right snapshots are present.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  const int kNumFiles = 4;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  // Push dummy data to L3 so that our actual test files on L0-L2
  // will not be considered "bottommost" level, otherwise compaction
  // may prevent us from creating overlapping user keys
  // as on the bottommost layer MergeHelper
  ASSERT_OK(db_->Merge(WriteOptions(), "key", "dummy"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(3);

  Random rnd(301);
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = rnd.RandomString(kValueBytes);
      ASSERT_OK(db_->Merge(WriteOptions(), "key", value));
    }
    if (i == kNumFiles - 1) {
      // Take snapshot to prevent covered merge operands from being dropped by
      // compaction.
      snapshot = db_->GetSnapshot();
      // The DeleteRange is the last write so all merge operands are covered.
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 "key", "key_"));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  std::string value;
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  ASSERT_OK(dbfull()->TEST_CompactRange(
      0 /* level */, nullptr /* begin */, nullptr /* end */,
      nullptr /* column_family */, true /* disallow_trivial_move */));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  // Now we have multiple files at L1 all containing a single user key, thus
  // guaranteeing overlap in the file endpoints.
  ASSERT_GT(NumTableFilesAtLevel(1), 1);

  // Verify no merge operands reappeared after the compaction.
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  // Compact and verify again. It's worthwhile because now the files have
  // tighter endpoints, so we can verify that doesn't mess anything up.
  ASSERT_OK(dbfull()->TEST_CompactRange(
      1 /* level */, nullptr /* begin */, nullptr /* end */,
      nullptr /* column_family */, true /* disallow_trivial_move */));
  ASSERT_GT(NumTableFilesAtLevel(2), 1);
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, UntruncatedTombstoneDoesNotDeleteNewerKey) {
  // Verify a key newer than a range tombstone cannot be deleted by being
  // compacted to the bottom level (and thus having its seqnum zeroed) before
  // the range tombstone. This used to happen when range tombstones were
  // untruncated on reads such that they extended past their file boundaries.
  //
  // Test summary:
  //
  // - L1 is bottommost.
  // - A couple snapshots are strategically taken to prevent seqnums from being
  //   zeroed, range tombstone from being dropped, merge operands from being
  //   dropped, and merge operands from being combined.
  // - Left half of files in L1 all have same user key, ensuring their file
  //   boundaries overlap. In the past this would cause range tombstones to be
  //   untruncated.
  // - Right half of L1 files all have different keys, ensuring no overlap.
  // - A range tombstone spans all L1 keys, so it is stored in every L1 file.
  // - Keys in the right side of the key-range are overwritten. These are
  //   compacted down to L1 after releasing snapshots such that their seqnums
  //   will be zeroed.
  // - A full range scan is performed. If the tombstone in the left L1 files
  //   were untruncated, it would now cover keys newer than it (but with zeroed
  //   seqnums) in the right L1 files.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  const int kNumFiles = 4;
  const int kMaxKey = kNumFiles * kFileBytes / kValueBytes;
  const int kKeysOverwritten = 10;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.num_levels = 2;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  Random rnd(301);
  // - snapshots[0] prevents merge operands from being combined during
  //   compaction.
  // - snapshots[1] prevents merge operands from being dropped due to the
  //   covering range tombstone.
  const Snapshot* snapshots[] = {nullptr, nullptr};
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = rnd.RandomString(kValueBytes);
      std::string key;
      if (i < kNumFiles / 2) {
        key = Key(0);
      } else {
        key = Key(1 + i * kFileBytes / kValueBytes + j);
      }
      ASSERT_OK(db_->Merge(WriteOptions(), key, value));
    }
    if (i == 0) {
      snapshots[0] = db_->GetSnapshot();
    }
    if (i == kNumFiles - 1) {
      snapshots[1] = db_->GetSnapshot();
      // The DeleteRange is the last write so all merge operands are covered.
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 Key(0), Key(kMaxKey + 1)));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));

  auto get_key_count = [this]() -> int {
    auto* iter = db_->NewIterator(ReadOptions());
    assert(iter->status().ok());
    iter->SeekToFirst();
    int keys_found = 0;
    for (; iter->Valid(); iter->Next()) {
      ++keys_found;
    }
    delete iter;
    return keys_found;
  };

  // All keys should be covered
  ASSERT_EQ(0, get_key_count());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
                              nullptr /* end_key */));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  // Roughly the left half of L1 files should have overlapping boundary keys,
  // while the right half should not.
  ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);

  // Now overwrite a few keys that are in L1 files that definitely don't have
  // overlapping boundary keys.
  for (int i = kMaxKey; i > kMaxKey - kKeysOverwritten; --i) {
    auto value = rnd.RandomString(kValueBytes);
    ASSERT_OK(db_->Merge(WriteOptions(), Key(i), value));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));

  // The overwritten keys are in L0 now, so clearly aren't covered by the range
  // tombstone in L1.
  ASSERT_EQ(kKeysOverwritten, get_key_count());

  // Release snapshots so seqnums can be zeroed when L0->L1 happens.
  db_->ReleaseSnapshot(snapshots[0]);
  db_->ReleaseSnapshot(snapshots[1]);

  auto begin_key_storage = Key(kMaxKey - kKeysOverwritten + 1);
  auto end_key_storage = Key(kMaxKey);
  Slice begin_key(begin_key_storage);
  Slice end_key(end_key_storage);
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin_key, &end_key));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);

  ASSERT_EQ(kKeysOverwritten, get_key_count());
}

TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) {
  // Exposes a bug where we were using
  // `RangeDelPositioningMode::kBackwardTraversal` while scanning merge operands
  // in the forward direction. Confusingly, this case happened during
  // `DBIter::Prev`. It could cause assertion failure, or reappearing keys.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  // Need multiple keys so we can get results when calling `Prev()` after
  // `SeekToLast()`.
  const int kNumKeys = 3;
  const int kNumFiles = 4;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  Random rnd(301);
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = rnd.RandomString(kValueBytes);
      ASSERT_OK(db_->Merge(WriteOptions(), Key(j % kNumKeys), value));
      if (i == 0 && j == kNumKeys) {
        // Take snapshot to prevent covered merge operands from being dropped or
        // merged by compaction.
        snapshot = db_->GetSnapshot();
        // Do a DeleteRange near the beginning so only the oldest merge operand
        // for each key is covered. This ensures the sequence of events:
        //
        // - `DBIter::Prev()` is called
        // - After several same versions of the same user key are encountered,
        //   it decides to seek using `DBIter::FindValueForCurrentKeyUsingSeek`.
        // - Binary searches to the newest version of the key, which is in the
        //   leftmost file containing the user key.
        // - Scans forwards to collect all merge operands. Eventually reaches
        //   the rightmost file containing the oldest merge operand, which
        //   should be covered by the `DeleteRange`. If `RangeDelAggregator`
        //   were not properly using `kForwardTraversal` here, that operand
        //   would reappear.
        ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                   Key(0), Key(kNumKeys + 1)));
      }
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
                              nullptr /* end_key */));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GT(NumTableFilesAtLevel(1), 1);

  auto* iter = db_->NewIterator(ReadOptions());
  ASSERT_OK(iter->status());
  iter->SeekToLast();
  int keys_found = 0;
  for (; iter->Valid(); iter->Prev()) {
    ++keys_found;
  }
  delete iter;
  ASSERT_EQ(kNumKeys, keys_found);

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) {
  const int kFileBytes = 1 << 20;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  ASSERT_OK(Put(Key(0), "a"));
  const Snapshot* snapshot = db_->GetSnapshot();

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(10)));

  ASSERT_OK(db_->Flush(FlushOptions()));

  ReadOptions read_opts;
  read_opts.snapshot = snapshot;
  auto* iter = db_->NewIterator(read_opts);
  ASSERT_OK(iter->status());

  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(Key(0), iter->key());

  iter->Next();
  ASSERT_FALSE(iter->Valid());

  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeysInImmMemTables) {
  const int kFileBytes = 1 << 20;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = kFileBytes;
  DestroyAndReopen(options);

  // block flush thread -> pin immtables in memory
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->LoadDependency({
      {"SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator",
       "DBImpl::BGWorkFlush"},
  });
  SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(Put(Key(0), "a"));
  std::unique_ptr<const Snapshot, std::function<void(const Snapshot*)>>
      snapshot(db_->GetSnapshot(),
               [this](const Snapshot* s) { db_->ReleaseSnapshot(s); });

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(10)));

  ASSERT_OK(dbfull()->TEST_SwitchMemtable());

  ReadOptions read_opts;
  read_opts.snapshot = snapshot.get();
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
  ASSERT_OK(iter->status());

  TEST_SYNC_POINT("SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator");

  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(Key(0), iter->key());

  iter->Next();
  ASSERT_FALSE(iter->Valid());
}

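// The two snapshot tests above rely on a basic contract worth spelling out: a
// range tombstone only hides data from readers whose sequence number is at or
// above the tombstone's. A minimal sketch of that contract (illustrative key
// names, not taken from the tests above):
//
//   ASSERT_OK(db->Put(WriteOptions(), "k", "v"));
//   const Snapshot* snap = db->GetSnapshot();
//   ASSERT_OK(db->DeleteRange(WriteOptions(), db->DefaultColumnFamily(),
//                             "a", "z"));
//   ReadOptions ro;
//   ro.snapshot = snap;  // reads as of before the DeleteRange
//   // Get(ro, "k") still returns "v"; a read without the snapshot does not.
//   db->ReleaseSnapshot(snap);
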
TEST_F(DBRangeDelTest, RangeTombstoneWrittenToMinimalSsts) {
  // Adapted from
  // https://github.com/cockroachdb/cockroach/blob/de8b3ea603dd1592d9dc26443c2cc92c356fbc2f/pkg/storage/engine/rocksdb_test.go#L1267-L1398.
  // Regression test for issue where range tombstone was written to more files
  // than necessary when it began exactly at the begin key in the next
  // compaction output file.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 4 << 10;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  // Have a bit of slack in the size limits but we enforce them more strictly
  // when manually flushing/compacting.
  options.max_compaction_bytes = 2 * kFileBytes;
  options.target_file_size_base = 2 * kFileBytes;
  options.write_buffer_size = 2 * kFileBytes;
  DestroyAndReopen(options);

  Random rnd(301);
  for (char first_char : {'a', 'b', 'c'}) {
    for (int i = 0; i < kFileBytes / kValueBytes; ++i) {
      std::string key(1, first_char);
      key.append(Key(i));
      std::string value = rnd.RandomString(kValueBytes);
      ASSERT_OK(Put(key, value));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
    MoveFilesToLevel(2);
  }
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(3, NumTableFilesAtLevel(2));

  // Populate the memtable lightly while spanning the whole key-space. The
  // setting of `max_compaction_bytes` will cause the L0->L1 to output multiple
  // files to prevent a large L1->L2 compaction later.
  ASSERT_OK(Put("a", "val"));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             "c" + Key(1), "d"));
  // Our compaction output file cutting logic currently only considers point
  // keys. So, in order for the range tombstone to have a chance at landing at
  // the start of a new file, we need a point key at the range tombstone's
  // start key.
  // TODO(ajkr): remove this `Put` after file cutting accounts for range
  // tombstones (#3977).
  ASSERT_OK(Put("c" + Key(1), "value"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Ensure manual L0->L1 compaction cuts the outputs before the range tombstone
  // and the range tombstone is only placed in the second SST.
  std::string begin_key_storage("c" + Key(1));
  Slice begin_key(begin_key_storage);
  std::string end_key_storage("d");
  Slice end_key(end_key_storage);
  ASSERT_OK(dbfull()->TEST_CompactRange(
      0 /* level */, &begin_key /* begin */, &end_key /* end */,
      nullptr /* column_family */, true /* disallow_trivial_move */));
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  std::vector<LiveFileMetaData> all_metadata;
  std::vector<LiveFileMetaData> l1_metadata;
  db_->GetLiveFilesMetaData(&all_metadata);
  for (const auto& metadata : all_metadata) {
    if (metadata.level == 1) {
      l1_metadata.push_back(metadata);
    }
  }
  std::sort(l1_metadata.begin(), l1_metadata.end(),
            [&](const LiveFileMetaData& a, const LiveFileMetaData& b) {
              return options.comparator->Compare(a.smallestkey, b.smallestkey) <
                     0;
            });
  ASSERT_EQ("a", l1_metadata[0].smallestkey);
  ASSERT_EQ("a", l1_metadata[0].largestkey);
  ASSERT_EQ("c" + Key(1), l1_metadata[1].smallestkey);
  ASSERT_EQ("d", l1_metadata[1].largestkey);

  TablePropertiesCollection all_table_props;
  ASSERT_OK(db_->GetPropertiesOfAllTables(&all_table_props));
  int64_t num_range_deletions = 0;
  for (const auto& name_and_table_props : all_table_props) {
    const auto& name = name_and_table_props.first;
    const auto& table_props = name_and_table_props.second;
    // The range tombstone should only be output to the second L1 SST.
    if (name.size() >= l1_metadata[1].name.size() &&
        name.substr(name.size() - l1_metadata[1].name.size())
                .compare(l1_metadata[1].name) == 0) {
      ASSERT_EQ(1, table_props->num_range_deletions);
      ++num_range_deletions;
    } else {
      ASSERT_EQ(0, table_props->num_range_deletions);
    }
  }
  ASSERT_EQ(1, num_range_deletions);
}

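// The check above relies on the `num_range_deletions` table property, which is
// also available outside of tests. A minimal sketch for inspecting it on a
// running DB (loop variable names are illustrative):
//
//   TablePropertiesCollection props;
//   ASSERT_OK(db->GetPropertiesOfAllTables(&props));
//   for (const auto& file_and_props : props) {
//     uint64_t n = file_and_props.second->num_range_deletions;
//     // n is the number of range tombstones stored in that SST file.
//   }
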
TEST_F(DBRangeDelTest, OverlappedTombstones) {
  const int kNumPerFile = 4, kNumFiles = 2;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.target_file_size_base = 9 * 1024;
  options.max_compaction_bytes = 9 * 1024;
  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(rnd.RandomString(3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
    }
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  MoveFilesToLevel(2);
  ASSERT_EQ(2, NumTableFilesAtLevel(2));

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1),
                             Key((kNumFiles)*kNumPerFile + 1)));
  ASSERT_OK(db_->Flush(FlushOptions()));

  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                        true /* disallow_trivial_move */));

  // The tombstone range is not broken up into multiple SSTs which may incur a
  // large compaction with L2.
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
  std::vector<std::vector<FileMetaData>> files;
  ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
                                        true /* disallow_trivial_move */));
  ASSERT_EQ(1, NumTableFilesAtLevel(2));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));
}

TEST_F(DBRangeDelTest, OverlappedKeys) {
  const int kNumPerFile = 4, kNumFiles = 2;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.target_file_size_base = 9 * 1024;
  options.max_compaction_bytes = 9 * 1024;
  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(rnd.RandomString(3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
    }
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  MoveFilesToLevel(2);
  ASSERT_EQ(2, NumTableFilesAtLevel(2));

  for (int i = 1; i < kNumFiles * kNumPerFile + 1; i++) {
    ASSERT_OK(Put(Key(i), "0x123"));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // The key range is broken up into three SSTs to avoid a future big compaction
  // with the grandparent
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                        true /* disallow_trivial_move */));
  ASSERT_EQ(3, NumTableFilesAtLevel(1));

  ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
                                        true /* disallow_trivial_move */));
  // L1->L2 compaction size is limited to max_compaction_bytes
  ASSERT_EQ(3, NumTableFilesAtLevel(2));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));
}

TEST_F(DBRangeDelTest, IteratorRefresh) {
  // Refreshing an iterator after a range tombstone is added should cause the
  // deleted range of keys to disappear.
  for (bool sv_changed : {false, true}) {
    ASSERT_OK(db_->Put(WriteOptions(), "key1", "value1"));
    ASSERT_OK(db_->Put(WriteOptions(), "key2", "value2"));

    auto* iter = db_->NewIterator(ReadOptions());
    ASSERT_OK(iter->status());

    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                               "key2", "key3"));

    if (sv_changed) {
      ASSERT_OK(db_->Flush(FlushOptions()));
    }

    ASSERT_OK(iter->Refresh());
    ASSERT_OK(iter->status());
    iter->SeekToFirst();
    ASSERT_EQ("key1", iter->key());
    iter->Next();
    ASSERT_FALSE(iter->Valid());

    delete iter;
  }
}

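// Iterator::Refresh() is the public API exercised above: it re-seats an open
// iterator on the latest DB state instead of requiring a new iterator. A
// minimal hedged sketch of typical usage (names illustrative):
//
//   std::unique_ptr<Iterator> it(db->NewIterator(ReadOptions()));
//   ASSERT_OK(db->DeleteRange(WriteOptions(), db->DefaultColumnFamily(),
//                             "k1", "k9"));
//   ASSERT_OK(it->Refresh());  // now observes the tombstone written above
//   it->SeekToFirst();
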
void VerifyIteratorReachesEnd(InternalIterator* iter) {
  ASSERT_TRUE(!iter->Valid() && iter->status().ok());
}

void VerifyIteratorReachesEnd(Iterator* iter) {
  ASSERT_TRUE(!iter->Valid() && iter->status().ok());
}

TEST_F(DBRangeDelTest, IteratorReseek) {
  // Range tombstone triggers reseek (seeking to a range tombstone end key) in
  // merging iterator. Test set up:
  //    one memtable: range tombstone [0, 1)
  //    one immutable memtable: range tombstone [1, 2)
  //    one L0 file with range tombstone [2, 3)
  //    one L1 file with range tombstone [3, 4)
  // Seek(0) should trigger cascading reseeks at all levels below memtable.
  // Seek(1) should trigger cascading reseeks at all levels below immutable
  // memtable. SeekToFirst and SeekToLast trigger no reseek.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;

  DestroyAndReopen(options);
  // L1
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3),
                             Key(4)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
  // L0
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
                             Key(3)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  // Immutable memtable
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1),
                             Key(2)));
  ASSERT_OK(static_cast_with_check<DBImpl>(db_)->TEST_SwitchMemtable());
  std::string value;
  ASSERT_TRUE(dbfull()->GetProperty(db_->DefaultColumnFamily(),
                                    "rocksdb.num-immutable-mem-table", &value));
  ASSERT_EQ(1, std::stoi(value));
  // Active memtable
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(1)));
  // this memtable is still active
  ASSERT_TRUE(dbfull()->GetProperty(db_->DefaultColumnFamily(),
                                    "rocksdb.num-immutable-mem-table", &value));
  ASSERT_EQ(1, std::stoi(value));

  auto iter = db_->NewIterator(ReadOptions());
  get_perf_context()->Reset();
  iter->Seek(Key(0));
  // Reseeked immutable memtable, L0 and L1
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 3);
  VerifyIteratorReachesEnd(iter);
  get_perf_context()->Reset();
  iter->SeekForPrev(Key(1));
  // Reseeked L0 and L1
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2);
  VerifyIteratorReachesEnd(iter);
  get_perf_context()->Reset();
  iter->SeekToFirst();
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 0);
  VerifyIteratorReachesEnd(iter);
  iter->SeekToLast();
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 0);
  VerifyIteratorReachesEnd(iter);
  delete iter;
}

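// The assertions above read `internal_range_del_reseek_count` from the perf
// context. A hedged sketch of how that counter is typically sampled by a
// caller (assuming the perf context headers are available):
//
//   SetPerfLevel(PerfLevel::kEnableCount);
//   get_perf_context()->Reset();
//   iter->Seek(some_key);
//   uint64_t reseeks = get_perf_context()->internal_range_del_reseek_count;
//
// Each unit means one child iterator was re-positioned to a range tombstone's
// end key instead of scanning the covered entries one by one.
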
TEST_F(DBRangeDelTest, ReseekDuringNextAndPrev) {
  // Range tombstone triggers reseek during Next()/Prev() in merging iterator.
  // Test set up:
  //    memtable has: [0, 1) [2, 3)
  //    L0 has: 2
  //    L1 has: 1, 2, 3
  // Seek(0) will reseek to 1 for L0 and L1. Seek(1) will not trigger any
  // reseek. Then Next() determines 2 is covered by [2, 3), it will try to
  // reseek to 3 for L0 and L1. Similar story for Prev() and SeekForPrev() is
  // expected in the reverse direction.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;

  DestroyAndReopen(options);
  // L1
  ASSERT_OK(db_->Put(WriteOptions(), Key(1), "foo"));
  ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo"));
  ASSERT_OK(db_->Put(WriteOptions(), Key(3), "foo"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  // L0
  ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // Memtable
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(1)));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
                             Key(3)));

  auto iter = db_->NewIterator(ReadOptions());
  auto iter_test_forward = [&] {
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().ToString(), Key(1));

    get_perf_context()->Reset();
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().ToString(), Key(3));
    // Reseeked L0 and L1
    ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2);

    // Switch direction: Next() then Prev()
    get_perf_context()->Reset();
    iter->Prev();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().ToString(), Key(1));
    // Reseeked L0 and L1
    ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2);

    // Switch direction again: Prev() then Next()
    get_perf_context()->Reset();
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().ToString(), Key(3));
    // Reseeked L0 and L1
    ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2);

    iter->Next();
    VerifyIteratorReachesEnd(iter);
  };

  get_perf_context()->Reset();
  iter->Seek(Key(0));
  // Reseeked L0 and L1
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2);
  iter_test_forward();
  get_perf_context()->Reset();
  iter->Seek(Key(1));
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 0);
  iter_test_forward();

  get_perf_context()->Reset();
  iter->SeekForPrev(Key(2));
  // Reseeked L0 and L1
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2);
  iter_test_forward();
  get_perf_context()->Reset();
  iter->SeekForPrev(Key(1));
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 0);
  iter_test_forward();

  get_perf_context()->Reset();
  iter->SeekToFirst();
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 0);
  iter_test_forward();

  iter->SeekToLast();
  iter->Prev();
  iter_test_forward();
  delete iter;
}

TEST_F(DBRangeDelTest, TombstoneFromCurrentLevel) {
  // Range tombstone triggers reseek when covering key from the same level
  // in merging iterator. Test set up:
  //    memtable has: [0, 1)
  //    L0 has: [2, 3), 2
  //    L1 has: 1, 2, 3
  // Seek(0) will reseek to 1 for L0 and L1.
  // Then Next() will reseek to 3 for L1 since 2 in L0 is covered by [2, 3) in
  // L0.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;

  DestroyAndReopen(options);
  // L1
  ASSERT_OK(db_->Put(WriteOptions(), Key(1), "foo"));
  ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo"));
  ASSERT_OK(db_->Put(WriteOptions(), Key(3), "foo"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  // L0
  ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo"));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
                             Key(3)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // Memtable
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(1)));

  auto iter = db_->NewIterator(ReadOptions());
  get_perf_context()->Reset();
  iter->Seek(Key(0));
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), Key(1));
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2);

  get_perf_context()->Reset();
  iter->Next();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), Key(3));
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 1);

  delete iter;
}

class TombstoneTestSstPartitioner : public SstPartitioner {
 public:
  const char* Name() const override { return "SingleKeySstPartitioner"; }

  PartitionerResult ShouldPartition(
      const PartitionerRequest& request) override {
    if (cmp->Compare(*request.current_user_key, DBTestBase::Key(5)) == 0) {
      return kRequired;
    }
    return kNotRequired;
  }

  bool CanDoTrivialMove(const Slice& /*smallest_user_key*/,
                        const Slice& /*largest_user_key*/) override {
    return false;
  }

  const Comparator* cmp = BytewiseComparator();
};

class TombstoneTestSstPartitionerFactory : public SstPartitionerFactory {
 public:
  static const char* kClassName() {
    return "TombstoneTestSstPartitionerFactory";
  }
  const char* Name() const override { return kClassName(); }

  std::unique_ptr<SstPartitioner> CreatePartitioner(
      const SstPartitioner::Context& /* context */) const override {
    return std::unique_ptr<SstPartitioner>(new TombstoneTestSstPartitioner());
  }
};

TEST_F(DBRangeDelTest, TombstoneAcrossFileBoundary) {
  // Verify that a range tombstone across file boundary covers keys from older
  // levels. Test set up:
  //    L1_0: 1, 3, [2, 6)   L1_1: 5, 7, [2, 6) ([2, 6) is from compaction with
  //    the L0 file containing 1, 3 and [2, 6))
  //    L2: 5
  // Seek(1) and then Next() should move the L1 level iterator to
  // L1_1. Check if 5 is returned after Next().
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = 2 * 1024;
  options.max_compaction_bytes = 2 * 1024;

  // Make sure L1 files are split before "5"
  auto factory = std::make_shared<TombstoneTestSstPartitionerFactory>();
  options.sst_partitioner_factory = factory;

  DestroyAndReopen(options);

  Random rnd(301);
  // L2
  // the file should be smaller than max_compaction_bytes, otherwise the file
  // will be cut before 7.
  ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(1 << 9)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // L1_1
  ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(1 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(7), rnd.RandomString(1 << 10)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // L1_0
  ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(1 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(3), rnd.RandomString(1 << 10)));
  // Prevent keys being compacted away
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
                             Key(6)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(2, NumTableFilesAtLevel(0));
  MoveFilesToLevel(1);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  auto iter = db_->NewIterator(ReadOptions());
  get_perf_context()->Reset();
  iter->Seek(Key(1));
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), Key(1));
  iter->Next();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), Key(7));
  // 1 reseek into L2 when key 5 in L2 is covered by [2, 6) from L1
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 1);

  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

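// The test above drives file cutting through the SstPartitioner extension
// point. A hedged sketch of wiring a custom partitioner into Options (the
// factory is the helper class defined earlier in this file):
//
//   Options opts = CurrentOptions();
//   opts.sst_partitioner_factory =
//       std::make_shared<TombstoneTestSstPartitionerFactory>();
//   // Compaction/flush outputs are then cut whenever ShouldPartition()
//   // returns kRequired, here exactly before the user key Key(5).
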
TEST_F(DBRangeDelTest, NonOverlappingTombstonAtBoundary) {
  // Verify that a range tombstone across file boundary covers keys from older
  // levels. Test set up:
  //    L1_0: 1, 3, [4, 7)   L1_1: 6, 8, [4, 7)
  //    L2: 5
  // Note that [4, 7) is at end of L1_0 and not overlapping with any point key
  // in L1_0. [4, 7) from L1_0 should cover 5 if the sentinel key works.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = 2 * 1024;
  DestroyAndReopen(options);

  Random rnd(301);
  // L2
  ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // L1_1
  ASSERT_OK(db_->Put(WriteOptions(), Key(6), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(8), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // L1_0
  ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(3), rnd.RandomString(4 << 10)));
  // Prevent keys being compacted away
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(4),
                             Key(7)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(2, NumTableFilesAtLevel(0));
  MoveFilesToLevel(1);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  auto iter = db_->NewIterator(ReadOptions());
  iter->Seek(Key(3));
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key(), Key(3));
  get_perf_context()->Reset();
  iter->Next();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), Key(8));
  // 1 reseek into L1 since 5 from L2 is covered by [4, 7) from L1
  ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 1);
  for (auto& k : {4, 5, 6}) {
    get_perf_context()->Reset();
    iter->Seek(Key(k));
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().ToString(), Key(8));
    ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 1);
  }

  delete iter;
}

TEST_F(DBRangeDelTest, OlderLevelHasNewerData) {
  //    L1_0: 1, 3, [2, 7)   L1_1: 5, 6 at a newer sequence number than [2, 7)
  // Compact L1_1 to L2. Seek(3) should not skip 5 or 6.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = 3 * 1024;
  DestroyAndReopen(options);

  Random rnd(301);
  // L1_0
  ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(3), rnd.RandomString(4 << 10)));
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
                             Key(7)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  // L1_1
  ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(6), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  MoveFilesToLevel(1);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  // Compact L1_1 to L2
  std::string begin_key_storage = Key(5);
  Slice begin(begin_key_storage);
  EXPECT_OK(dbfull()->TEST_CompactRange(1, &begin, nullptr));
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  auto iter = db_->NewIterator(ReadOptions());
  iter->Seek(Key(3));
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), Key(5));
  iter->Next();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), Key(6));
  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, LevelBoundaryDefinedByTombstone) {
  // L1 has: 1, 2, [4, 5)
  // L2 has: 4
  // Seek(3), which is over all point keys in L1, checks whether the
  // sentinel key from L1 works in this case.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = 3 * 1024;
  DestroyAndReopen(options);

  Random rnd(301);
  // L2
  ASSERT_OK(db_->Put(WriteOptions(), Key(4), "foo"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  const Snapshot* snapshot = db_->GetSnapshot();
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // L1
  ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(2), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(4),
                             Key(5)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  auto iter = db_->NewIterator(ReadOptions());
  iter->Seek(Key(3));
  ASSERT_TRUE(!iter->Valid());
  ASSERT_OK(iter->status());

  get_perf_context()->Reset();
  iter->SeekForPrev(Key(5));
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key(), Key(2));
  db_->ReleaseSnapshot(snapshot);
  delete iter;
}

TEST_F(DBRangeDelTest, TombstoneOnlyFile) {
  // L1_0: 1, 2, L1_1: [3, 5)
  // L2: 3
  // Seek(2) then Next() should advance L1 iterator into L1_1.
  // If sentinel works with tombstone only file, it should cover the key in L2.
  // Similar story for SeekForPrev(4).
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = 3 * 1024;

  DestroyAndReopen(options);

  Random rnd(301);
  // L2
  ASSERT_OK(db_->Put(WriteOptions(), Key(3), "foo"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // L1_0
  ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(2), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // L1_1, tombstone only
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3),
                             Key(5)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  auto iter = db_->NewIterator(ReadOptions());
  iter->Seek(Key(2));
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key(), Key(2));
  iter->Next();
  VerifyIteratorReachesEnd(iter);
  iter->SeekForPrev(Key(4));
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key(), Key(2));
  iter->Next();
  VerifyIteratorReachesEnd(iter);
  delete iter;
}

void VerifyIteratorKey(InternalIterator* iter,
                       const std::vector<std::string>& expected_keys,
                       bool forward = true) {
  for (auto& key : expected_keys) {
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->user_key(), key);
    if (forward) {
      iter->Next();
    } else {
      iter->Prev();
    }
  }
}

TEST_F(DBRangeDelTest, TombstoneOnlyLevel) {
  // L1 has: [3, 5)
  // L2 has: 3, 4
  // Any kind of iterator seek should skip 3 and 4 in L2.
  // L1 level iterator should produce sentinel key.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = 3 * 1024;

  DestroyAndReopen(options);
  // L2
  ASSERT_OK(db_->Put(WriteOptions(), Key(3), "foo"));
  ASSERT_OK(db_->Put(WriteOptions(), Key(4), "bar"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // L1, tombstone only
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3),
                             Key(5)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  auto iter = db_->NewIterator(ReadOptions());
  get_perf_context()->Reset();
  uint64_t expected_reseek = 0;
  for (auto i = 0; i < 7; ++i) {
    iter->Seek(Key(i));
    VerifyIteratorReachesEnd(iter);
    if (i < 5) {
      ++expected_reseek;
    }
    ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count,
              expected_reseek);
    iter->SeekForPrev(Key(i));
    VerifyIteratorReachesEnd(iter);
    if (i > 2) {
      ++expected_reseek;
    }
    ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count,
              expected_reseek);
    iter->SeekToFirst();
    VerifyIteratorReachesEnd(iter);
    ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count,
              ++expected_reseek);
    iter->SeekToLast();
    VerifyIteratorReachesEnd(iter);
    ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count,
              ++expected_reseek);
  }
  delete iter;

  // Check L1 LevelIterator behavior
  ColumnFamilyData* cfd =
      static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
          ->cfd();
  SuperVersion* sv = cfd->GetSuperVersion();
  Arena arena;
  ReadOptions read_options;
  MergeIteratorBuilder merge_iter_builder(&cfd->internal_comparator(), &arena,
                                          false /* prefix seek */);
  InternalIterator* level_iter = sv->current->TEST_GetLevelIterator(
      read_options, &merge_iter_builder, 1 /* level */, true);
  // This is needed to make LevelIterator range tombstone aware
  auto miter = merge_iter_builder.Finish();

  auto k = Key(3);
  IterKey target;
  target.SetInternalKey(k, kMaxSequenceNumber, kValueTypeForSeek);
  level_iter->Seek(target.GetInternalKey());
  // sentinel key (file boundary as a fake key)
  VerifyIteratorKey(level_iter, {Key(5)});
  VerifyIteratorReachesEnd(level_iter);

  k = Key(5);
  target.SetInternalKey(k, 0, kValueTypeForSeekForPrev);
  level_iter->SeekForPrev(target.GetInternalKey());
  VerifyIteratorKey(level_iter, {Key(3)}, false);
  VerifyIteratorReachesEnd(level_iter);

  level_iter->SeekToFirst();
  VerifyIteratorKey(level_iter, {Key(5)});
  VerifyIteratorReachesEnd(level_iter);

  level_iter->SeekToLast();
  VerifyIteratorKey(level_iter, {Key(3)}, false);
  VerifyIteratorReachesEnd(level_iter);

  miter->~InternalIterator();
}

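// "Sentinel key" in the tests above refers to an SST file boundary key that
// LevelIterator exposes as a fake key (the file's largest key on forward
// scans, smallest key on reverse scans). This keeps a file that contains only
// range tombstones "alive" in the merging iterator while it is positioned
// inside that file's key range, which is why the tombstone-only L1 file can
// hide 3 and 4 in L2 even though it contributes no real point keys.
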
TEST_F(DBRangeDelTest, TombstoneOnlyWithOlderVisibleKey) {
  // L1: [3, 5)
  // L2: 2, 4, 5
  // 2 and 5 should be visible
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = 3 * 1024;

  DestroyAndReopen(options);
  // L2
  ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo"));
  ASSERT_OK(db_->Put(WriteOptions(), Key(4), "bar"));
  ASSERT_OK(db_->Put(WriteOptions(), Key(5), "foobar"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // L1, tombstone only
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3),
                             Key(5)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  auto iter = db_->NewIterator(ReadOptions());
  auto iter_test_backward = [&] {
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key(), Key(5));
    iter->Prev();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key(), Key(2));
    iter->Prev();
    VerifyIteratorReachesEnd(iter);
  };
  auto iter_test_forward = [&] {
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key(), Key(2));
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key(), Key(5));
    iter->Next();
    VerifyIteratorReachesEnd(iter);
  };

  iter->Seek(Key(4));
  iter_test_backward();
  iter->SeekForPrev(Key(4));
  iter->Next();
  iter_test_backward();

  iter->Seek(Key(4));
  iter->Prev();
  iter_test_forward();
  iter->SeekForPrev(Key(4));
  iter_test_forward();

  iter->SeekToFirst();
  iter_test_forward();
  iter->SeekToLast();
  iter_test_backward();
  delete iter;
}

TEST_F(DBRangeDelTest, TombstoneSentinelDirectionChange) {
  // L1: 7
  // L2: [4, 6)
  // L3: 4
  // Seek(5) will have sentinel key 6 at the top of minHeap in merging iterator.
  // then do a prev, how would sentinel work?
  // Redo the test after Put(5) into L1 so that there is a visible key in range
  // [4, 6).
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = 3 * 1024;

  DestroyAndReopen(options);
  // L3
  ASSERT_OK(db_->Put(WriteOptions(), Key(4), "bar"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(3);
  ASSERT_EQ(1, NumTableFilesAtLevel(3));
  // L2
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(4),
                             Key(6)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // L1
  ASSERT_OK(db_->Put(WriteOptions(), Key(7), "foobar"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  auto iter = db_->NewIterator(ReadOptions());
  iter->Seek(Key(5));
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key(), Key(7));
  iter->Prev();
  ASSERT_TRUE(!iter->Valid() && iter->status().ok());
  delete iter;

  ASSERT_OK(db_->Put(WriteOptions(), Key(5), "foobar"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  iter = db_->NewIterator(ReadOptions());
  iter->Seek(Key(5));
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key(), Key(5));
  iter->Prev();
  ASSERT_TRUE(!iter->Valid() && iter->status().ok());
  delete iter;
}

// Right sentinel tested in many test cases above
TEST_F(DBRangeDelTest, LeftSentinelKeyTest) {
  // L1_0: 0, 1    L1_1: [2, 3), 5
  // L2: 2
  // SeekForPrev(4) should give 1 due to sentinel key keeping [2, 3) alive.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = 3 * 1024;
  options.max_compaction_bytes = 1024;

  DestroyAndReopen(options);
  // L2
  ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // L1_0
  Random rnd(301);
  ASSERT_OK(db_->Put(WriteOptions(), Key(0), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  // L1_1
  ASSERT_OK(db_->Put(WriteOptions(), Key(5), "bar"));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
                             Key(3)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  auto iter = db_->NewIterator(ReadOptions());
  iter->SeekForPrev(Key(4));
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key(), Key(1));
  iter->Prev();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key(), Key(0));
  iter->Prev();
  ASSERT_TRUE(!iter->Valid());
  ASSERT_OK(iter->status());
  delete iter;
}

TEST_F(DBRangeDelTest, LeftSentinelKeyTestWithNewerKey) {
  // L1_0: 1, 2 newer than L1_1,  L1_1: [2, 4), 5
  // L2: 3
  // SeekForPrev(4) then Prev() should give 2 and then 1.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = 3 * 1024;
  options.max_compaction_bytes = 1024;

  DestroyAndReopen(options);
  // L2
  ASSERT_OK(db_->Put(WriteOptions(), Key(3), "foo"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // L1_1
  ASSERT_OK(db_->Put(WriteOptions(), Key(5), "bar"));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
                             Key(4)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  // L1_0
  Random rnd(301);
  ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(2), rnd.RandomString(4 << 10)));
  // Used to verify sequence number of iterator key later.
  auto seq = dbfull()->TEST_GetLastVisibleSequence();
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  Arena arena;
  InternalKeyComparator icmp(options.comparator);
  ReadOptions read_options;
  ScopedArenaIterator iter;
  iter.set(
      dbfull()->NewInternalIterator(read_options, &arena, kMaxSequenceNumber));

  auto k = Key(4);
  IterKey target;
  target.SetInternalKey(k, 0 /* sequence_number */, kValueTypeForSeekForPrev);
  iter->SeekForPrev(target.GetInternalKey());
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->user_key(), Key(2));
  SequenceNumber actual_seq;
  ValueType type;
  UnPackSequenceAndType(ExtractInternalKeyFooter(iter->key()), &actual_seq,
                        &type);
  ASSERT_EQ(seq, actual_seq);
  // might as well check type
  ASSERT_EQ(type, kTypeValue);

  iter->Prev();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->user_key(), Key(1));
  iter->Prev();
  ASSERT_TRUE(!iter->Valid());
  ASSERT_OK(iter->status());
}

TEST_F(DBRangeDelTest, SentinelKeyCommonCaseTest) {
  // L1 has 3 files
  // L1_0: 1, 2     L1_1: [3, 4) 5, 6, [7, 8)     L1_2: 9
  // Check iterator operations on LevelIterator.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = 3 * 1024;

  DestroyAndReopen(options);
  Random rnd(301);
  // L1_0
  ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(2), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  // L1_1
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3),
                             Key(4)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Put(WriteOptions(), Key(6), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(7),
                             Key(8)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  // L1_2
  ASSERT_OK(db_->Put(WriteOptions(), Key(9), rnd.RandomString(4 << 10)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(3, NumTableFilesAtLevel(1));

  ColumnFamilyData* cfd =
      static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
          ->cfd();
  SuperVersion* sv = cfd->GetSuperVersion();
  Arena arena;
  ReadOptions read_options;
  MergeIteratorBuilder merge_iter_builder(&cfd->internal_comparator(), &arena,
                                          false /* prefix seek */);
  InternalIterator* level_iter = sv->current->TEST_GetLevelIterator(
      read_options, &merge_iter_builder, 1 /* level */, true);
  // This is needed to make LevelIterator range tombstone aware
  auto miter = merge_iter_builder.Finish();

  auto k = Key(7);
  IterKey target;
  target.SetInternalKey(k, kMaxSequenceNumber, kValueTypeForSeek);
  level_iter->Seek(target.GetInternalKey());
  // The last Key(9) is a sentinel key.
  VerifyIteratorKey(level_iter, {Key(8), Key(9), Key(9)});
  ASSERT_TRUE(!level_iter->Valid() && level_iter->status().ok());

  k = Key(6);
  target.SetInternalKey(k, kMaxSequenceNumber, kValueTypeForSeek);
  level_iter->Seek(target.GetInternalKey());
  VerifyIteratorKey(level_iter, {Key(6), Key(8), Key(9), Key(9)});
  ASSERT_TRUE(!level_iter->Valid() && level_iter->status().ok());

  k = Key(4);
  target.SetInternalKey(k, 0, kValueTypeForSeekForPrev);
  level_iter->SeekForPrev(target.GetInternalKey());
  VerifyIteratorKey(level_iter, {Key(3), Key(2), Key(1), Key(1)}, false);
  ASSERT_TRUE(!level_iter->Valid() && level_iter->status().ok());

  k = Key(5);
  target.SetInternalKey(k, 0, kValueTypeForSeekForPrev);
  level_iter->SeekForPrev(target.GetInternalKey());
  VerifyIteratorKey(level_iter, {Key(5), Key(3), Key(2), Key(1), Key(1)},
                    false);

  level_iter->SeekToFirst();
  VerifyIteratorKey(level_iter, {Key(1), Key(2), Key(2), Key(5), Key(6), Key(8),
                                 Key(9), Key(9)});
  ASSERT_TRUE(!level_iter->Valid() && level_iter->status().ok());

  level_iter->SeekToLast();
  VerifyIteratorKey(
      level_iter,
      {Key(9), Key(9), Key(6), Key(5), Key(3), Key(2), Key(1), Key(1)}, false);
  ASSERT_TRUE(!level_iter->Valid() && level_iter->status().ok());

  miter->~InternalIterator();
}

TEST_F(DBRangeDelTest, PrefixSentinelKey) {
  // L1: ['aaaa', 'aaad'), 'bbbb'
  // L2: 'aaac', 'aaae'
  // Prefix extracts first 3 chars
  // Seek('aaab') should give 'aaae' as first key.
  // This is to test a previous bug where prefix seek sees there is no prefix in
  // the SST file, and will just set file iter to null in LevelIterator and may
  // just skip to the next SST file. But in this case, we should keep the file's
  // tombstone alive.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  table_options.whole_key_filtering = false;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  Random rnd(301);
  // L2
  ASSERT_OK(db_->Put(WriteOptions(), "aaac", rnd.RandomString(10)));
  ASSERT_OK(db_->Put(WriteOptions(), "aaae", rnd.RandomString(10)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // L1
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "aaaa",
                             "aaad"));
  ASSERT_OK(db_->Put(WriteOptions(), "bbbb", rnd.RandomString(10)));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  auto iter = db_->NewIterator(ReadOptions());
  iter->Seek("aaab");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key(), "aaae");
  delete iter;
}

TEST_F(DBRangeDelTest, RefreshMemtableIter) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  ReadOptions ro;
  ro.read_tier = kMemtableTier;
  std::unique_ptr<Iterator> iter{db_->NewIterator(ro)};

  // First refresh reinits iter, which had a bug where
  // iter.memtable_range_tombstone_iter_ was not set to nullptr, and caused
  // subsequent refresh to double free.
  ASSERT_OK(iter->Refresh());
  ASSERT_OK(iter->Refresh());
}

TEST_F(DBRangeDelTest, RangeTombstoneRespectIterateUpperBound) {
  // Memtable: a, [b, bz)
  // Do a Seek on `a` with iterate_upper_bound being az
  // range tombstone [b, bz) should not be processed (added to and
  // popped from the min_heap in MergingIterator).
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  ASSERT_OK(Put("a", "bar"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "bz"));

  // I could not find a cleaner way to test this without relying on
  // implementation detail. Tried to test the value of
  // `internal_range_del_reseek_count` but that did not work
  // since BlockBasedTable iterator becomes !Valid() when point key
  // is out of bound and that reseek only happens when a point key
  // is covered by some range tombstone.
  SyncPoint::GetInstance()->SetCallBack("MergeIterator::PopDeleteRangeStart",
                                        [](void*) {
                                          // there should not be any range
                                          // tombstone in the heap.
                                          FAIL();
                                        });
  SyncPoint::GetInstance()->EnableProcessing();

  ReadOptions read_opts;
  std::string upper_bound = "az";
  Slice upper_bound_slice = upper_bound;
  read_opts.iterate_upper_bound = &upper_bound_slice;
  std::unique_ptr<Iterator> iter{db_->NewIterator(read_opts)};
  iter->Seek("a");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key(), "a");
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_OK(iter->status());
}

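// `ReadOptions::iterate_upper_bound` is the public knob used above: the
// iterator stops strictly before the bound, which also lets the merging
// iterator ignore range tombstones lying entirely at or beyond the bound. A
// minimal hedged sketch (key names illustrative):
//
//   ReadOptions ro;
//   Slice ub("az");
//   ro.iterate_upper_bound = &ub;  // must outlive the iterator
//   std::unique_ptr<Iterator> it(db->NewIterator(ro));
//   for (it->Seek("a"); it->Valid(); it->Next()) { /* keys < "az" only */ }
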
#endif  // ROCKSDB_LITE

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}