// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "rocksdb/utilities/write_batch_with_index.h"
#include "test_util/testutil.h"
#include "util/random.h"
#include "utilities/merge_operators.h"

namespace ROCKSDB_NAMESPACE {

class DBRangeDelTest : public DBTestBase {
 public:
  DBRangeDelTest() : DBTestBase("/db_range_del_test", /*env_do_fsync=*/false) {}

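  // Produces a fixed-width 8-byte key by copying the raw uint64_t
  // representation of `key`; tests that use these keys configure the DB with
  // test::Uint64Comparator so they are compared as 64-bit integers.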
  std::string GetNumericStr(int key) {
    uint64_t uint64_key = static_cast<uint64_t>(key);
    std::string str;
    str.resize(8);
    memcpy(&str[0], static_cast<void*>(&uint64_key), 8);
    return str;
  }
};

// PlainTableFactory, WriteBatchWithIndex, and NumTableFilesAtLevel() are not
// supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
TEST_F(DBRangeDelTest, NonBlockBasedTableNotSupported) {
  // TODO: figure out why MmapReads trips the iterator pinning assertion in
  // RangeDelAggregator. Ideally it would be supported; otherwise it should at
  // least be explicitly unsupported.
  for (auto config : {kPlainTableAllBytesPrefix, /* kWalDirAndMmapReads */}) {
    option_config_ = config;
    DestroyAndReopen(CurrentOptions());
    ASSERT_TRUE(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 "dr1", "dr1")
                    .IsNotSupported());
  }
}

TEST_F(DBRangeDelTest, WriteBatchWithIndexNotSupported) {
  WriteBatchWithIndex indexedBatch{};
  ASSERT_TRUE(indexedBatch.DeleteRange(db_->DefaultColumnFamily(), "dr1", "dr1")
                  .IsNotSupported());
  ASSERT_TRUE(indexedBatch.DeleteRange("dr1", "dr1").IsNotSupported());
}

TEST_F(DBRangeDelTest, EndSameAsStartCoversNothing) {
  ASSERT_OK(db_->Put(WriteOptions(), "b", "val"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "b"));
  ASSERT_EQ("val", Get("b"));
}

TEST_F(DBRangeDelTest, EndComesBeforeStartInvalidArgument) {
  db_->Put(WriteOptions(), "b", "val");
  ASSERT_TRUE(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "a")
          .IsInvalidArgument());
  ASSERT_EQ("val", Get("b"));
}

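// Flushing a memtable that contains nothing but a range tombstone must still
// produce an SST file in L0.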
TEST_F(DBRangeDelTest, FlushOutputHasOnlyRangeTombstones) {
  do {
    DestroyAndReopen(CurrentOptions());
    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                               "dr1", "dr2"));
    ASSERT_OK(db_->Flush(FlushOptions()));
    ASSERT_EQ(1, NumTableFilesAtLevel(0));
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

TEST_F(DBRangeDelTest, CompactionOutputHasOnlyRangeTombstone) {
  do {
    Options opts = CurrentOptions();
    opts.disable_auto_compactions = true;
    opts.statistics = CreateDBStatistics();
    DestroyAndReopen(opts);

    // snapshot protects range tombstone from dropping due to becoming obsolete.
    const Snapshot* snapshot = db_->GetSnapshot();
    db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z");
    db_->Flush(FlushOptions());

    ASSERT_EQ(1, NumTableFilesAtLevel(0));
    ASSERT_EQ(0, NumTableFilesAtLevel(1));
    dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                true /* disallow_trivial_move */);
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_EQ(1, NumTableFilesAtLevel(1));
    ASSERT_EQ(0, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE));
    db_->ReleaseSnapshot(snapshot);
    // Skip cuckoo memtables, which do not support snapshots. Skip non-leveled
    // compactions as the above assertions about the number of files in a level
    // do not hold true.
  } while (ChangeOptions(kRangeDelSkipConfigs | kSkipUniversalCompaction |
                         kSkipFIFOCompaction));
}

TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) {
  // regression test for exactly filled compaction output files. Previously
  // another file would be generated containing all range deletions, which
  // could invalidate the non-overlapping file boundary invariant.
  const int kNumPerFile = 4, kNumFiles = 2, kFileBytes = 9 << 10;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = kNumFiles;
  options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  options.num_levels = 2;
  options.target_file_size_base = kFileBytes;
  BlockBasedTableOptions table_options;
  table_options.block_size_deviation = 50;  // each block holds two keys
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);

  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();
  db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(1));

  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(rnd.RandomString(3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
      if (j == 0 && i > 0) {
        dbfull()->TEST_WaitForFlushMemTable();
      }
    }
  }
  // put extra key to trigger final flush
  ASSERT_OK(Put("", ""));
  dbfull()->TEST_WaitForFlushMemTable();
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));

  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(2, NumTableFilesAtLevel(1));
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, MaxCompactionBytesCutsOutputFiles) {
  // Ensures range deletion spanning multiple compaction output files that are
  // cut by max_compaction_bytes will have non-overlapping key-ranges.
  // https://github.com/facebook/rocksdb/issues/1778
  const int kNumFiles = 2, kNumPerFile = 1 << 8, kBytesPerVal = 1 << 12;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.disable_auto_compactions = true;
  opts.level0_file_num_compaction_trigger = kNumFiles;
  opts.max_compaction_bytes = kNumPerFile * kBytesPerVal;
  opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  // Want max_compaction_bytes to trigger the end of compaction output file, not
  // target_file_size_base, so make the latter much bigger
  opts.target_file_size_base = 100 * opts.max_compaction_bytes;
  DestroyAndReopen(opts);

  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();

  // It spans the whole key-range, thus will be included in all output files
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             GetNumericStr(0),
                             GetNumericStr(kNumFiles * kNumPerFile - 1)));
  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 1MB (256 values, each 4K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(rnd.RandomString(kBytesPerVal));
      ASSERT_OK(Put(GetNumericStr(kNumPerFile * i + j), values[j]));
    }
    // extra entry to trigger SpecialSkipListFactory's flush
    ASSERT_OK(Put(GetNumericStr(kNumPerFile), ""));
    dbfull()->TEST_WaitForFlushMemTable();
    ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
  }

  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GE(NumTableFilesAtLevel(1), 2);

  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);

  for (size_t i = 0; i + 1 < files[1].size(); ++i) {
    ASSERT_TRUE(InternalKeyComparator(opts.comparator)
                    .Compare(files[1][i].largest, files[1][i + 1].smallest) <
                0);
  }
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, SentinelsOmittedFromOutputFile) {
  // Regression test for bug where sentinel range deletions (i.e., ones with
  // sequence number of zero) were included in output files.
  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();

  // gaps between ranges create sentinels in our internal representation
  std::vector<std::pair<std::string, std::string>> range_dels = {
      {"a", "b"}, {"c", "d"}, {"e", "f"}};
  for (const auto& range_del : range_dels) {
    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                               range_del.first, range_del.second));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
  ASSERT_GT(files[0][0].fd.smallest_seqno, 0);

  db_->ReleaseSnapshot(snapshot);
}

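// Two range tombstones share the start key "a" but have different end keys.
// "b1" (written before both) must be covered, while "b2" (written between
// them and outside the narrower range) must survive, both in the memtable and
// after flushing to an SST file.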
TEST_F(DBRangeDelTest, FlushRangeDelsSameStartKey) {
  db_->Put(WriteOptions(), "b1", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
  db_->Put(WriteOptions(), "b2", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
  // first iteration verifies query correctness in memtable, second verifies
  // query correctness for a single SST file
  for (int i = 0; i < 2; ++i) {
    if (i > 0) {
      ASSERT_OK(db_->Flush(FlushOptions()));
      ASSERT_EQ(1, NumTableFilesAtLevel(0));
    }
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound());
    ASSERT_OK(db_->Get(ReadOptions(), "b2", &value));
  }
}

TEST_F(DBRangeDelTest, CompactRangeDelsSameStartKey) {
  db_->Put(WriteOptions(), "unused", "val");  // prevents empty after compaction
  db_->Put(WriteOptions(), "b1", "val");
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(3, NumTableFilesAtLevel(0));

  for (int i = 0; i < 2; ++i) {
    if (i > 0) {
      dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                  true /* disallow_trivial_move */);
      ASSERT_EQ(0, NumTableFilesAtLevel(0));
      ASSERT_EQ(1, NumTableFilesAtLevel(1));
    }
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound());
  }
}
#endif  // ROCKSDB_LITE

TEST_F(DBRangeDelTest, FlushRemovesCoveredKeys) {
  const int kNum = 300, kRangeBegin = 50, kRangeEnd = 250;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  DestroyAndReopen(opts);

  // Write a third before snapshot, a third between snapshot and tombstone, and
  // a third after the tombstone. Keys older than snapshot or newer than the
  // tombstone should be preserved.
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNum; ++i) {
    if (i == kNum / 3) {
      snapshot = db_->GetSnapshot();
    } else if (i == 2 * kNum / 3) {
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                       GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
    }
    db_->Put(WriteOptions(), GetNumericStr(i), "val");
  }
  db_->Flush(FlushOptions());

  for (int i = 0; i < kNum; ++i) {
    ReadOptions read_opts;
    read_opts.ignore_range_deletions = true;
    std::string value;
    if (i < kRangeBegin || i > kRangeEnd || i < kNum / 3 || i >= 2 * kNum / 3) {
      ASSERT_OK(db_->Get(read_opts, GetNumericStr(i), &value));
    } else {
      ASSERT_TRUE(db_->Get(read_opts, GetNumericStr(i), &value).IsNotFound());
    }
  }
  db_->ReleaseSnapshot(snapshot);
}

// NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
TEST_F(DBRangeDelTest, CompactionRemovesCoveredKeys) {
  const int kNumPerFile = 100, kNumFiles = 4;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.disable_auto_compactions = true;
  opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  opts.num_levels = 2;
  opts.statistics = CreateDBStatistics();
  DestroyAndReopen(opts);

  for (int i = 0; i < kNumFiles; ++i) {
    if (i > 0) {
      // range tombstone covers first half of the previous file
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                       GetNumericStr((i - 1) * kNumPerFile),
                       GetNumericStr((i - 1) * kNumPerFile + kNumPerFile / 2));
    }
    // Make sure a given key appears in each file so compaction won't be able to
    // use trivial move, which would happen if the ranges were non-overlapping.
    // Also, we need an extra element since flush is only triggered when the
    // number of keys is one greater than SpecialSkipListFactory's limit.
    // We choose a key outside the key-range used by the test to avoid conflict.
    db_->Put(WriteOptions(), GetNumericStr(kNumPerFile * kNumFiles), "val");

    for (int j = 0; j < kNumPerFile; ++j) {
      db_->Put(WriteOptions(), GetNumericStr(i * kNumPerFile + j), "val");
    }
    dbfull()->TEST_WaitForFlushMemTable();
    ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
  }
  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GT(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ((kNumFiles - 1) * kNumPerFile / 2,
            TestGetTickerCount(opts, COMPACTION_KEY_DROP_RANGE_DEL));

  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kNumPerFile; ++j) {
      ReadOptions read_opts;
      read_opts.ignore_range_deletions = true;
      std::string value;
      if (i == kNumFiles - 1 || j >= kNumPerFile / 2) {
        ASSERT_OK(
            db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value));
      } else {
        ASSERT_TRUE(
            db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value)
                .IsNotFound());
      }
    }
  }
}

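// Exercises leveled compaction with multiple subcompactions over input files
// that all contain pieces of the same range deletions; the resulting L1 files
// must still have non-overlapping key ranges (see the in-test comments below).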
TEST_F(DBRangeDelTest, ValidLevelSubcompactionBoundaries) {
  const int kNumPerFile = 100, kNumFiles = 4, kFileBytes = 100 << 10;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = kNumFiles;
  options.max_bytes_for_level_base = 2 * kFileBytes;
  options.max_subcompactions = 4;
  options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  options.num_levels = 3;
  options.target_file_size_base = kFileBytes;
  options.target_file_size_multiplier = 1;
  Reopen(options);

  Random rnd(301);
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < kNumFiles; ++j) {
      if (i > 0) {
        // delete [95,105) in two files, [295,305) in next two
        int mid = (j + (1 - j % 2)) * kNumPerFile;
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                         Key(mid - 5), Key(mid + 5));
      }
      std::vector<std::string> values;
      // Write 100KB (100 values, each 1K)
      for (int k = 0; k < kNumPerFile; k++) {
        values.push_back(rnd.RandomString(990));
        ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put("", ""));
      dbfull()->TEST_WaitForFlushMemTable();
      if (j < kNumFiles - 1) {
        // background compaction may happen early for kNumFiles'th file
        ASSERT_EQ(NumTableFilesAtLevel(0), j + 1);
      }
      if (j == options.level0_file_num_compaction_trigger - 1) {
        // When i == 1, compaction will output some files to L1, at which point
        // L1 is not bottommost so range deletions cannot be compacted away. The
        // new L1 files must be generated with non-overlapping key ranges even
        // though multiple subcompactions see the same ranges deleted, else an
        // assertion will fail.
        //
        // Only enable auto-compactions when we're ready; otherwise, the
        // oversized L0 (relative to base_level) causes the compaction to run
        // earlier.
        ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()}));
        dbfull()->TEST_WaitForCompact();
        ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(),
                                  {{"disable_auto_compactions", "true"}}));
        ASSERT_EQ(NumTableFilesAtLevel(0), 0);
        ASSERT_GT(NumTableFilesAtLevel(1), 0);
        ASSERT_GT(NumTableFilesAtLevel(2), 0);
      }
    }
  }
}

TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) {
  const int kNumPerFile = 100, kFilesPerLevel = 4, kNumLevels = 4;
  Options options = CurrentOptions();
  options.compaction_options_universal.min_merge_width = kFilesPerLevel;
  options.compaction_options_universal.max_merge_width = kFilesPerLevel;
  options.compaction_options_universal.size_ratio = 10;
  options.compaction_style = kCompactionStyleUniversal;
  options.level0_file_num_compaction_trigger = kFilesPerLevel;
  options.max_subcompactions = 4;
  options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  options.num_levels = kNumLevels;
  options.target_file_size_base = kNumPerFile << 10;
  options.target_file_size_multiplier = 1;
  Reopen(options);

  Random rnd(301);
  for (int i = 0; i < kNumLevels - 1; ++i) {
    for (int j = 0; j < kFilesPerLevel; ++j) {
      if (i == kNumLevels - 2) {
        // insert range deletions [95,105) in two files, [295,305) in next two
        // to prepare L1 for later manual compaction.
        int mid = (j + (1 - j % 2)) * kNumPerFile;
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                         Key(mid - 5), Key(mid + 5));
      }
      std::vector<std::string> values;
      // Write 100KB (100 values, each 1K)
      for (int k = 0; k < kNumPerFile; k++) {
        values.push_back(rnd.RandomString(990));
        ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put("", ""));
      dbfull()->TEST_WaitForFlushMemTable();
      if (j < kFilesPerLevel - 1) {
        // background compaction may happen early for kFilesPerLevel'th file
        ASSERT_EQ(NumTableFilesAtLevel(0), j + 1);
      }
    }
    dbfull()->TEST_WaitForCompact();
    ASSERT_EQ(NumTableFilesAtLevel(0), 0);
    ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1);
  }
  // Now L1-L3 are full. When we compact L1->L2 we should see (1) subcompactions
  // happen since input level > 0; (2) range deletions are not dropped since
  // output level is not bottommost. If no file boundary assertion fails, that
  // probably means universal compaction + subcompaction + range deletion are
  // compatible.
  ASSERT_OK(dbfull()->RunManualCompaction(
      static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
          ->cfd(),
      1 /* input_level */, 2 /* output_level */, CompactRangeOptions(),
      nullptr /* begin */, nullptr /* end */, true /* exclusive */,
      true /* disallow_trivial_move */,
      port::kMaxUint64 /* max_file_num_to_ignore */));
}
#endif  // ROCKSDB_LITE

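// Merge operands written before the DeleteRange are covered by it; after the
// final compaction they should have been dropped, so a Get() (even with
// ignore_range_deletions set) sees only the sum of the operands written after
// the tombstone.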
TEST_F(DBRangeDelTest, CompactionRemovesCoveredMergeOperands) {
  const int kNumPerFile = 3, kNumFiles = 3;
  Options opts = CurrentOptions();
  opts.disable_auto_compactions = true;
  opts.memtable_factory.reset(new SpecialSkipListFactory(2 * kNumPerFile));
  opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  opts.num_levels = 2;
  Reopen(opts);

  // Iterates kNumFiles * kNumPerFile + 1 times since flushing the last file
  // requires an extra entry.
  for (int i = 0; i <= kNumFiles * kNumPerFile; ++i) {
    if (i % kNumPerFile == 0 && i / kNumPerFile == kNumFiles - 1) {
      // Delete merge operands from all but the last file
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key",
                       "key_");
    }
    std::string val;
    PutFixed64(&val, i);
    db_->Merge(WriteOptions(), "key", val);
    // We need to prevent trivial move using Puts so compaction will actually
    // process the merge operands.
    db_->Put(WriteOptions(), "prevent_trivial_move", "");
    if (i > 0 && i % kNumPerFile == 0) {
      dbfull()->TEST_WaitForFlushMemTable();
    }
  }

  ReadOptions read_opts;
  read_opts.ignore_range_deletions = true;
  std::string expected, actual;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 45);  // 0+1+2+...+9
  ASSERT_EQ(expected, actual);

  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);

  expected.clear();
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  uint64_t tmp;
  Slice tmp2(actual);
  GetFixed64(&tmp2, &tmp);
  PutFixed64(&expected, 30);  // 6+7+8+9 (earlier operands covered by tombstone)
  ASSERT_EQ(expected, actual);
}

TEST_F(DBRangeDelTest, PutDeleteRangeMergeFlush) {
  // Test the sequence of operations: (1) Put, (2) DeleteRange, (3) Merge, (4)
  // Flush. The `CompactionIterator` previously had a bug where we forgot to
  // check for covering range tombstones when processing the (1) Put, causing
  // it to reappear after the flush.
  Options opts = CurrentOptions();
  opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  Reopen(opts);

  std::string val;
  PutFixed64(&val, 1);
  ASSERT_OK(db_->Put(WriteOptions(), "key", val));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             "key", "key_"));
  ASSERT_OK(db_->Merge(WriteOptions(), "key", val));
  ASSERT_OK(db_->Flush(FlushOptions()));

  ReadOptions read_opts;
  std::string expected, actual;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 1);
  ASSERT_EQ(expected, actual);
}
541
7c673cae
FG
542// NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
543#ifndef ROCKSDB_LITE
544TEST_F(DBRangeDelTest, ObsoleteTombstoneCleanup) {
545 // During compaction to bottommost level, verify range tombstones older than
546 // the oldest snapshot are removed, while others are preserved.
547 Options opts = CurrentOptions();
548 opts.disable_auto_compactions = true;
549 opts.num_levels = 2;
550 opts.statistics = CreateDBStatistics();
551 Reopen(opts);
552
553 db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr1",
11fdf7f2 554 "dr10"); // obsolete after compaction
7c673cae
FG
555 db_->Put(WriteOptions(), "key", "val");
556 db_->Flush(FlushOptions());
557 const Snapshot* snapshot = db_->GetSnapshot();
558 db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr2",
11fdf7f2 559 "dr20"); // protected by snapshot
7c673cae
FG
560 db_->Put(WriteOptions(), "key", "val");
561 db_->Flush(FlushOptions());
562
563 ASSERT_EQ(2, NumTableFilesAtLevel(0));
564 ASSERT_EQ(0, NumTableFilesAtLevel(1));
565 db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
566 ASSERT_EQ(0, NumTableFilesAtLevel(0));
567 ASSERT_EQ(1, NumTableFilesAtLevel(1));
568 ASSERT_EQ(1, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE));
569
570 db_->ReleaseSnapshot(snapshot);
571}
572
573TEST_F(DBRangeDelTest, TableEvictedDuringScan) {
574 // The RangeDelAggregator holds pointers into range deletion blocks created by
575 // table readers. This test ensures the aggregator can still access those
576 // blocks even if it outlives the table readers that created them.
577 //
578 // DBIter always keeps readers open for L0 files. So, in order to test
579 // aggregator outliving reader, we need to have deletions in L1 files, which
580 // are opened/closed on-demand during the scan. This is accomplished by
581 // setting kNumRanges > level0_stop_writes_trigger, which prevents deletions
582 // from all lingering in L0 (there is at most one range deletion per L0 file).
583 //
584 // The first L1 file will contain a range deletion since its begin key is 0.
585 // SeekToFirst() references that table's reader and adds its range tombstone
586 // to the aggregator. Upon advancing beyond that table's key-range via Next(),
587 // the table reader will be unreferenced by the iterator. Since we manually
588 // call Evict() on all readers before the full scan, this unreference causes
589 // the reader's refcount to drop to zero and thus be destroyed.
590 //
591 // When it is destroyed, we do not remove its range deletions from the
592 // aggregator. So, subsequent calls to Next() must be able to use these
593 // deletions to decide whether a key is covered. This will work as long as
594 // the aggregator properly references the range deletion block.
595 const int kNum = 25, kRangeBegin = 0, kRangeEnd = 7, kNumRanges = 5;
596 Options opts = CurrentOptions();
597 opts.comparator = test::Uint64Comparator();
598 opts.level0_file_num_compaction_trigger = 4;
599 opts.level0_stop_writes_trigger = 4;
600 opts.memtable_factory.reset(new SpecialSkipListFactory(1));
601 opts.num_levels = 2;
602 BlockBasedTableOptions bbto;
603 bbto.cache_index_and_filter_blocks = true;
604 bbto.block_cache = NewLRUCache(8 << 20);
605 opts.table_factory.reset(NewBlockBasedTableFactory(bbto));
20effc67 606 DestroyAndReopen(opts);
7c673cae
FG
607
608 // Hold a snapshot so range deletions can't become obsolete during compaction
609 // to bottommost level (i.e., L1).
610 const Snapshot* snapshot = db_->GetSnapshot();
611 for (int i = 0; i < kNum; ++i) {
612 db_->Put(WriteOptions(), GetNumericStr(i), "val");
613 if (i > 0) {
614 dbfull()->TEST_WaitForFlushMemTable();
615 }
616 if (i >= kNum / 2 && i < kNum / 2 + kNumRanges) {
617 db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
618 GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
619 }
620 }
621 // Must be > 1 so the first L1 file can be closed before scan finishes
622 dbfull()->TEST_WaitForCompact();
623 ASSERT_GT(NumTableFilesAtLevel(1), 1);
624 std::vector<uint64_t> file_numbers = ListTableFiles(env_, dbname_);
625
626 ReadOptions read_opts;
627 auto* iter = db_->NewIterator(read_opts);
628 int expected = kRangeEnd;
629 iter->SeekToFirst();
630 for (auto file_number : file_numbers) {
631 // This puts table caches in the state of being externally referenced only
632 // so they are destroyed immediately upon iterator unreferencing.
633 TableCache::Evict(dbfull()->TEST_table_cache(), file_number);
634 }
635 for (; iter->Valid(); iter->Next()) {
636 ASSERT_EQ(GetNumericStr(expected), iter->key());
637 ++expected;
638 // Keep clearing block cache's LRU so range deletion block can be freed as
639 // soon as its refcount drops to zero.
640 bbto.block_cache->EraseUnRefEntries();
641 }
642 ASSERT_EQ(kNum, expected);
643 delete iter;
644 db_->ReleaseSnapshot(snapshot);
645}
646
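// A range tombstone in the active memtable must hide an older point key from
// Get().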
TEST_F(DBRangeDelTest, GetCoveredKeyFromMutableMemtable) {
  do {
    DestroyAndReopen(CurrentOptions());
    db_->Put(WriteOptions(), "key", "val");
    ASSERT_OK(
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

    ReadOptions read_opts;
    std::string value;
    ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

TEST_F(DBRangeDelTest, GetCoveredKeyFromImmutableMemtable) {
  do {
    Options opts = CurrentOptions();
    opts.max_write_buffer_number = 3;
    opts.min_write_buffer_number_to_merge = 2;
    // SpecialSkipListFactory lets us specify maximum number of elements the
    // memtable can hold. It switches the active memtable to immutable (flush is
    // prevented by the above options) upon inserting an element that would
    // overflow the memtable.
    opts.memtable_factory.reset(new SpecialSkipListFactory(1));
    DestroyAndReopen(opts);

    db_->Put(WriteOptions(), "key", "val");
    ASSERT_OK(
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
    db_->Put(WriteOptions(), "blah", "val");

    ReadOptions read_opts;
    std::string value;
    ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

TEST_F(DBRangeDelTest, GetCoveredKeyFromSst) {
  do {
    DestroyAndReopen(CurrentOptions());
    db_->Put(WriteOptions(), "key", "val");
    // snapshot prevents key from being deleted during flush
    const Snapshot* snapshot = db_->GetSnapshot();
    ASSERT_OK(
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
    ASSERT_OK(db_->Flush(FlushOptions()));

    ReadOptions read_opts;
    std::string value;
    ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
    db_->ReleaseSnapshot(snapshot);
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

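// A DeleteRange issued partway through a sequence of Merge operations hides
// the older operands from Get(), unless ignore_range_deletions is set.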
TEST_F(DBRangeDelTest, GetCoveredMergeOperandFromMemtable) {
  const int kNumMergeOps = 10;
  Options opts = CurrentOptions();
  opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  Reopen(opts);

  for (int i = 0; i < kNumMergeOps; ++i) {
    std::string val;
    PutFixed64(&val, i);
    db_->Merge(WriteOptions(), "key", val);
    if (i == kNumMergeOps / 2) {
      // deletes [0, 5]
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key",
                       "key_");
    }
  }

  ReadOptions read_opts;
  std::string expected, actual;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 30);  // 6+7+8+9
  ASSERT_EQ(expected, actual);

  expected.clear();
  read_opts.ignore_range_deletions = true;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 45);  // 0+1+2+...+9
  ASSERT_EQ(expected, actual);
}

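// With ReadOptions::ignore_range_deletions set, Get() should return keys that
// would otherwise be covered, whether they live in an SST file, an immutable
// memtable, or the active memtable.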
TEST_F(DBRangeDelTest, GetIgnoresRangeDeletions) {
  Options opts = CurrentOptions();
  opts.max_write_buffer_number = 4;
  opts.min_write_buffer_number_to_merge = 3;
  opts.memtable_factory.reset(new SpecialSkipListFactory(1));
  Reopen(opts);

  db_->Put(WriteOptions(), "sst_key", "val");
  // snapshot prevents key from being deleted during flush
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  db_->Put(WriteOptions(), "imm_key", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  db_->Put(WriteOptions(), "mem_key", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

  ReadOptions read_opts;
  read_opts.ignore_range_deletions = true;
  for (std::string key : {"sst_key", "imm_key", "mem_key"}) {
    std::string value;
    ASSERT_OK(db_->Get(read_opts, key, &value));
  }
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, IteratorRemovesCoveredKeys) {
  const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  DestroyAndReopen(opts);

  // Write half of the keys before the tombstone and half after the tombstone.
  // Only covered keys (i.e., within the range and older than the tombstone)
  // should be deleted.
  for (int i = 0; i < kNum; ++i) {
    if (i == kNum / 2) {
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                       GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
    }
    db_->Put(WriteOptions(), GetNumericStr(i), "val");
  }
  ReadOptions read_opts;
  auto* iter = db_->NewIterator(read_opts);

  int expected = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ASSERT_EQ(GetNumericStr(expected), iter->key());
    if (expected == kRangeBegin - 1) {
      expected = kNum / 2;
    } else {
      ++expected;
    }
  }
  ASSERT_EQ(kNum, expected);
  delete iter;
}

TEST_F(DBRangeDelTest, IteratorOverUserSnapshot) {
  const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  DestroyAndReopen(opts);

  const Snapshot* snapshot = nullptr;
  // Take a snapshot before the range tombstone; verify an iterator using that
  // snapshot sees all keys inserted before the snapshot, unaffected by the
  // later range tombstone.
  for (int i = 0; i < kNum; ++i) {
    if (i == kNum / 2) {
      snapshot = db_->GetSnapshot();
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                       GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
    }
    db_->Put(WriteOptions(), GetNumericStr(i), "val");
  }
  ReadOptions read_opts;
  read_opts.snapshot = snapshot;
  auto* iter = db_->NewIterator(read_opts);

  int expected = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ASSERT_EQ(GetNumericStr(expected), iter->key());
    ++expected;
  }
  ASSERT_EQ(kNum / 2, expected);
  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, IteratorIgnoresRangeDeletions) {
  Options opts = CurrentOptions();
  opts.max_write_buffer_number = 4;
  opts.min_write_buffer_number_to_merge = 3;
  opts.memtable_factory.reset(new SpecialSkipListFactory(1));
  Reopen(opts);

  db_->Put(WriteOptions(), "sst_key", "val");
  // snapshot prevents key from being deleted during flush
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  db_->Put(WriteOptions(), "imm_key", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  db_->Put(WriteOptions(), "mem_key", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

  ReadOptions read_opts;
  read_opts.ignore_range_deletions = true;
  auto* iter = db_->NewIterator(read_opts);
  int i = 0;
  std::string expected[] = {"imm_key", "mem_key", "sst_key"};
  for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++i) {
    std::string key;
    ASSERT_EQ(expected[i], iter->key());
  }
  ASSERT_EQ(3, i);
  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

#ifndef ROCKSDB_UBSAN_RUN
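// Tailing iterators do not support range tombstones: creating one over data
// that contains a DeleteRange should surface Status::NotSupported, whether the
// tombstone lives in the memtable, L0, or L1.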
TEST_F(DBRangeDelTest, TailingIteratorRangeTombstoneUnsupported) {
  db_->Put(WriteOptions(), "key", "val");
  // snapshot prevents key from being deleted during flush
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

  // iterations check unsupported in memtable, l0, and then l1
  for (int i = 0; i < 3; ++i) {
    ReadOptions read_opts;
    read_opts.tailing = true;
    auto* iter = db_->NewIterator(read_opts);
    if (i == 2) {
      // For L1+, iterators over files are created on-demand, so need seek
      iter->SeekToFirst();
    }
    ASSERT_TRUE(iter->status().IsNotSupported());
    delete iter;
    if (i == 0) {
      ASSERT_OK(db_->Flush(FlushOptions()));
    } else if (i == 1) {
      MoveFilesToLevel(1);
    }
  }
  db_->ReleaseSnapshot(snapshot);
}

#endif  // !ROCKSDB_UBSAN_RUN

TEST_F(DBRangeDelTest, SubcompactionHasEmptyDedicatedRangeDelFile) {
  const int kNumFiles = 2, kNumKeysPerFile = 4;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = kNumFiles;
  options.max_subcompactions = 2;
  options.num_levels = 2;
  options.target_file_size_base = 4096;
  Reopen(options);

  // need an L1 file for subcompaction to be triggered
  ASSERT_OK(
      db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(0), "val"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);

  // put enough keys to fill up the first subcompaction, and later range-delete
  // them so that the first subcompaction outputs no key-values. In that case
  // it'll consider making an SST file dedicated to range deletions.
  for (int i = 0; i < kNumKeysPerFile; ++i) {
    ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i),
                       std::string(1024, 'a')));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(kNumKeysPerFile)));

  // the above range tombstone can be dropped, so that one alone won't cause a
  // dedicated file to be opened. We can make one protected by snapshot that
  // must be considered. Make its range outside the first subcompaction's range
  // to exercise the tricky part of the code.
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             Key(kNumKeysPerFile + 1),
                             Key(kNumKeysPerFile + 2)));
  ASSERT_OK(db_->Flush(FlushOptions()));

  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  db_->EnableAutoCompaction({db_->DefaultColumnFamily()});
  dbfull()->TEST_WaitForCompact();
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, MemtableBloomFilter) {
  // Regression test for #2743. The range delete tombstones in the memtable
  // should still take effect even when Get() skips searching the memtable due
  // to its prefix bloom filter.
  const int kMemtableSize = 1 << 20;              // 1MB
  const int kMemtablePrefixFilterSize = 1 << 13;  // 8KB
  const int kNumKeys = 1000;
  const int kPrefixLen = 8;
  Options options = CurrentOptions();
  options.memtable_prefix_bloom_size_ratio =
      static_cast<double>(kMemtablePrefixFilterSize) / kMemtableSize;
  options.prefix_extractor.reset(
      ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen));
  options.write_buffer_size = kMemtableSize;
  Reopen(options);

  for (int i = 0; i < kNumKeys; ++i) {
    ASSERT_OK(Put(Key(i), "val"));
  }
  Flush();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(kNumKeys)));
  for (int i = 0; i < kNumKeys; ++i) {
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &value).IsNotFound());
  }
}

TEST_F(DBRangeDelTest, CompactionTreatsSplitInputLevelDeletionAtomically) {
  // This test originally verified that compaction treated files containing a
  // split range deletion in the input level as an atomic unit. I.e.,
  // compacting any input-level file(s) containing a portion of the range
  // deletion causes all other input-level files containing portions of that
  // same range deletion to be included in the compaction. Range deletion
  // tombstones are now truncated to sstable boundaries which removed the need
  // for that behavior (which could lead to excessively large
  // compactions).
  const int kNumFilesPerLevel = 4, kValueBytes = 4 << 10;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
  options.memtable_factory.reset(
      new SpecialSkipListFactory(2 /* num_entries_flush */));
  options.target_file_size_base = kValueBytes;
  // i == 0: CompactFiles
  // i == 1: CompactRange
  // i == 2: automatic compaction
  for (int i = 0; i < 3; ++i) {
    DestroyAndReopen(options);

    ASSERT_OK(Put(Key(0), ""));
    ASSERT_OK(db_->Flush(FlushOptions()));
    MoveFilesToLevel(2);
    ASSERT_EQ(1, NumTableFilesAtLevel(2));

    // snapshot protects range tombstone from dropping due to becoming obsolete.
    const Snapshot* snapshot = db_->GetSnapshot();
    db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                     Key(2 * kNumFilesPerLevel));

    Random rnd(301);
    std::string value = rnd.RandomString(kValueBytes);
    for (int j = 0; j < kNumFilesPerLevel; ++j) {
      // give files overlapping key-ranges to prevent trivial move
      ASSERT_OK(Put(Key(j), value));
      ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
      if (j > 0) {
        dbfull()->TEST_WaitForFlushMemTable();
        ASSERT_EQ(j, NumTableFilesAtLevel(0));
      }
    }
    // put extra key to trigger final flush
    ASSERT_OK(Put("", ""));
    dbfull()->TEST_WaitForFlushMemTable();
    dbfull()->TEST_WaitForCompact();
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_EQ(kNumFilesPerLevel, NumTableFilesAtLevel(1));

    ColumnFamilyMetaData meta;
    db_->GetColumnFamilyMetaData(&meta);
    if (i == 0) {
      ASSERT_OK(db_->CompactFiles(
          CompactionOptions(), {meta.levels[1].files[0].name}, 2 /* level */));
      ASSERT_EQ(0, NumTableFilesAtLevel(1));
    } else if (i == 1) {
      auto begin_str = Key(0), end_str = Key(1);
      Slice begin = begin_str, end = end_str;
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin, &end));
      ASSERT_EQ(3, NumTableFilesAtLevel(1));
    } else if (i == 2) {
      ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(),
                                {{"max_bytes_for_level_base", "10000"}}));
      dbfull()->TEST_WaitForCompact();
      ASSERT_EQ(1, NumTableFilesAtLevel(1));
    }
    ASSERT_GT(NumTableFilesAtLevel(2), 0);

    db_->ReleaseSnapshot(snapshot);
  }
}

TEST_F(DBRangeDelTest, RangeTombstoneEndKeyAsSstableUpperBound) {
  // Test the handling of the range-tombstone end-key as the
  // upper-bound for an sstable.

  const int kNumFilesPerLevel = 2, kValueBytes = 4 << 10;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
  options.memtable_factory.reset(
      new SpecialSkipListFactory(2 /* num_entries_flush */));
  options.target_file_size_base = kValueBytes;
  options.disable_auto_compactions = true;

  DestroyAndReopen(options);

  // Create an initial sstable at L2:
  //   [key000000#1,1, key000000#1,1]
  ASSERT_OK(Put(Key(0), ""));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // A snapshot protects the range tombstone from dropping due to
  // becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();
  db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                   Key(0), Key(2 * kNumFilesPerLevel));

  // Create 2 additional sstables in L0. Note that the first sstable
  // contains the range tombstone.
  //   [key000000#3,1, key000004#72057594037927935,15]
  //   [key000001#5,1, key000002#6,1]
  Random rnd(301);
  std::string value = rnd.RandomString(kValueBytes);
  for (int j = 0; j < kNumFilesPerLevel; ++j) {
    // Give files overlapping key-ranges to prevent a trivial move when we
    // compact from L0 to L1.
    ASSERT_OK(Put(Key(j), value));
    ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
    ASSERT_OK(db_->Flush(FlushOptions()));
    ASSERT_EQ(j + 1, NumTableFilesAtLevel(0));
  }
  // Compact the 2 L0 sstables to L1, resulting in the following LSM. There
  // are 2 sstables generated in L1 due to the target_file_size_base setting.
  // L1:
  //   [key000000#3,1, key000002#72057594037927935,15]
  //   [key000002#6,1, key000004#72057594037927935,15]
  // L2:
  //   [key000000#1,1, key000000#1,1]
  MoveFilesToLevel(1);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  {
    // Compact the second sstable in L1:
    // L1:
    //   [key000000#3,1, key000002#72057594037927935,15]
    // L2:
    //   [key000000#1,1, key000000#1,1]
    //   [key000002#6,1, key000004#72057594037927935,15]
    //
    // At the same time, verify the compaction does not cause the key at the
    // endpoint (key000002#6,1) to disappear.
    ASSERT_EQ(value, Get(Key(2)));
    auto begin_str = Key(3);
    const ROCKSDB_NAMESPACE::Slice begin = begin_str;
    dbfull()->TEST_CompactRange(1, &begin, nullptr);
    ASSERT_EQ(1, NumTableFilesAtLevel(1));
    ASSERT_EQ(2, NumTableFilesAtLevel(2));
    ASSERT_EQ(value, Get(Key(2)));
  }

  {
    // Compact the first sstable in L1. This should be copacetic, but
    // was previously resulting in overlapping sstables in L2 due to
    // mishandling of the range tombstone end-key when used as the
    // largest key for an sstable. The resulting LSM structure should
    // be:
    //
    // L2:
    //   [key000000#1,1, key000001#72057594037927935,15]
    //   [key000001#5,1, key000002#72057594037927935,15]
    //   [key000002#6,1, key000004#72057594037927935,15]
    auto begin_str = Key(0);
    const ROCKSDB_NAMESPACE::Slice begin = begin_str;
    dbfull()->TEST_CompactRange(1, &begin, &begin);
    ASSERT_EQ(0, NumTableFilesAtLevel(1));
    ASSERT_EQ(3, NumTableFilesAtLevel(2));
  }

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, UnorderedTombstones) {
  // Regression test for #2752. Range delete tombstones between
  // different snapshot stripes are not stored in order, so the first
  // tombstone of each snapshot stripe should be checked as a smallest
  // candidate.
  Options options = CurrentOptions();
  DestroyAndReopen(options);

  auto cf = db_->DefaultColumnFamily();

  ASSERT_OK(db_->Put(WriteOptions(), cf, "a", "a"));
  ASSERT_OK(db_->Flush(FlushOptions(), cf));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "b", "c"));
  // Hold a snapshot to separate these two delete ranges.
  auto snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "a", "b"));
  ASSERT_OK(db_->Flush(FlushOptions(), cf));
  db_->ReleaseSnapshot(snapshot);

  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_GetFilesMetaData(cf, &files);
  ASSERT_EQ(1, files[0].size());
  ASSERT_EQ("a", files[0][0].smallest.user_key());
  ASSERT_EQ("c", files[0][0].largest.user_key());

  std::string v;
  auto s = db_->Get(ReadOptions(), "a", &v);
  ASSERT_TRUE(s.IsNotFound());
}

class MockMergeOperator : public MergeOperator {
  // Mock non-associative operator. Non-associativity is expressed by lack of
  // implementation for any `PartialMerge*` functions.
 public:
  bool FullMergeV2(const MergeOperationInput& merge_in,
                   MergeOperationOutput* merge_out) const override {
    assert(merge_out != nullptr);
    merge_out->new_value = merge_in.operand_list.back().ToString();
    return true;
  }

  const char* Name() const override { return "MockMergeOperator"; }
};

TEST_F(DBRangeDelTest, KeyAtOverlappingEndpointReappears) {
  // This test uses a non-associative merge operator since that is a convenient
  // way to get compaction to write out files with overlapping user-keys at the
  // endpoints. Note, however, overlapping endpoints can also occur with other
  // value types (Put, etc.), assuming the right snapshots are present.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  const int kNumFiles = 4;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  // Push dummy data to L3 so that our actual test files on L0-L2
  // will not be considered "bottommost" level; otherwise compaction
  // may prevent us from creating overlapping user keys, since on the
  // bottommost level MergeHelper combines all the merge operands.
  ASSERT_OK(db_->Merge(WriteOptions(), "key", "dummy"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(3);

  Random rnd(301);
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = rnd.RandomString(kValueBytes);
      ASSERT_OK(db_->Merge(WriteOptions(), "key", value));
    }
    if (i == kNumFiles - 1) {
      // Take snapshot to prevent covered merge operands from being dropped by
      // compaction.
      snapshot = db_->GetSnapshot();
      // The DeleteRange is the last write so all merge operands are covered.
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 "key", "key_"));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  std::string value;
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  dbfull()->TEST_CompactRange(0 /* level */, nullptr /* begin */,
                              nullptr /* end */, nullptr /* column_family */,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  // Now we have multiple files at L1 all containing a single user key, thus
  // guaranteeing overlap in the file endpoints.
  ASSERT_GT(NumTableFilesAtLevel(1), 1);

  // Verify no merge operands reappeared after the compaction.
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  // Compact and verify again. It's worthwhile because now the files have
  // tighter endpoints, so we can verify that doesn't mess anything up.
  dbfull()->TEST_CompactRange(1 /* level */, nullptr /* begin */,
                              nullptr /* end */, nullptr /* column_family */,
                              true /* disallow_trivial_move */);
  ASSERT_GT(NumTableFilesAtLevel(2), 1);
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, UntruncatedTombstoneDoesNotDeleteNewerKey) {
  // Verify a key newer than a range tombstone cannot be deleted by being
  // compacted to the bottom level (and thus having its seqnum zeroed) before
  // the range tombstone. This used to happen when range tombstones were
  // untruncated on reads such that they extended past their file boundaries.
  //
  // Test summary:
  //
  // - L1 is bottommost.
  // - A couple snapshots are strategically taken to prevent seqnums from being
  //   zeroed, range tombstone from being dropped, merge operands from being
  //   dropped, and merge operands from being combined.
  // - Left half of files in L1 all have same user key, ensuring their file
  //   boundaries overlap. In the past this would cause range tombstones to be
  //   untruncated.
  // - Right half of L1 files all have different keys, ensuring no overlap.
  // - A range tombstone spans all L1 keys, so it is stored in every L1 file.
  // - Keys in the right side of the key-range are overwritten. These are
  //   compacted down to L1 after releasing snapshots such that their seqnums
  //   will be zeroed.
  // - A full range scan is performed. If the tombstone in the left L1 files
  //   were untruncated, it would now cover keys newer than it (but with zeroed
  //   seqnums) in the right L1 files.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  const int kNumFiles = 4;
  const int kMaxKey = kNumFiles * kFileBytes / kValueBytes;
  const int kKeysOverwritten = 10;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.num_levels = 2;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  Random rnd(301);
  // - snapshots[0] prevents merge operands from being combined during
  //   compaction.
  // - snapshots[1] prevents merge operands from being dropped due to the
  //   covering range tombstone.
  const Snapshot* snapshots[] = {nullptr, nullptr};
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = rnd.RandomString(kValueBytes);
      std::string key;
      if (i < kNumFiles / 2) {
        key = Key(0);
      } else {
        key = Key(1 + i * kFileBytes / kValueBytes + j);
      }
      ASSERT_OK(db_->Merge(WriteOptions(), key, value));
    }
    if (i == 0) {
      snapshots[0] = db_->GetSnapshot();
    }
    if (i == kNumFiles - 1) {
      snapshots[1] = db_->GetSnapshot();
      // The DeleteRange is the last write so all merge operands are covered.
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 Key(0), Key(kMaxKey + 1)));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));

  auto get_key_count = [this]() -> int {
    auto* iter = db_->NewIterator(ReadOptions());
    iter->SeekToFirst();
    int keys_found = 0;
    for (; iter->Valid(); iter->Next()) {
      ++keys_found;
    }
    delete iter;
    return keys_found;
  };

  // All keys should be covered
  ASSERT_EQ(0, get_key_count());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
                              nullptr /* end_key */));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  // Roughly the left half of L1 files should have overlapping boundary keys,
  // while the right half should not.
  ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);

  // Now overwrite a few keys that are in L1 files that definitely don't have
  // overlapping boundary keys.
  for (int i = kMaxKey; i > kMaxKey - kKeysOverwritten; --i) {
    auto value = rnd.RandomString(kValueBytes);
    ASSERT_OK(db_->Merge(WriteOptions(), Key(i), value));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));

  // The overwritten keys are in L0 now, so clearly aren't covered by the range
  // tombstone in L1.
  ASSERT_EQ(kKeysOverwritten, get_key_count());

  // Release snapshots so seqnums can be zeroed when L0->L1 happens.
  db_->ReleaseSnapshot(snapshots[0]);
  db_->ReleaseSnapshot(snapshots[1]);

  auto begin_key_storage = Key(kMaxKey - kKeysOverwritten + 1);
  auto end_key_storage = Key(kMaxKey);
  Slice begin_key(begin_key_storage);
  Slice end_key(end_key_storage);
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin_key, &end_key));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);

  ASSERT_EQ(kKeysOverwritten, get_key_count());
}
1355
1356TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) {
1357 // Exposes a bug where we were using
1358 // `RangeDelPositioningMode::kBackwardTraversal` while scanning merge operands
1359 // in the forward direction. Confusingly, this case happened during
1360 // `DBIter::Prev`. It could cause assertion failure, or reappearing keys.
1361 const int kFileBytes = 1 << 20;
1362 const int kValueBytes = 1 << 10;
1363 // Need multiple keys so we can get results when calling `Prev()` after
1364 // `SeekToLast()`.
1365 const int kNumKeys = 3;
1366 const int kNumFiles = 4;
1367
1368 Options options = CurrentOptions();
1369 options.compression = kNoCompression;
1370 options.disable_auto_compactions = true;
1371 options.merge_operator.reset(new MockMergeOperator());
1372 options.target_file_size_base = kFileBytes;
1373 Reopen(options);
1374
1375 Random rnd(301);
1376 const Snapshot* snapshot = nullptr;
1377 for (int i = 0; i < kNumFiles; ++i) {
1378 for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
20effc67 1379 auto value = rnd.RandomString(kValueBytes);
494da23a
TL
1380 ASSERT_OK(db_->Merge(WriteOptions(), Key(j % kNumKeys), value));
1381 if (i == 0 && j == kNumKeys) {
1382 // Take snapshot to prevent covered merge operands from being dropped or
1383 // merged by compaction.
1384 snapshot = db_->GetSnapshot();
1385 // Do a DeleteRange near the beginning so only the oldest merge operand
1386 // for each key is covered. This ensures the sequence of events:
1387 //
1388 // - `DBIter::Prev()` is called
1389 // - After several versions of the same user key are encountered,
1390 // it decides to seek using `DBIter::FindValueForCurrentKeyUsingSeek`.
1391 // - Binary searches to the newest version of the key, which is in the
1392 // leftmost file containing the user key.
1393 // - Scans forwards to collect all merge operands. Eventually reaches
1394 // the rightmost file containing the oldest merge operand, which
1395 // should be covered by the `DeleteRange`. If `RangeDelAggregator`
1396 // were not properly using `kForwardTraversal` here, that operand
1397 // would reappear.
1398 ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
1399 Key(0), Key(kNumKeys + 1)));
1400 }
1401 }
1402 ASSERT_OK(db_->Flush(FlushOptions()));
1403 }
1404 ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
1405
1406 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
1407 nullptr /* end_key */));
1408 ASSERT_EQ(0, NumTableFilesAtLevel(0));
1409 ASSERT_GT(NumTableFilesAtLevel(1), 1);
1410
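  // Scan backwards so DBIter::Prev walks the merge operands. Each of the
  // kNumKeys user keys should be returned exactly once; if the covered oldest
  // operand reappeared, the count below (or an internal assertion) would fail.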
1411 auto* iter = db_->NewIterator(ReadOptions());
1412 iter->SeekToLast();
1413 int keys_found = 0;
1414 for (; iter->Valid(); iter->Prev()) {
1415 ++keys_found;
1416 }
1417 delete iter;
1418 ASSERT_EQ(kNumKeys, keys_found);
1419
1420 db_->ReleaseSnapshot(snapshot);
1421}
1422
1423TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) {
1424 const int kFileBytes = 1 << 20;
1425
1426 Options options = CurrentOptions();
1427 options.compression = kNoCompression;
1428 options.disable_auto_compactions = true;
1429 options.target_file_size_base = kFileBytes;
1430 Reopen(options);
1431
1432 ASSERT_OK(Put(Key(0), "a"));
1433 const Snapshot* snapshot = db_->GetSnapshot();
1434
1435 ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
1436 Key(10)));
1437
1438 db_->Flush(FlushOptions());
1439
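  // The snapshot was taken before the DeleteRange, so a read at that snapshot
  // must still see Key(0) even though the flushed range tombstone covers it.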
1440 ReadOptions read_opts;
1441 read_opts.snapshot = snapshot;
1442 auto* iter = db_->NewIterator(read_opts);
1443
1444 iter->SeekToFirst();
1445 ASSERT_TRUE(iter->Valid());
1446 ASSERT_EQ(Key(0), iter->key());
1447
1448 iter->Next();
1449 ASSERT_FALSE(iter->Valid());
1450
1451 delete iter;
1452 db_->ReleaseSnapshot(snapshot);
1453}
1454
1455TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeysInImmMemTables) {
1456 const int kFileBytes = 1 << 20;
1457
1458 Options options = CurrentOptions();
1459 options.compression = kNoCompression;
1460 options.disable_auto_compactions = true;
1461 options.target_file_size_base = kFileBytes;
1462 Reopen(options);
1463
1464 // Block the flush thread so the immutable memtables stay pinned in memory.
1465 SyncPoint::GetInstance()->DisableProcessing();
1466 SyncPoint::GetInstance()->LoadDependency({
1467 {"SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator",
1468 "DBImpl::BGWorkFlush"},
1469 });
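  // The dependency makes DBImpl::BGWorkFlush wait until the test reaches the
  // AfterNewIterator sync point, so the iterator below is created while the
  // data still lives in an immutable memtable rather than an SST.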
1470 SyncPoint::GetInstance()->EnableProcessing();
1471
1472 ASSERT_OK(Put(Key(0), "a"));
1473 std::unique_ptr<const Snapshot, std::function<void(const Snapshot*)>>
1474 snapshot(db_->GetSnapshot(),
1475 [this](const Snapshot* s) { db_->ReleaseSnapshot(s); });
1476
1477 ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
1478 Key(10)));
1479
1480 ASSERT_OK(dbfull()->TEST_SwitchMemtable());
1481
1482 ReadOptions read_opts;
1483 read_opts.snapshot = snapshot.get();
1484 std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
1485
1486 TEST_SYNC_POINT("SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator");
1487
1488 iter->SeekToFirst();
1489 ASSERT_TRUE(iter->Valid());
1490 ASSERT_EQ(Key(0), iter->key());
1491
1492 iter->Next();
1493 ASSERT_FALSE(iter->Valid());
1494}
1495
1496TEST_F(DBRangeDelTest, RangeTombstoneWrittenToMinimalSsts) {
1497 // Adapted from
1498 // https://github.com/cockroachdb/cockroach/blob/de8b3ea603dd1592d9dc26443c2cc92c356fbc2f/pkg/storage/engine/rocksdb_test.go#L1267-L1398.
1499 // Regression test for issue where range tombstone was written to more files
1500 // than necessary when it began exactly at the begin key in the next
1501 // compaction output file.
1502 const int kFileBytes = 1 << 20;
1503 const int kValueBytes = 4 << 10;
1504 Options options = CurrentOptions();
1505 options.compression = kNoCompression;
1506 options.disable_auto_compactions = true;
1507 // Have a bit of slack in the size limits but we enforce them more strictly
1508 // when manually flushing/compacting.
1509 options.max_compaction_bytes = 2 * kFileBytes;
1510 options.target_file_size_base = 2 * kFileBytes;
1511 options.write_buffer_size = 2 * kFileBytes;
1512 Reopen(options);
1513
1514 Random rnd(301);
1515 for (char first_char : {'a', 'b', 'c'}) {
1516 for (int i = 0; i < kFileBytes / kValueBytes; ++i) {
1517 std::string key(1, first_char);
1518 key.append(Key(i));
20effc67 1519 std::string value = rnd.RandomString(kValueBytes);
1520 ASSERT_OK(Put(key, value));
1521 }
1522 db_->Flush(FlushOptions());
1523 MoveFilesToLevel(2);
1524 }
1525 ASSERT_EQ(0, NumTableFilesAtLevel(0));
1526 ASSERT_EQ(3, NumTableFilesAtLevel(2));
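  // L2 now holds one ~1MB file per prefix ('a', 'b', 'c'), since each prefix's
  // keys were flushed and moved to L2 separately.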
1527
1528 // Populate the memtable lightly while spanning the whole key-space. The
1529 // setting of `max_compaction_bytes` will cause the L0->L1 compaction to
1530 // output multiple files, which prevents a large L1->L2 compaction later.
1531 ASSERT_OK(Put("a", "val"));
1532 ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
1533 "c" + Key(1), "d"));
1534 // Our compaction output file cutting logic currently only considers point
1535 // keys. So, in order for the range tombstone to have a chance at landing at
1536 // the start of a new file, we need a point key at the range tombstone's
1537 // start.
1538 // TODO(ajkr): remove this `Put` after file cutting accounts for range
1539 // tombstones (#3977).
1540 ASSERT_OK(Put("c" + Key(1), "value"));
1541 db_->Flush(FlushOptions());
1542
1543 // Ensure manual L0->L1 compaction cuts the outputs before the range tombstone
1544 // and the range tombstone is only placed in the second SST.
1545 std::string begin_key_storage("c" + Key(1));
1546 Slice begin_key(begin_key_storage);
1547 std::string end_key_storage("d");
1548 Slice end_key(end_key_storage);
1549 dbfull()->TEST_CompactRange(0 /* level */, &begin_key /* begin */,
1550 &end_key /* end */, nullptr /* column_family */,
1551 true /* disallow_trivial_move */);
1552 ASSERT_EQ(2, NumTableFilesAtLevel(1));
1553
1554 std::vector<LiveFileMetaData> all_metadata;
1555 std::vector<LiveFileMetaData> l1_metadata;
1556 db_->GetLiveFilesMetaData(&all_metadata);
1557 for (const auto& metadata : all_metadata) {
1558 if (metadata.level == 1) {
1559 l1_metadata.push_back(metadata);
1560 }
1561 }
1562 std::sort(l1_metadata.begin(), l1_metadata.end(),
1563 [&](const LiveFileMetaData& a, const LiveFileMetaData& b) {
1564 return options.comparator->Compare(a.smallestkey, b.smallestkey) <
1565 0;
1566 });
1567 ASSERT_EQ("a", l1_metadata[0].smallestkey);
1568 ASSERT_EQ("a", l1_metadata[0].largestkey);
1569 ASSERT_EQ("c" + Key(1), l1_metadata[1].smallestkey);
1570 ASSERT_EQ("d", l1_metadata[1].largestkey);
1571
1572 TablePropertiesCollection all_table_props;
1573 ASSERT_OK(db_->GetPropertiesOfAllTables(&all_table_props));
1574 int64_t num_range_deletions = 0;
1575 for (const auto& name_and_table_props : all_table_props) {
1576 const auto& name = name_and_table_props.first;
1577 const auto& table_props = name_and_table_props.second;
1578 // The range tombstone should only be output to the second L1 SST.
1579 if (name.size() >= l1_metadata[1].name.size() &&
1580 name.substr(name.size() - l1_metadata[1].name.size()).compare(l1_metadata[1].name) == 0) {
1581 ASSERT_EQ(1, table_props->num_range_deletions);
1582 ++num_range_deletions;
1583 } else {
1584 ASSERT_EQ(0, table_props->num_range_deletions);
1585 }
1586 }
1587 ASSERT_EQ(1, num_range_deletions);
1588}
1589
1590TEST_F(DBRangeDelTest, OverlappedTombstones) {
1591 const int kNumPerFile = 4, kNumFiles = 2;
1592 Options options = CurrentOptions();
1593 options.disable_auto_compactions = true;
1594 options.max_compaction_bytes = 9 * 1024;
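  // Note: max_compaction_bytes (9KB) is smaller than the ~12KB written per L2
  // file below, so overlap with the grandparent level could force compaction
  // outputs to be cut; the assertions below check that a tombstone-only output
  // is nevertheless kept in a single L1 file.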
1595 DestroyAndReopen(options);
1596 Random rnd(301);
1597 for (int i = 0; i < kNumFiles; ++i) {
1598 std::vector<std::string> values;
1599 // Write 12K (4 values, each 3K)
1600 for (int j = 0; j < kNumPerFile; j++) {
20effc67 1601 values.push_back(rnd.RandomString(3 << 10));
1602 ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
1603 }
1604 }
1605 ASSERT_OK(db_->Flush(FlushOptions()));
1606 ASSERT_EQ(1, NumTableFilesAtLevel(0));
1607 MoveFilesToLevel(2);
1608 ASSERT_EQ(2, NumTableFilesAtLevel(2));
1609
1610 ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1),
1611 Key((kNumFiles)*kNumPerFile + 1)));
1612 ASSERT_OK(db_->Flush(FlushOptions()));
1613
1614 ASSERT_EQ(1, NumTableFilesAtLevel(0));
1615
1616 dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
1617 true /* disallow_trivial_move */);
1618
1619 // The tombstone range is not broken up into multiple SSTs, even though the
1620 // single output file may later incur a large compaction with L2.
1621 ASSERT_EQ(1, NumTableFilesAtLevel(1));
1622 std::vector<std::vector<FileMetaData>> files;
1623 dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
1624 true /* disallow_trivial_move */);
1625 ASSERT_EQ(1, NumTableFilesAtLevel(2));
1626 ASSERT_EQ(0, NumTableFilesAtLevel(1));
1627}
1628
1629TEST_F(DBRangeDelTest, OverlappedKeys) {
1630 const int kNumPerFile = 4, kNumFiles = 2;
1631 Options options = CurrentOptions();
1632 options.disable_auto_compactions = true;
1633 options.max_compaction_bytes = 9 * 1024;
1634 DestroyAndReopen(options);
1635 Random rnd(301);
1636 for (int i = 0; i < kNumFiles; ++i) {
1637 std::vector<std::string> values;
1638 // Write 12K (4 values, each 3K)
1639 for (int j = 0; j < kNumPerFile; j++) {
20effc67 1640 values.push_back(rnd.RandomString(3 << 10));
1641 ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
1642 }
1643 }
1644 ASSERT_OK(db_->Flush(FlushOptions()));
1645 ASSERT_EQ(1, NumTableFilesAtLevel(0));
1646 MoveFilesToLevel(2);
1647 ASSERT_EQ(2, NumTableFilesAtLevel(2));
1648
1649 for (int i = 1; i < kNumFiles * kNumPerFile + 1; i++) {
1650 ASSERT_OK(Put(Key(i), "0x123"));
1651 }
1652 ASSERT_OK(db_->Flush(FlushOptions()));
1653 ASSERT_EQ(1, NumTableFilesAtLevel(0));
1654
1655 // The key range is broken up into three SSTs to avoid a future big compaction
1656 // with the grandparent level (L2).
1657 dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
1658 true /* disallow_trivial_move */);
1659 ASSERT_EQ(3, NumTableFilesAtLevel(1));
1660
1661 std::vector<std::vector<FileMetaData>> files;
1662 dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
1663 true /* disallow_trivial_move */);
1664 ASSERT_EQ(1, NumTableFilesAtLevel(2));
1665 ASSERT_EQ(0, NumTableFilesAtLevel(1));
1666}
1667
1668#endif // ROCKSDB_LITE
1669
f67539c2 1670} // namespace ROCKSDB_NAMESPACE
1671
1672int main(int argc, char** argv) {
f67539c2 1673 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
1674 ::testing::InitGoogleTest(&argc, argv);
1675 return RUN_ALL_TESTS();
1676}