Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2016-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #include "db/db_test_util.h" | |
7 | #include "port/stack_trace.h" | |
f67539c2 TL |
8 | #include "rocksdb/utilities/write_batch_with_index.h" |
9 | #include "test_util/testutil.h" | |
20effc67 | 10 | #include "util/random.h" |
7c673cae FG |
11 | #include "utilities/merge_operators.h" |
12 | ||
f67539c2 | 13 | namespace ROCKSDB_NAMESPACE { |
7c673cae FG |
14 | |
15 | class DBRangeDelTest : public DBTestBase { | |
16 | public: | |
20effc67 | 17 | DBRangeDelTest() : DBTestBase("/db_range_del_test", /*env_do_fsync=*/false) {} |
7c673cae FG |
18 | |
19 | std::string GetNumericStr(int key) { | |
20 | uint64_t uint64_key = static_cast<uint64_t>(key); | |
21 | std::string str; | |
22 | str.resize(8); | |
23 | memcpy(&str[0], static_cast<void*>(&uint64_key), 8); | |
24 | return str; | |
25 | } | |
26 | }; | |
27 | ||
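
A note on the helper above: `GetNumericStr()` copies the raw host-endian bytes of a `uint64_t`, so the resulting keys sort numerically only when paired with `test::Uint64Comparator()`, as the tests below do. A minimal sketch of the inverse decoding; `DecodeNumericStr` is a hypothetical helper named here for illustration:

```cpp
#include <cstdint>
#include <cstring>
#include <string>

// Hypothetical inverse of GetNumericStr(): recover the integer key from the
// 8 raw host-endian bytes written by the memcpy above.
uint64_t DecodeNumericStr(const std::string& str) {
  uint64_t key = 0;
  memcpy(&key, str.data(), sizeof(key));  // assumes str.size() == 8
  return key;
}
```
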
f67539c2 TL |
28 | // PlainTableFactory, WriteBatchWithIndex, and NumTableFilesAtLevel() are not |
29 | // supported in ROCKSDB_LITE | |
7c673cae FG |
30 | #ifndef ROCKSDB_LITE |
31 | TEST_F(DBRangeDelTest, NonBlockBasedTableNotSupported) { | |
11fdf7f2 TL |
32 | // TODO: figure out why MmapReads trips the iterator pinning assertion in |
33 | // RangeDelAggregator. Ideally it would be supported; otherwise it should at | |
34 | // least be explicitly unsupported. | |
35 | for (auto config : {kPlainTableAllBytesPrefix, /* kWalDirAndMmapReads */}) { | |
36 | option_config_ = config; | |
37 | DestroyAndReopen(CurrentOptions()); | |
38 | ASSERT_TRUE(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
39 | "dr1", "dr1") | |
40 | .IsNotSupported()); | |
41 | } | |
7c673cae FG |
42 | } |
43 | ||
f67539c2 TL |
44 | TEST_F(DBRangeDelTest, WriteBatchWithIndexNotSupported) { |
45 | WriteBatchWithIndex indexedBatch{}; | |
46 | ASSERT_TRUE(indexedBatch.DeleteRange(db_->DefaultColumnFamily(), "dr1", "dr1") | |
47 | .IsNotSupported()); | |
48 | ASSERT_TRUE(indexedBatch.DeleteRange("dr1", "dr1").IsNotSupported()); | |
49 | } | |
50 | ||
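
For contrast with the indexed batch rejected above, a plain `WriteBatch` does accept range deletions. A minimal sketch, assuming an already-open `db`; `DeleteRangeViaBatch` is a hypothetical helper name:

```cpp
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/write_batch.h"

// Range deletions are supported in a plain (non-indexed) WriteBatch.
void DeleteRangeViaBatch(rocksdb::DB* db) {
  rocksdb::WriteBatch batch;
  rocksdb::Status s =
      batch.DeleteRange(db->DefaultColumnFamily(), "dr1", "dr2");
  assert(s.ok());
  s = db->Write(rocksdb::WriteOptions(), &batch);
  assert(s.ok());
}
```
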
20effc67 TL |
51 | TEST_F(DBRangeDelTest, EndSameAsStartCoversNothing) { |
52 | ASSERT_OK(db_->Put(WriteOptions(), "b", "val")); | |
53 | ASSERT_OK( | |
54 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "b")); | |
55 | ASSERT_EQ("val", Get("b")); | |
56 | } | |
57 | ||
58 | TEST_F(DBRangeDelTest, EndComesBeforeStartInvalidArgument) { | |
59 | db_->Put(WriteOptions(), "b", "val"); | |
60 | ASSERT_TRUE( | |
61 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "a") | |
62 | .IsInvalidArgument()); | |
63 | ASSERT_EQ("val", Get("b")); | |
64 | } | |
65 | ||
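
The two tests above pin down the interval convention: `DeleteRange(begin, end)` covers the half-open range `[begin, end)`, so an empty interval succeeds but deletes nothing, while a reversed interval is rejected. A minimal sketch of the expected statuses, assuming an already-open `db`:

```cpp
#include <cassert>

#include "rocksdb/db.h"

void IntervalConvention(rocksdb::DB* db) {
  auto* cf = db->DefaultColumnFamily();
  rocksdb::WriteOptions wo;
  // [b, b) is empty: the call succeeds but covers no keys.
  assert(db->DeleteRange(wo, cf, "b", "b").ok());
  // end < begin is malformed and rejected up front.
  assert(db->DeleteRange(wo, cf, "b", "a").IsInvalidArgument());
  // [a, c) covers "b" but not "c".
  assert(db->DeleteRange(wo, cf, "a", "c").ok());
}
```
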
7c673cae | 66 | TEST_F(DBRangeDelTest, FlushOutputHasOnlyRangeTombstones) { |
11fdf7f2 TL |
67 | do { |
68 | DestroyAndReopen(CurrentOptions()); | |
69 | ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
70 | "dr1", "dr2")); | |
71 | ASSERT_OK(db_->Flush(FlushOptions())); | |
72 | ASSERT_EQ(1, NumTableFilesAtLevel(0)); | |
73 | } while (ChangeOptions(kRangeDelSkipConfigs)); | |
7c673cae FG |
74 | } |
75 | ||
76 | TEST_F(DBRangeDelTest, CompactionOutputHasOnlyRangeTombstone) { | |
11fdf7f2 TL |
77 | do { |
78 | Options opts = CurrentOptions(); | |
79 | opts.disable_auto_compactions = true; | |
80 | opts.statistics = CreateDBStatistics(); | |
81 | DestroyAndReopen(opts); | |
82 | ||
83 | // snapshot protects range tombstone from dropping due to becoming obsolete. | |
84 | const Snapshot* snapshot = db_->GetSnapshot(); | |
85 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"); | |
86 | db_->Flush(FlushOptions()); | |
87 | ||
88 | ASSERT_EQ(1, NumTableFilesAtLevel(0)); | |
89 | ASSERT_EQ(0, NumTableFilesAtLevel(1)); | |
90 | dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, | |
91 | true /* disallow_trivial_move */); | |
92 | ASSERT_EQ(0, NumTableFilesAtLevel(0)); | |
93 | ASSERT_EQ(1, NumTableFilesAtLevel(1)); | |
94 | ASSERT_EQ(0, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE)); | |
95 | db_->ReleaseSnapshot(snapshot); | |
96 | // Skip cuckoo memtables, which do not support snapshots. Skip non-leveled | |
97 | // compactions as the above assertions about the number of files in a level | |
98 | // do not hold true. | |
494da23a TL |
99 | } while (ChangeOptions(kRangeDelSkipConfigs | kSkipUniversalCompaction | |
100 | kSkipFIFOCompaction)); | |
7c673cae FG |
101 | } |
102 | ||
103 | TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) { | |
104 | // Regression test for exactly filled compaction output files. Previously |
105 | // another file would be generated containing all range deletions, which | |
106 | // could invalidate the non-overlapping file boundary invariant. | |
107 | const int kNumPerFile = 4, kNumFiles = 2, kFileBytes = 9 << 10; | |
108 | Options options = CurrentOptions(); | |
109 | options.disable_auto_compactions = true; | |
110 | options.level0_file_num_compaction_trigger = kNumFiles; | |
111 | options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); | |
112 | options.num_levels = 2; | |
113 | options.target_file_size_base = kFileBytes; | |
114 | BlockBasedTableOptions table_options; | |
115 | table_options.block_size_deviation = 50; // each block holds two keys | |
116 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); | |
117 | Reopen(options); | |
118 | ||
119 | // snapshot protects range tombstone from dropping due to becoming obsolete. | |
120 | const Snapshot* snapshot = db_->GetSnapshot(); | |
121 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(1)); | |
122 | ||
123 | Random rnd(301); | |
124 | for (int i = 0; i < kNumFiles; ++i) { | |
125 | std::vector<std::string> values; | |
126 | // Write 12K (4 values, each 3K) | |
127 | for (int j = 0; j < kNumPerFile; j++) { | |
20effc67 | 128 | values.push_back(rnd.RandomString(3 << 10)); |
7c673cae FG |
129 | ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j])); |
130 | if (j == 0 && i > 0) { | |
131 | dbfull()->TEST_WaitForFlushMemTable(); | |
132 | } | |
133 | } | |
134 | } | |
135 | // put extra key to trigger final flush | |
136 | ASSERT_OK(Put("", "")); | |
137 | dbfull()->TEST_WaitForFlushMemTable(); | |
138 | ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0)); | |
139 | ASSERT_EQ(0, NumTableFilesAtLevel(1)); | |
140 | ||
141 | dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, | |
142 | true /* disallow_trivial_move */); | |
143 | ASSERT_EQ(0, NumTableFilesAtLevel(0)); | |
144 | ASSERT_EQ(2, NumTableFilesAtLevel(1)); | |
145 | db_->ReleaseSnapshot(snapshot); | |
146 | } | |
147 | ||
148 | TEST_F(DBRangeDelTest, MaxCompactionBytesCutsOutputFiles) { | |
149 | // Ensures range deletion spanning multiple compaction output files that are | |
150 | // cut by max_compaction_bytes will have non-overlapping key-ranges. | |
151 | // https://github.com/facebook/rocksdb/issues/1778 | |
152 | const int kNumFiles = 2, kNumPerFile = 1 << 8, kBytesPerVal = 1 << 12; | |
153 | Options opts = CurrentOptions(); | |
154 | opts.comparator = test::Uint64Comparator(); | |
155 | opts.disable_auto_compactions = true; | |
156 | opts.level0_file_num_compaction_trigger = kNumFiles; | |
157 | opts.max_compaction_bytes = kNumPerFile * kBytesPerVal; | |
158 | opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); | |
159 | // We want max_compaction_bytes, not target_file_size_base, to determine |
160 | // when compaction output files are cut, so make the latter much bigger. |
161 | opts.target_file_size_base = 100 * opts.max_compaction_bytes; | |
20effc67 | 162 | DestroyAndReopen(opts); |
7c673cae FG |
163 | |
164 | // snapshot protects range tombstone from dropping due to becoming obsolete. | |
165 | const Snapshot* snapshot = db_->GetSnapshot(); | |
166 | ||
167 | // The range deletion spans the whole key-range, so it is included in all output files |
168 | ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
169 | GetNumericStr(0), | |
170 | GetNumericStr(kNumFiles * kNumPerFile - 1))); | |
171 | Random rnd(301); | |
172 | for (int i = 0; i < kNumFiles; ++i) { | |
173 | std::vector<std::string> values; | |
174 | // Write 1MB (256 values, each 4K) | |
175 | for (int j = 0; j < kNumPerFile; j++) { | |
20effc67 | 176 | values.push_back(rnd.RandomString(kBytesPerVal)); |
7c673cae FG |
177 | ASSERT_OK(Put(GetNumericStr(kNumPerFile * i + j), values[j])); |
178 | } | |
179 | // extra entry to trigger SpecialSkipListFactory's flush | |
180 | ASSERT_OK(Put(GetNumericStr(kNumPerFile), "")); | |
181 | dbfull()->TEST_WaitForFlushMemTable(); | |
182 | ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); | |
183 | } | |
184 | ||
185 | dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, | |
186 | true /* disallow_trivial_move */); | |
187 | ASSERT_EQ(0, NumTableFilesAtLevel(0)); | |
188 | ASSERT_GE(NumTableFilesAtLevel(1), 2); | |
189 | ||
190 | std::vector<std::vector<FileMetaData>> files; | |
191 | dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files); | |
192 | ||
20effc67 | 193 | for (size_t i = 0; i + 1 < files[1].size(); ++i) { |
7c673cae FG |
194 | ASSERT_TRUE(InternalKeyComparator(opts.comparator) |
195 | .Compare(files[1][i].largest, files[1][i + 1].smallest) < | |
196 | 0); | |
197 | } | |
198 | db_->ReleaseSnapshot(snapshot); | |
199 | } | |
200 | ||
201 | TEST_F(DBRangeDelTest, SentinelsOmittedFromOutputFile) { | |
202 | // Regression test for bug where sentinel range deletions (i.e., ones with | |
203 | // sequence number of zero) were included in output files. | |
204 | // snapshot protects range tombstone from dropping due to becoming obsolete. | |
205 | const Snapshot* snapshot = db_->GetSnapshot(); | |
206 | ||
207 | // gaps between ranges create sentinels in our internal representation |
208 | std::vector<std::pair<std::string, std::string>> range_dels = {{"a", "b"}, {"c", "d"}, {"e", "f"}}; | |
209 | for (const auto& range_del : range_dels) { | |
210 | ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
211 | range_del.first, range_del.second)); | |
212 | } | |
213 | ASSERT_OK(db_->Flush(FlushOptions())); | |
214 | ASSERT_EQ(1, NumTableFilesAtLevel(0)); | |
215 | ||
216 | std::vector<std::vector<FileMetaData>> files; | |
217 | dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files); | |
11fdf7f2 | 218 | ASSERT_GT(files[0][0].fd.smallest_seqno, 0); |
7c673cae FG |
219 | |
220 | db_->ReleaseSnapshot(snapshot); | |
221 | } | |
222 | ||
223 | TEST_F(DBRangeDelTest, FlushRangeDelsSameStartKey) { | |
224 | db_->Put(WriteOptions(), "b1", "val"); | |
225 | ASSERT_OK( | |
226 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c")); | |
227 | db_->Put(WriteOptions(), "b2", "val"); | |
228 | ASSERT_OK( | |
229 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b")); | |
230 | // first iteration verifies query correctness in memtable, second verifies | |
231 | // query correctness for a single SST file | |
232 | for (int i = 0; i < 2; ++i) { | |
233 | if (i > 0) { | |
234 | ASSERT_OK(db_->Flush(FlushOptions())); | |
235 | ASSERT_EQ(1, NumTableFilesAtLevel(0)); | |
236 | } | |
237 | std::string value; | |
238 | ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound()); | |
239 | ASSERT_OK(db_->Get(ReadOptions(), "b2", &value)); | |
240 | } | |
241 | } | |
242 | ||
243 | TEST_F(DBRangeDelTest, CompactRangeDelsSameStartKey) { | |
244 | db_->Put(WriteOptions(), "unused", "val"); // prevents empty after compaction | |
245 | db_->Put(WriteOptions(), "b1", "val"); | |
246 | ASSERT_OK(db_->Flush(FlushOptions())); | |
247 | ASSERT_OK( | |
248 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c")); | |
249 | ASSERT_OK(db_->Flush(FlushOptions())); | |
250 | ASSERT_OK( | |
251 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b")); | |
252 | ASSERT_OK(db_->Flush(FlushOptions())); | |
253 | ASSERT_EQ(3, NumTableFilesAtLevel(0)); | |
254 | ||
255 | for (int i = 0; i < 2; ++i) { | |
256 | if (i > 0) { | |
257 | dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, | |
258 | true /* disallow_trivial_move */); | |
259 | ASSERT_EQ(0, NumTableFilesAtLevel(0)); | |
260 | ASSERT_EQ(1, NumTableFilesAtLevel(1)); | |
261 | } | |
262 | std::string value; | |
263 | ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound()); | |
264 | } | |
265 | } | |
266 | #endif // ROCKSDB_LITE | |
267 | ||
268 | TEST_F(DBRangeDelTest, FlushRemovesCoveredKeys) { | |
269 | const int kNum = 300, kRangeBegin = 50, kRangeEnd = 250; | |
270 | Options opts = CurrentOptions(); | |
271 | opts.comparator = test::Uint64Comparator(); | |
20effc67 | 272 | DestroyAndReopen(opts); |
7c673cae FG |
273 | |
274 | // Write a third before snapshot, a third between snapshot and tombstone, and | |
275 | // a third after the tombstone. Keys older than snapshot or newer than the | |
276 | // tombstone should be preserved. | |
277 | const Snapshot* snapshot = nullptr; | |
278 | for (int i = 0; i < kNum; ++i) { | |
279 | if (i == kNum / 3) { | |
280 | snapshot = db_->GetSnapshot(); | |
281 | } else if (i == 2 * kNum / 3) { | |
282 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
283 | GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd)); | |
284 | } | |
285 | db_->Put(WriteOptions(), GetNumericStr(i), "val"); | |
286 | } | |
287 | db_->Flush(FlushOptions()); | |
288 | ||
289 | for (int i = 0; i < kNum; ++i) { | |
290 | ReadOptions read_opts; | |
291 | read_opts.ignore_range_deletions = true; | |
292 | std::string value; | |
293 | if (i < kRangeBegin || i > kRangeEnd || i < kNum / 3 || i >= 2 * kNum / 3) { | |
294 | ASSERT_OK(db_->Get(read_opts, GetNumericStr(i), &value)); | |
295 | } else { | |
296 | ASSERT_TRUE(db_->Get(read_opts, GetNumericStr(i), &value).IsNotFound()); | |
297 | } | |
298 | } | |
299 | db_->ReleaseSnapshot(snapshot); | |
300 | } | |
301 | ||
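
`ReadOptions::ignore_range_deletions`, used above, is what lets the test distinguish keys that are merely covered by a tombstone from keys that flush physically removed: with the flag set, reads skip tombstone filtering entirely. A minimal sketch; `GetIgnoringRangeDels` is a hypothetical wrapper:

```cpp
#include <string>

#include "rocksdb/db.h"

// Read a key while skipping range-tombstone filtering; returns OK iff the
// key's value still physically exists in a memtable or SST file.
rocksdb::Status GetIgnoringRangeDels(rocksdb::DB* db,
                                     const rocksdb::Slice& key,
                                     std::string* value) {
  rocksdb::ReadOptions read_opts;
  read_opts.ignore_range_deletions = true;
  return db->Get(read_opts, key, value);
}
```
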
302 | // NumTableFilesAtLevel() is not supported in ROCKSDB_LITE | |
303 | #ifndef ROCKSDB_LITE | |
304 | TEST_F(DBRangeDelTest, CompactionRemovesCoveredKeys) { | |
305 | const int kNumPerFile = 100, kNumFiles = 4; | |
306 | Options opts = CurrentOptions(); | |
307 | opts.comparator = test::Uint64Comparator(); | |
308 | opts.disable_auto_compactions = true; | |
309 | opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); | |
310 | opts.num_levels = 2; | |
311 | opts.statistics = CreateDBStatistics(); | |
20effc67 | 312 | DestroyAndReopen(opts); |
7c673cae FG |
313 | |
314 | for (int i = 0; i < kNumFiles; ++i) { | |
315 | if (i > 0) { | |
316 | // range tombstone covers first half of the previous file | |
317 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
318 | GetNumericStr((i - 1) * kNumPerFile), | |
319 | GetNumericStr((i - 1) * kNumPerFile + kNumPerFile / 2)); | |
320 | } | |
321 | // Make sure a given key appears in each file so compaction won't be able to | |
322 | // use trivial move, which would happen if the ranges were non-overlapping. | |
323 | // Also, we need an extra element since flush is only triggered when the | |
324 | // number of keys is one greater than SpecialSkipListFactory's limit. | |
325 | // We choose a key outside the key-range used by the test to avoid conflict. | |
326 | db_->Put(WriteOptions(), GetNumericStr(kNumPerFile * kNumFiles), "val"); | |
327 | ||
328 | for (int j = 0; j < kNumPerFile; ++j) { | |
329 | db_->Put(WriteOptions(), GetNumericStr(i * kNumPerFile + j), "val"); | |
330 | } | |
331 | dbfull()->TEST_WaitForFlushMemTable(); | |
332 | ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); | |
333 | } | |
334 | db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); | |
335 | ASSERT_EQ(0, NumTableFilesAtLevel(0)); | |
336 | ASSERT_GT(NumTableFilesAtLevel(1), 0); | |
337 | ASSERT_EQ((kNumFiles - 1) * kNumPerFile / 2, | |
338 | TestGetTickerCount(opts, COMPACTION_KEY_DROP_RANGE_DEL)); | |
339 | ||
340 | for (int i = 0; i < kNumFiles; ++i) { | |
341 | for (int j = 0; j < kNumPerFile; ++j) { | |
342 | ReadOptions read_opts; | |
343 | read_opts.ignore_range_deletions = true; | |
344 | std::string value; | |
345 | if (i == kNumFiles - 1 || j >= kNumPerFile / 2) { | |
346 | ASSERT_OK( | |
347 | db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value)); | |
348 | } else { | |
349 | ASSERT_TRUE( | |
350 | db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value) | |
351 | .IsNotFound()); | |
352 | } | |
353 | } | |
354 | } | |
355 | } | |
356 | ||
357 | TEST_F(DBRangeDelTest, ValidLevelSubcompactionBoundaries) { | |
358 | const int kNumPerFile = 100, kNumFiles = 4, kFileBytes = 100 << 10; | |
359 | Options options = CurrentOptions(); | |
360 | options.disable_auto_compactions = true; | |
361 | options.level0_file_num_compaction_trigger = kNumFiles; | |
362 | options.max_bytes_for_level_base = 2 * kFileBytes; | |
363 | options.max_subcompactions = 4; | |
364 | options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); | |
365 | options.num_levels = 3; | |
366 | options.target_file_size_base = kFileBytes; | |
367 | options.target_file_size_multiplier = 1; | |
368 | Reopen(options); | |
369 | ||
370 | Random rnd(301); | |
371 | for (int i = 0; i < 2; ++i) { | |
372 | for (int j = 0; j < kNumFiles; ++j) { | |
373 | if (i > 0) { | |
374 | // delete [95,105) in two files, [295,305) in next two | |
375 | int mid = (j + (1 - j % 2)) * kNumPerFile; | |
376 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
377 | Key(mid - 5), Key(mid + 5)); | |
378 | } | |
379 | std::vector<std::string> values; | |
380 | // Write 100KB (100 values, each 1K) | |
381 | for (int k = 0; k < kNumPerFile; k++) { | |
20effc67 | 382 | values.push_back(rnd.RandomString(990)); |
7c673cae FG |
383 | ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k])); |
384 | } | |
385 | // put extra key to trigger flush | |
386 | ASSERT_OK(Put("", "")); | |
387 | dbfull()->TEST_WaitForFlushMemTable(); | |
388 | if (j < kNumFiles - 1) { | |
389 | // background compaction may happen early for kNumFiles'th file | |
390 | ASSERT_EQ(NumTableFilesAtLevel(0), j + 1); | |
391 | } | |
392 | if (j == options.level0_file_num_compaction_trigger - 1) { | |
393 | // When i == 1, compaction will output some files to L1, at which point | |
394 | // L1 is not bottommost so range deletions cannot be compacted away. The | |
395 | // new L1 files must be generated with non-overlapping key ranges even | |
396 | // though multiple subcompactions see the same ranges deleted, else an | |
397 | // assertion will fail. | |
398 | // | |
399 | // Only enable auto-compactions when we're ready; otherwise, the | |
400 | // oversized L0 (relative to base_level) causes the compaction to run | |
401 | // earlier. | |
402 | ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()})); | |
403 | dbfull()->TEST_WaitForCompact(); | |
404 | ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(), | |
405 | {{"disable_auto_compactions", "true"}})); | |
406 | ASSERT_EQ(NumTableFilesAtLevel(0), 0); | |
407 | ASSERT_GT(NumTableFilesAtLevel(1), 0); | |
408 | ASSERT_GT(NumTableFilesAtLevel(2), 0); | |
409 | } | |
410 | } | |
411 | } | |
412 | } | |
413 | ||
414 | TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) { | |
415 | const int kNumPerFile = 100, kFilesPerLevel = 4, kNumLevels = 4; | |
416 | Options options = CurrentOptions(); | |
417 | options.compaction_options_universal.min_merge_width = kFilesPerLevel; | |
418 | options.compaction_options_universal.max_merge_width = kFilesPerLevel; | |
419 | options.compaction_options_universal.size_ratio = 10; | |
420 | options.compaction_style = kCompactionStyleUniversal; | |
421 | options.level0_file_num_compaction_trigger = kFilesPerLevel; | |
422 | options.max_subcompactions = 4; | |
423 | options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); | |
424 | options.num_levels = kNumLevels; | |
425 | options.target_file_size_base = kNumPerFile << 10; | |
426 | options.target_file_size_multiplier = 1; | |
427 | Reopen(options); | |
428 | ||
429 | Random rnd(301); | |
430 | for (int i = 0; i < kNumLevels - 1; ++i) { | |
431 | for (int j = 0; j < kFilesPerLevel; ++j) { | |
432 | if (i == kNumLevels - 2) { | |
433 | // insert range deletions [95,105) in two files, [295,305) in next two | |
434 | // to prepare L1 for later manual compaction. | |
435 | int mid = (j + (1 - j % 2)) * kNumPerFile; | |
436 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
437 | Key(mid - 5), Key(mid + 5)); | |
438 | } | |
439 | std::vector<std::string> values; | |
440 | // Write 100KB (100 values, each 1K) | |
441 | for (int k = 0; k < kNumPerFile; k++) { | |
20effc67 | 442 | values.push_back(rnd.RandomString(990)); |
7c673cae FG |
443 | ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k])); |
444 | } | |
445 | // put extra key to trigger flush | |
446 | ASSERT_OK(Put("", "")); | |
447 | dbfull()->TEST_WaitForFlushMemTable(); | |
448 | if (j < kFilesPerLevel - 1) { | |
449 | // background compaction may happen early for kFilesPerLevel'th file | |
450 | ASSERT_EQ(NumTableFilesAtLevel(0), j + 1); | |
451 | } | |
452 | } | |
453 | dbfull()->TEST_WaitForCompact(); | |
454 | ASSERT_EQ(NumTableFilesAtLevel(0), 0); | |
455 | ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1); | |
456 | } | |
457 | // Now L1-L3 are full, when we compact L1->L2 we should see (1) subcompactions | |
458 | // happen since input level > 0; (2) range deletions are not dropped since | |
459 | // output level is not bottommost. If no file boundary assertion fails, that | |
460 | // probably means universal compaction + subcompaction + range deletion are | |
461 | // compatible. | |
462 | ASSERT_OK(dbfull()->RunManualCompaction( | |
20effc67 | 463 | static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily()) |
7c673cae | 464 | ->cfd(), |
f67539c2 TL |
465 | 1 /* input_level */, 2 /* output_level */, CompactRangeOptions(), |
466 | nullptr /* begin */, nullptr /* end */, true /* exclusive */, | |
467 | true /* disallow_trivial_move */, | |
468 | port::kMaxUint64 /* max_file_num_to_ignore */)); | |
7c673cae FG |
469 | } |
470 | #endif // ROCKSDB_LITE | |
471 | ||
472 | TEST_F(DBRangeDelTest, CompactionRemovesCoveredMergeOperands) { | |
473 | const int kNumPerFile = 3, kNumFiles = 3; | |
474 | Options opts = CurrentOptions(); | |
475 | opts.disable_auto_compactions = true; | |
476 | opts.memtable_factory.reset(new SpecialSkipListFactory(2 * kNumPerFile)); | |
477 | opts.merge_operator = MergeOperators::CreateUInt64AddOperator(); | |
478 | opts.num_levels = 2; | |
479 | Reopen(opts); | |
480 | ||
481 | // Iterates kNumFiles * kNumPerFile + 1 times since flushing the last file | |
482 | // requires an extra entry. | |
483 | for (int i = 0; i <= kNumFiles * kNumPerFile; ++i) { | |
484 | if (i % kNumPerFile == 0 && i / kNumPerFile == kNumFiles - 1) { | |
485 | // Delete merge operands from all but the last file | |
486 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key", | |
487 | "key_"); | |
488 | } | |
489 | std::string val; | |
490 | PutFixed64(&val, i); | |
491 | db_->Merge(WriteOptions(), "key", val); | |
492 | // we need to prevent trivial move using Puts so compaction will actually | |
493 | // process the merge operands. | |
494 | db_->Put(WriteOptions(), "prevent_trivial_move", ""); | |
495 | if (i > 0 && i % kNumPerFile == 0) { | |
496 | dbfull()->TEST_WaitForFlushMemTable(); | |
497 | } | |
498 | } | |
499 | ||
500 | ReadOptions read_opts; | |
501 | read_opts.ignore_range_deletions = true; | |
502 | std::string expected, actual; | |
503 | ASSERT_OK(db_->Get(read_opts, "key", &actual)); | |
504 | PutFixed64(&expected, 45); // 0+1+2+...+9 |
505 | ASSERT_EQ(expected, actual); | |
506 | ||
507 | db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); | |
508 | ||
509 | expected.clear(); | |
510 | ASSERT_OK(db_->Get(read_opts, "key", &actual)); | |
511 | uint64_t tmp; | |
512 | Slice tmp2(actual); | |
513 | GetFixed64(&tmp2, &tmp); | |
514 | PutFixed64(&expected, 30); // 6+7+8+9 (earlier operands covered by tombstone) | |
515 | ASSERT_EQ(expected, actual); | |
516 | } | |
517 | ||
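
The `UInt64AddOperator` used above interprets each operand as a fixed-width little-endian `uint64_t`, which is why the test encodes with `PutFixed64()` and decodes with `GetFixed64()`. A minimal round-trip sketch, assuming RocksDB's internal `util/coding.h` helpers that this test file already has access to:

```cpp
#include <cassert>
#include <cstdint>
#include <string>

#include "rocksdb/slice.h"
#include "util/coding.h"  // internal encoding helpers used by these tests

void FixedWidthRoundTrip() {
  std::string encoded;
  ROCKSDB_NAMESPACE::PutFixed64(&encoded, 42);  // appends 8 little-endian bytes
  ROCKSDB_NAMESPACE::Slice in(encoded);
  uint64_t decoded = 0;
  bool ok = ROCKSDB_NAMESPACE::GetFixed64(&in, &decoded);  // consumes 8 bytes
  assert(ok && decoded == 42);
}
```
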
494da23a TL |
518 | TEST_F(DBRangeDelTest, PutDeleteRangeMergeFlush) { |
519 | // Test the sequence of operations: (1) Put, (2) DeleteRange, (3) Merge, (4) | |
520 | // Flush. The `CompactionIterator` previously had a bug where we forgot to | |
521 | // check for covering range tombstones when processing the (1) Put, causing | |
522 | // it to reappear after the flush. | |
523 | Options opts = CurrentOptions(); | |
524 | opts.merge_operator = MergeOperators::CreateUInt64AddOperator(); | |
525 | Reopen(opts); | |
526 | ||
527 | std::string val; | |
528 | PutFixed64(&val, 1); | |
529 | ASSERT_OK(db_->Put(WriteOptions(), "key", val)); | |
530 | ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
531 | "key", "key_")); | |
532 | ASSERT_OK(db_->Merge(WriteOptions(), "key", val)); | |
533 | ASSERT_OK(db_->Flush(FlushOptions())); | |
534 | ||
535 | ReadOptions read_opts; | |
536 | std::string expected, actual; | |
537 | ASSERT_OK(db_->Get(read_opts, "key", &actual)); | |
538 | PutFixed64(&expected, 1); | |
539 | ASSERT_EQ(expected, actual); | |
540 | } | |
541 | ||
7c673cae FG |
542 | // NumTableFilesAtLevel() is not supported in ROCKSDB_LITE |
543 | #ifndef ROCKSDB_LITE | |
544 | TEST_F(DBRangeDelTest, ObsoleteTombstoneCleanup) { | |
545 | // During compaction to bottommost level, verify range tombstones older than | |
546 | // the oldest snapshot are removed, while others are preserved. | |
547 | Options opts = CurrentOptions(); | |
548 | opts.disable_auto_compactions = true; | |
549 | opts.num_levels = 2; | |
550 | opts.statistics = CreateDBStatistics(); | |
551 | Reopen(opts); | |
552 | ||
553 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr1", | |
11fdf7f2 | 554 | "dr10"); // obsolete after compaction |
7c673cae FG |
555 | db_->Put(WriteOptions(), "key", "val"); |
556 | db_->Flush(FlushOptions()); | |
557 | const Snapshot* snapshot = db_->GetSnapshot(); | |
558 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr2", | |
11fdf7f2 | 559 | "dr20"); // protected by snapshot |
7c673cae FG |
560 | db_->Put(WriteOptions(), "key", "val"); |
561 | db_->Flush(FlushOptions()); | |
562 | ||
563 | ASSERT_EQ(2, NumTableFilesAtLevel(0)); | |
564 | ASSERT_EQ(0, NumTableFilesAtLevel(1)); | |
565 | db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); | |
566 | ASSERT_EQ(0, NumTableFilesAtLevel(0)); | |
567 | ASSERT_EQ(1, NumTableFilesAtLevel(1)); | |
568 | ASSERT_EQ(1, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE)); | |
569 | ||
570 | db_->ReleaseSnapshot(snapshot); | |
571 | } | |
572 | ||
573 | TEST_F(DBRangeDelTest, TableEvictedDuringScan) { | |
574 | // The RangeDelAggregator holds pointers into range deletion blocks created by | |
575 | // table readers. This test ensures the aggregator can still access those | |
576 | // blocks even if it outlives the table readers that created them. | |
577 | // | |
578 | // DBIter always keeps readers open for L0 files. So, in order to test | |
579 | // aggregator outliving reader, we need to have deletions in L1 files, which | |
580 | // are opened/closed on-demand during the scan. This is accomplished by | |
581 | // setting kNumRanges > level0_stop_writes_trigger, which prevents deletions | |
582 | // from all lingering in L0 (there is at most one range deletion per L0 file). | |
583 | // | |
584 | // The first L1 file will contain a range deletion since its begin key is 0. | |
585 | // SeekToFirst() references that table's reader and adds its range tombstone | |
586 | // to the aggregator. Upon advancing beyond that table's key-range via Next(), | |
587 | // the table reader will be unreferenced by the iterator. Since we manually | |
588 | // call Evict() on all readers before the full scan, this unreference causes | |
589 | // the reader's refcount to drop to zero and thus be destroyed. | |
590 | // | |
591 | // When it is destroyed, we do not remove its range deletions from the | |
592 | // aggregator. So, subsequent calls to Next() must be able to use these | |
593 | // deletions to decide whether a key is covered. This will work as long as | |
594 | // the aggregator properly references the range deletion block. | |
595 | const int kNum = 25, kRangeBegin = 0, kRangeEnd = 7, kNumRanges = 5; | |
596 | Options opts = CurrentOptions(); | |
597 | opts.comparator = test::Uint64Comparator(); | |
598 | opts.level0_file_num_compaction_trigger = 4; | |
599 | opts.level0_stop_writes_trigger = 4; | |
600 | opts.memtable_factory.reset(new SpecialSkipListFactory(1)); | |
601 | opts.num_levels = 2; | |
602 | BlockBasedTableOptions bbto; | |
603 | bbto.cache_index_and_filter_blocks = true; | |
604 | bbto.block_cache = NewLRUCache(8 << 20); | |
605 | opts.table_factory.reset(NewBlockBasedTableFactory(bbto)); | |
20effc67 | 606 | DestroyAndReopen(opts); |
7c673cae FG |
607 | |
608 | // Hold a snapshot so range deletions can't become obsolete during compaction | |
609 | // to bottommost level (i.e., L1). | |
610 | const Snapshot* snapshot = db_->GetSnapshot(); | |
611 | for (int i = 0; i < kNum; ++i) { | |
612 | db_->Put(WriteOptions(), GetNumericStr(i), "val"); | |
613 | if (i > 0) { | |
614 | dbfull()->TEST_WaitForFlushMemTable(); | |
615 | } | |
616 | if (i >= kNum / 2 && i < kNum / 2 + kNumRanges) { | |
617 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
618 | GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd)); | |
619 | } | |
620 | } | |
621 | // Must be > 1 so the first L1 file can be closed before scan finishes | |
622 | dbfull()->TEST_WaitForCompact(); | |
623 | ASSERT_GT(NumTableFilesAtLevel(1), 1); | |
624 | std::vector<uint64_t> file_numbers = ListTableFiles(env_, dbname_); | |
625 | ||
626 | ReadOptions read_opts; | |
627 | auto* iter = db_->NewIterator(read_opts); | |
628 | int expected = kRangeEnd; | |
629 | iter->SeekToFirst(); | |
630 | for (auto file_number : file_numbers) { | |
631 | // This leaves the table readers referenced only externally, so each one is |
632 | // destroyed as soon as the iterator releases its reference to it. |
633 | TableCache::Evict(dbfull()->TEST_table_cache(), file_number); | |
634 | } | |
635 | for (; iter->Valid(); iter->Next()) { | |
636 | ASSERT_EQ(GetNumericStr(expected), iter->key()); | |
637 | ++expected; | |
638 | // Keep clearing block cache's LRU so range deletion block can be freed as | |
639 | // soon as its refcount drops to zero. | |
640 | bbto.block_cache->EraseUnRefEntries(); | |
641 | } | |
642 | ASSERT_EQ(kNum, expected); | |
643 | delete iter; | |
644 | db_->ReleaseSnapshot(snapshot); | |
645 | } | |
646 | ||
647 | TEST_F(DBRangeDelTest, GetCoveredKeyFromMutableMemtable) { | |
11fdf7f2 TL |
648 | do { |
649 | DestroyAndReopen(CurrentOptions()); | |
650 | db_->Put(WriteOptions(), "key", "val"); | |
651 | ASSERT_OK( | |
652 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); | |
7c673cae | 653 | |
11fdf7f2 TL |
654 | ReadOptions read_opts; |
655 | std::string value; | |
656 | ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound()); | |
657 | } while (ChangeOptions(kRangeDelSkipConfigs)); | |
7c673cae FG |
658 | } |
659 | ||
660 | TEST_F(DBRangeDelTest, GetCoveredKeyFromImmutableMemtable) { | |
11fdf7f2 TL |
661 | do { |
662 | Options opts = CurrentOptions(); | |
663 | opts.max_write_buffer_number = 3; | |
664 | opts.min_write_buffer_number_to_merge = 2; | |
665 | // SpecialSkipListFactory lets us specify maximum number of elements the | |
666 | // memtable can hold. It switches the active memtable to immutable (flush is | |
667 | // prevented by the above options) upon inserting an element that would | |
668 | // overflow the memtable. | |
669 | opts.memtable_factory.reset(new SpecialSkipListFactory(1)); | |
670 | DestroyAndReopen(opts); | |
671 | ||
672 | db_->Put(WriteOptions(), "key", "val"); | |
673 | ASSERT_OK( | |
674 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); | |
675 | db_->Put(WriteOptions(), "blah", "val"); | |
7c673cae | 676 | |
11fdf7f2 TL |
677 | ReadOptions read_opts; |
678 | std::string value; | |
679 | ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound()); | |
680 | } while (ChangeOptions(kRangeDelSkipConfigs)); | |
7c673cae FG |
681 | } |
682 | ||
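
The option recipe in the test above generalizes: with a one-entry memtable, the second write seals the active memtable, and `min_write_buffer_number_to_merge = 2` keeps the sealed (immutable) memtable from flushing immediately. A minimal sketch of the same setup, assuming the test-only `SpecialSkipListFactory` from `db/db_test_util.h`:

```cpp
#include "db/db_test_util.h"  // SpecialSkipListFactory (test-only helper)
#include "rocksdb/options.h"

// Configure options so writes land in an immutable memtable that lingers
// unflushed, as exercised by GetCoveredKeyFromImmutableMemtable above.
ROCKSDB_NAMESPACE::Options ImmutableMemtableOptions(
    ROCKSDB_NAMESPACE::Options base) {
  base.max_write_buffer_number = 3;           // allow extra sealed memtables
  base.min_write_buffer_number_to_merge = 2;  // defer flush until two fill up
  base.memtable_factory.reset(
      new ROCKSDB_NAMESPACE::SpecialSkipListFactory(1));  // 1 entry/memtable
  return base;
}
```
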
683 | TEST_F(DBRangeDelTest, GetCoveredKeyFromSst) { | |
11fdf7f2 TL |
684 | do { |
685 | DestroyAndReopen(CurrentOptions()); | |
686 | db_->Put(WriteOptions(), "key", "val"); | |
687 | // snapshot prevents key from being deleted during flush | |
688 | const Snapshot* snapshot = db_->GetSnapshot(); | |
689 | ASSERT_OK( | |
690 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); | |
691 | ASSERT_OK(db_->Flush(FlushOptions())); | |
7c673cae | 692 | |
11fdf7f2 TL |
693 | ReadOptions read_opts; |
694 | std::string value; | |
695 | ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound()); | |
696 | db_->ReleaseSnapshot(snapshot); | |
494da23a | 697 | } while (ChangeOptions(kRangeDelSkipConfigs)); |
7c673cae FG |
698 | } |
699 | ||
700 | TEST_F(DBRangeDelTest, GetCoveredMergeOperandFromMemtable) { | |
701 | const int kNumMergeOps = 10; | |
702 | Options opts = CurrentOptions(); | |
703 | opts.merge_operator = MergeOperators::CreateUInt64AddOperator(); | |
704 | Reopen(opts); | |
705 | ||
706 | for (int i = 0; i < kNumMergeOps; ++i) { | |
707 | std::string val; | |
708 | PutFixed64(&val, i); | |
709 | db_->Merge(WriteOptions(), "key", val); | |
710 | if (i == kNumMergeOps / 2) { | |
711 | // deletes [0, 5] | |
712 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key", | |
713 | "key_"); | |
714 | } | |
715 | } | |
716 | ||
717 | ReadOptions read_opts; | |
718 | std::string expected, actual; | |
719 | ASSERT_OK(db_->Get(read_opts, "key", &actual)); | |
720 | PutFixed64(&expected, 30); // 6+7+8+9 | |
721 | ASSERT_EQ(expected, actual); | |
722 | ||
723 | expected.clear(); | |
724 | read_opts.ignore_range_deletions = true; | |
725 | ASSERT_OK(db_->Get(read_opts, "key", &actual)); | |
726 | PutFixed64(&expected, 45); // 0+1+2+...+9 | |
727 | ASSERT_EQ(expected, actual); | |
728 | } | |
729 | ||
730 | TEST_F(DBRangeDelTest, GetIgnoresRangeDeletions) { | |
731 | Options opts = CurrentOptions(); | |
732 | opts.max_write_buffer_number = 4; | |
733 | opts.min_write_buffer_number_to_merge = 3; | |
734 | opts.memtable_factory.reset(new SpecialSkipListFactory(1)); | |
735 | Reopen(opts); | |
736 | ||
737 | db_->Put(WriteOptions(), "sst_key", "val"); | |
738 | // snapshot prevents key from being deleted during flush | |
739 | const Snapshot* snapshot = db_->GetSnapshot(); | |
740 | ASSERT_OK( | |
741 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); | |
742 | ASSERT_OK(db_->Flush(FlushOptions())); | |
743 | db_->Put(WriteOptions(), "imm_key", "val"); | |
744 | ASSERT_OK( | |
745 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); | |
746 | db_->Put(WriteOptions(), "mem_key", "val"); | |
747 | ASSERT_OK( | |
748 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); | |
749 | ||
750 | ReadOptions read_opts; | |
751 | read_opts.ignore_range_deletions = true; | |
752 | for (std::string key : {"sst_key", "imm_key", "mem_key"}) { | |
753 | std::string value; | |
754 | ASSERT_OK(db_->Get(read_opts, key, &value)); | |
755 | } | |
756 | db_->ReleaseSnapshot(snapshot); | |
757 | } | |
758 | ||
759 | TEST_F(DBRangeDelTest, IteratorRemovesCoveredKeys) { | |
760 | const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25; | |
761 | Options opts = CurrentOptions(); | |
762 | opts.comparator = test::Uint64Comparator(); | |
763 | opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); | |
20effc67 | 764 | DestroyAndReopen(opts); |
7c673cae FG |
765 | |
766 | // Write half of the keys before the tombstone and half after the tombstone. | |
767 | // Only covered keys (i.e., within the range and older than the tombstone) | |
768 | // should be deleted. | |
769 | for (int i = 0; i < kNum; ++i) { | |
770 | if (i == kNum / 2) { | |
771 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
772 | GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd)); | |
773 | } | |
774 | db_->Put(WriteOptions(), GetNumericStr(i), "val"); | |
775 | } | |
776 | ReadOptions read_opts; | |
777 | auto* iter = db_->NewIterator(read_opts); | |
778 | ||
779 | int expected = 0; | |
780 | for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { | |
781 | ASSERT_EQ(GetNumericStr(expected), iter->key()); | |
782 | if (expected == kRangeBegin - 1) { | |
783 | expected = kNum / 2; | |
784 | } else { | |
785 | ++expected; | |
786 | } | |
787 | } | |
788 | ASSERT_EQ(kNum, expected); | |
789 | delete iter; | |
790 | } | |
791 | ||
792 | TEST_F(DBRangeDelTest, IteratorOverUserSnapshot) { | |
793 | const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25; | |
794 | Options opts = CurrentOptions(); | |
795 | opts.comparator = test::Uint64Comparator(); | |
796 | opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); | |
20effc67 | 797 | DestroyAndReopen(opts); |
7c673cae FG |
798 | |
799 | const Snapshot* snapshot = nullptr; | |
800 | // Put a snapshot before the range tombstone, verify an iterator using that | |
801 | // snapshot sees all inserted keys. | |
802 | for (int i = 0; i < kNum; ++i) { | |
803 | if (i == kNum / 2) { | |
804 | snapshot = db_->GetSnapshot(); | |
805 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
806 | GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd)); | |
807 | } | |
808 | db_->Put(WriteOptions(), GetNumericStr(i), "val"); | |
809 | } | |
810 | ReadOptions read_opts; | |
811 | read_opts.snapshot = snapshot; | |
812 | auto* iter = db_->NewIterator(read_opts); | |
813 | ||
814 | int expected = 0; | |
815 | for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { | |
816 | ASSERT_EQ(GetNumericStr(expected), iter->key()); | |
817 | ++expected; | |
818 | } | |
819 | ASSERT_EQ(kNum / 2, expected); | |
820 | delete iter; | |
821 | db_->ReleaseSnapshot(snapshot); | |
822 | } | |
823 | ||
824 | TEST_F(DBRangeDelTest, IteratorIgnoresRangeDeletions) { | |
825 | Options opts = CurrentOptions(); | |
826 | opts.max_write_buffer_number = 4; | |
827 | opts.min_write_buffer_number_to_merge = 3; | |
828 | opts.memtable_factory.reset(new SpecialSkipListFactory(1)); | |
829 | Reopen(opts); | |
830 | ||
831 | db_->Put(WriteOptions(), "sst_key", "val"); | |
832 | // snapshot prevents key from being deleted during flush | |
833 | const Snapshot* snapshot = db_->GetSnapshot(); | |
834 | ASSERT_OK( | |
835 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); | |
836 | ASSERT_OK(db_->Flush(FlushOptions())); | |
837 | db_->Put(WriteOptions(), "imm_key", "val"); | |
838 | ASSERT_OK( | |
839 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); | |
840 | db_->Put(WriteOptions(), "mem_key", "val"); | |
841 | ASSERT_OK( | |
842 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); | |
843 | ||
844 | ReadOptions read_opts; | |
845 | read_opts.ignore_range_deletions = true; | |
846 | auto* iter = db_->NewIterator(read_opts); | |
847 | int i = 0; | |
848 | std::string expected[] = {"imm_key", "mem_key", "sst_key"}; | |
849 | for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++i) { | |
850 | std::string key; | |
851 | ASSERT_EQ(expected[i], iter->key()); | |
852 | } | |
853 | ASSERT_EQ(3, i); | |
854 | delete iter; | |
855 | db_->ReleaseSnapshot(snapshot); | |
856 | } | |
857 | ||
11fdf7f2 | 858 | #ifndef ROCKSDB_UBSAN_RUN |
7c673cae FG |
859 | TEST_F(DBRangeDelTest, TailingIteratorRangeTombstoneUnsupported) { |
860 | db_->Put(WriteOptions(), "key", "val"); | |
861 | // snapshot prevents key from being deleted during flush | |
862 | const Snapshot* snapshot = db_->GetSnapshot(); | |
863 | ASSERT_OK( | |
864 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); | |
865 | ||
866 | // each iteration checks that tailing iterators are unsupported in the memtable, L0, and then L1 |
867 | for (int i = 0; i < 3; ++i) { | |
868 | ReadOptions read_opts; | |
869 | read_opts.tailing = true; | |
870 | auto* iter = db_->NewIterator(read_opts); | |
871 | if (i == 2) { | |
872 | // For L1+, iterators over files are created on-demand, so need seek | |
873 | iter->SeekToFirst(); | |
874 | } | |
875 | ASSERT_TRUE(iter->status().IsNotSupported()); | |
876 | delete iter; | |
877 | if (i == 0) { | |
878 | ASSERT_OK(db_->Flush(FlushOptions())); | |
879 | } else if (i == 1) { | |
880 | MoveFilesToLevel(1); | |
881 | } | |
882 | } | |
883 | db_->ReleaseSnapshot(snapshot); | |
884 | } | |
11fdf7f2 TL |
885 | |
886 | #endif // !ROCKSDB_UBSAN_RUN | |
887 | ||
888 | TEST_F(DBRangeDelTest, SubcompactionHasEmptyDedicatedRangeDelFile) { | |
889 | const int kNumFiles = 2, kNumKeysPerFile = 4; | |
890 | Options options = CurrentOptions(); | |
891 | options.compression = kNoCompression; | |
892 | options.disable_auto_compactions = true; | |
893 | options.level0_file_num_compaction_trigger = kNumFiles; | |
894 | options.max_subcompactions = 2; | |
895 | options.num_levels = 2; | |
896 | options.target_file_size_base = 4096; | |
897 | Reopen(options); | |
898 | ||
899 | // need a L1 file for subcompaction to be triggered | |
900 | ASSERT_OK( | |
901 | db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(0), "val")); | |
902 | ASSERT_OK(db_->Flush(FlushOptions())); | |
903 | MoveFilesToLevel(1); | |
904 | ||
905 | // put enough keys to fill up the first subcompaction, and later range-delete | |
906 | // them so that the first subcompaction outputs no key-values. In that case | |
907 | // it'll consider making an SST file dedicated to range deletions. | |
908 | for (int i = 0; i < kNumKeysPerFile; ++i) { | |
909 | ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i), | |
910 | std::string(1024, 'a'))); | |
911 | } | |
912 | ASSERT_OK(db_->Flush(FlushOptions())); | |
913 | ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), | |
914 | Key(kNumKeysPerFile))); | |
915 | ||
916 | // the above range tombstone can be dropped, so it alone won't cause a |
917 | // dedicated file to be opened. So we add another one, protected by a |
918 | // snapshot, that must be considered. Make its range outside the first |
919 | // subcompaction's range to exercise the tricky part of the code. |
920 | const Snapshot* snapshot = db_->GetSnapshot(); | |
921 | ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
922 | Key(kNumKeysPerFile + 1), | |
923 | Key(kNumKeysPerFile + 2))); | |
924 | ASSERT_OK(db_->Flush(FlushOptions())); | |
925 | ||
926 | ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0)); | |
927 | ASSERT_EQ(1, NumTableFilesAtLevel(1)); | |
928 | ||
929 | db_->EnableAutoCompaction({db_->DefaultColumnFamily()}); | |
930 | dbfull()->TEST_WaitForCompact(); | |
931 | db_->ReleaseSnapshot(snapshot); | |
932 | } | |
933 | ||
934 | TEST_F(DBRangeDelTest, MemtableBloomFilter) { | |
935 | // Regression test for #2743. Range deletion tombstones in the memtable must |
936 | // still be applied even when Get() skips searching due to its prefix bloom filter |
937 | const int kMemtableSize = 1 << 20; // 1MB | |
938 | const int kMemtablePrefixFilterSize = 1 << 13; // 8KB | |
939 | const int kNumKeys = 1000; | |
940 | const int kPrefixLen = 8; | |
941 | Options options = CurrentOptions(); | |
942 | options.memtable_prefix_bloom_size_ratio = | |
943 | static_cast<double>(kMemtablePrefixFilterSize) / kMemtableSize; | |
f67539c2 TL |
944 | options.prefix_extractor.reset( |
945 | ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen)); | |
11fdf7f2 TL |
946 | options.write_buffer_size = kMemtableSize; |
947 | Reopen(options); | |
948 | ||
949 | for (int i = 0; i < kNumKeys; ++i) { | |
950 | ASSERT_OK(Put(Key(i), "val")); | |
951 | } | |
952 | Flush(); | |
953 | ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), | |
954 | Key(kNumKeys))); | |
955 | for (int i = 0; i < kNumKeys; ++i) { | |
956 | std::string value; | |
957 | ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &value).IsNotFound()); | |
958 | } | |
959 | } | |
960 | ||
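
`memtable_prefix_bloom_size_ratio` is expressed as a fraction of the write buffer size rather than in bytes, which is why the test derives it from the desired filter size. A minimal sketch of that arithmetic:

```cpp
#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"

ROCKSDB_NAMESPACE::Options PrefixBloomOptions(
    ROCKSDB_NAMESPACE::Options base) {
  const size_t kMemtableSize = 1 << 20;  // 1MB write buffer
  const size_t kFilterSize = 1 << 13;    // 8KB memtable bloom filter
  base.write_buffer_size = kMemtableSize;
  // The ratio option: bloom bytes as a fraction of memtable bytes (1/128).
  base.memtable_prefix_bloom_size_ratio =
      static_cast<double>(kFilterSize) / kMemtableSize;
  base.prefix_extractor.reset(
      ROCKSDB_NAMESPACE::NewFixedPrefixTransform(8 /* prefix_len */));
  return base;
}
```
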
961 | TEST_F(DBRangeDelTest, CompactionTreatsSplitInputLevelDeletionAtomically) { | |
962 | // This test originally verified that compaction treated files containing a | |
963 | // split range deletion in the input level as an atomic unit. I.e., | |
964 | // compacting any input-level file(s) containing a portion of the range | |
965 | // deletion causes all other input-level files containing portions of that | |
966 | // same range deletion to be included in the compaction. Range deletion | |
967 | // tombstones are now truncated to sstable boundaries which removed the need | |
968 | // for that behavior (which could lead to excessively large | |
969 | // compactions). | |
970 | const int kNumFilesPerLevel = 4, kValueBytes = 4 << 10; | |
971 | Options options = CurrentOptions(); | |
972 | options.compression = kNoCompression; | |
973 | options.level0_file_num_compaction_trigger = kNumFilesPerLevel; | |
974 | options.memtable_factory.reset( | |
975 | new SpecialSkipListFactory(2 /* num_entries_flush */)); | |
976 | options.target_file_size_base = kValueBytes; | |
977 | // i == 0: CompactFiles | |
978 | // i == 1: CompactRange | |
979 | // i == 2: automatic compaction | |
980 | for (int i = 0; i < 3; ++i) { | |
981 | DestroyAndReopen(options); | |
982 | ||
983 | ASSERT_OK(Put(Key(0), "")); | |
984 | ASSERT_OK(db_->Flush(FlushOptions())); | |
985 | MoveFilesToLevel(2); | |
986 | ASSERT_EQ(1, NumTableFilesAtLevel(2)); | |
987 | ||
988 | // snapshot protects range tombstone from dropping due to becoming obsolete. | |
989 | const Snapshot* snapshot = db_->GetSnapshot(); | |
990 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), | |
991 | Key(2 * kNumFilesPerLevel)); | |
992 | ||
993 | Random rnd(301); | |
20effc67 | 994 | std::string value = rnd.RandomString(kValueBytes); |
11fdf7f2 TL |
995 | for (int j = 0; j < kNumFilesPerLevel; ++j) { |
996 | // give files overlapping key-ranges to prevent trivial move | |
997 | ASSERT_OK(Put(Key(j), value)); | |
998 | ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value)); | |
999 | if (j > 0) { | |
1000 | dbfull()->TEST_WaitForFlushMemTable(); | |
1001 | ASSERT_EQ(j, NumTableFilesAtLevel(0)); | |
1002 | } | |
1003 | } | |
1004 | // put extra key to trigger final flush | |
1005 | ASSERT_OK(Put("", "")); | |
1006 | dbfull()->TEST_WaitForFlushMemTable(); | |
1007 | dbfull()->TEST_WaitForCompact(); | |
1008 | ASSERT_EQ(0, NumTableFilesAtLevel(0)); | |
1009 | ASSERT_EQ(kNumFilesPerLevel, NumTableFilesAtLevel(1)); | |
1010 | ||
1011 | ColumnFamilyMetaData meta; | |
1012 | db_->GetColumnFamilyMetaData(&meta); | |
1013 | if (i == 0) { | |
1014 | ASSERT_OK(db_->CompactFiles( | |
1015 | CompactionOptions(), {meta.levels[1].files[0].name}, 2 /* level */)); | |
1016 | ASSERT_EQ(0, NumTableFilesAtLevel(1)); | |
1017 | } else if (i == 1) { | |
1018 | auto begin_str = Key(0), end_str = Key(1); | |
1019 | Slice begin = begin_str, end = end_str; | |
1020 | ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin, &end)); | |
1021 | ASSERT_EQ(3, NumTableFilesAtLevel(1)); | |
1022 | } else if (i == 2) { | |
1023 | ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(), | |
1024 | {{"max_bytes_for_level_base", "10000"}})); | |
1025 | dbfull()->TEST_WaitForCompact(); | |
1026 | ASSERT_EQ(1, NumTableFilesAtLevel(1)); | |
1027 | } | |
1028 | ASSERT_GT(NumTableFilesAtLevel(2), 0); | |
1029 | ||
1030 | db_->ReleaseSnapshot(snapshot); | |
1031 | } | |
1032 | } | |
1033 | ||
1034 | TEST_F(DBRangeDelTest, RangeTombstoneEndKeyAsSstableUpperBound) { | |
1035 | // Test the handling of the range-tombstone end-key as the | |
1036 | // upper-bound for an sstable. | |
1037 | ||
1038 | const int kNumFilesPerLevel = 2, kValueBytes = 4 << 10; | |
1039 | Options options = CurrentOptions(); | |
1040 | options.compression = kNoCompression; | |
1041 | options.level0_file_num_compaction_trigger = kNumFilesPerLevel; | |
1042 | options.memtable_factory.reset( | |
1043 | new SpecialSkipListFactory(2 /* num_entries_flush */)); | |
1044 | options.target_file_size_base = kValueBytes; | |
1045 | options.disable_auto_compactions = true; | |
1046 | ||
1047 | DestroyAndReopen(options); | |
1048 | ||
1049 | // Create an initial sstable at L2: | |
1050 | // [key000000#1,1, key000000#1,1] | |
1051 | ASSERT_OK(Put(Key(0), "")); | |
1052 | ASSERT_OK(db_->Flush(FlushOptions())); | |
1053 | MoveFilesToLevel(2); | |
1054 | ASSERT_EQ(1, NumTableFilesAtLevel(2)); | |
1055 | ||
1056 | // A snapshot protects the range tombstone from dropping due to | |
1057 | // becoming obsolete. | |
1058 | const Snapshot* snapshot = db_->GetSnapshot(); | |
1059 | db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), | |
1060 | Key(0), Key(2 * kNumFilesPerLevel)); | |
1061 | ||
1062 | // Create 2 additional sstables in L0. Note that the first sstable | |
1063 | // contains the range tombstone. | |
1064 | // [key000000#3,1, key000004#72057594037927935,15] | |
1065 | // [key000001#5,1, key000002#6,1] | |
1066 | Random rnd(301); | |
20effc67 | 1067 | std::string value = rnd.RandomString(kValueBytes); |
11fdf7f2 TL |
1068 | for (int j = 0; j < kNumFilesPerLevel; ++j) { |
1069 | // Give files overlapping key-ranges to prevent a trivial move when we | |
1070 | // compact from L0 to L1. | |
1071 | ASSERT_OK(Put(Key(j), value)); | |
1072 | ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value)); | |
1073 | ASSERT_OK(db_->Flush(FlushOptions())); | |
1074 | ASSERT_EQ(j + 1, NumTableFilesAtLevel(0)); | |
1075 | } | |
1076 | // Compact the 2 L0 sstables to L1, resulting in the following LSM. There | |
1077 | // are 2 sstables generated in L1 due to the target_file_size_base setting. | |
1078 | // L1: | |
1079 | // [key000000#3,1, key000002#72057594037927935,15] | |
1080 | // [key000002#6,1, key000004#72057594037927935,15] | |
1081 | // L2: | |
1082 | // [key000000#1,1, key000000#1,1] | |
1083 | MoveFilesToLevel(1); | |
1084 | ASSERT_EQ(2, NumTableFilesAtLevel(1)); | |
1085 | ||
1086 | { | |
1087 | // Compact the second sstable in L1: | |
1088 | // L1: | |
1089 | // [key000000#3,1, key000002#72057594037927935,15] | |
1090 | // L2: | |
1091 | // [key000000#1,1, key000000#1,1] | |
1092 | // [key000002#6,1, key000004#72057594037927935,15] | |
494da23a TL |
1093 | // |
1094 | // At the same time, verify the compaction does not cause the key at the | |
1095 | // endpoint (key000002#6,1) to disappear. | |
1096 | ASSERT_EQ(value, Get(Key(2))); | |
11fdf7f2 | 1097 | auto begin_str = Key(3); |
f67539c2 | 1098 | const ROCKSDB_NAMESPACE::Slice begin = begin_str; |
11fdf7f2 TL |
1099 | dbfull()->TEST_CompactRange(1, &begin, nullptr); |
1100 | ASSERT_EQ(1, NumTableFilesAtLevel(1)); | |
1101 | ASSERT_EQ(2, NumTableFilesAtLevel(2)); | |
494da23a | 1102 | ASSERT_EQ(value, Get(Key(2))); |
11fdf7f2 TL |
1103 | } |
1104 | ||
1105 | { | |
1106 | // Compact the first sstable in L1. This should be copacetic, but | |
1107 | // previously resulted in overlapping sstables in L2 due to |
1108 | // mishandling of the range tombstone end-key when used as the | |
1109 | // largest key for an sstable. The resulting LSM structure should | |
1110 | // be: | |
1111 | // | |
1112 | // L2: | |
1113 | // [key000000#1,1, key000001#72057594037927935,15] | |
1114 | // [key000001#5,1, key000002#72057594037927935,15] | |
1115 | // [key000002#6,1, key000004#72057594037927935,15] | |
1116 | auto begin_str = Key(0); | |
f67539c2 | 1117 | const ROCKSDB_NAMESPACE::Slice begin = begin_str; |
11fdf7f2 TL |
1118 | dbfull()->TEST_CompactRange(1, &begin, &begin); |
1119 | ASSERT_EQ(0, NumTableFilesAtLevel(1)); | |
1120 | ASSERT_EQ(3, NumTableFilesAtLevel(2)); | |
1121 | } | |
1122 | ||
1123 | db_->ReleaseSnapshot(snapshot); | |
1124 | } | |
1125 | ||
1126 | TEST_F(DBRangeDelTest, UnorderedTombstones) { | |
1127 | // Regression test for #2752. Range delete tombstones between | |
1128 | // different snapshot stripes are not stored in order, so the first | |
1129 | // tombstone of each snapshot stripe should be checked as a smallest | |
1130 | // candidate. | |
1131 | Options options = CurrentOptions(); | |
1132 | DestroyAndReopen(options); | |
1133 | ||
1134 | auto cf = db_->DefaultColumnFamily(); | |
1135 | ||
1136 | ASSERT_OK(db_->Put(WriteOptions(), cf, "a", "a")); | |
1137 | ASSERT_OK(db_->Flush(FlushOptions(), cf)); | |
1138 | ASSERT_EQ(1, NumTableFilesAtLevel(0)); | |
1139 | ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); | |
1140 | ASSERT_EQ(1, NumTableFilesAtLevel(1)); | |
1141 | ||
1142 | ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "b", "c")); | |
1143 | // Hold a snapshot to separate these two delete ranges. | |
1144 | auto snapshot = db_->GetSnapshot(); | |
1145 | ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "a", "b")); | |
1146 | ASSERT_OK(db_->Flush(FlushOptions(), cf)); | |
1147 | db_->ReleaseSnapshot(snapshot); | |
1148 | ||
1149 | std::vector<std::vector<FileMetaData>> files; | |
1150 | dbfull()->TEST_GetFilesMetaData(cf, &files); | |
1151 | ASSERT_EQ(1, files[0].size()); | |
1152 | ASSERT_EQ("a", files[0][0].smallest.user_key()); | |
1153 | ASSERT_EQ("c", files[0][0].largest.user_key()); | |
1154 | ||
1155 | std::string v; | |
1156 | auto s = db_->Get(ReadOptions(), "a", &v); | |
1157 | ASSERT_TRUE(s.IsNotFound()); | |
1158 | } | |
1159 | ||
494da23a TL |
1160 | class MockMergeOperator : public MergeOperator { |
1161 | // Mock non-associative operator. Non-associativity is expressed by lack of | |
1162 | // implementation for any `PartialMerge*` functions. | |
1163 | public: | |
1164 | bool FullMergeV2(const MergeOperationInput& merge_in, | |
1165 | MergeOperationOutput* merge_out) const override { | |
1166 | assert(merge_out != nullptr); | |
1167 | merge_out->new_value = merge_in.operand_list.back().ToString(); | |
1168 | return true; | |
1169 | } | |
1170 | ||
1171 | const char* Name() const override { return "MockMergeOperator"; } | |
1172 | }; | |
1173 | ||
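
For contrast with `MockMergeOperator`, an associative operator would also implement `PartialMerge()`, allowing compaction to combine adjacent operands without the base value and thus avoiding the overlapping-endpoint files the next test relies on. A minimal sketch, with concatenation semantics assumed purely for illustration:

```cpp
#include <string>

#include "rocksdb/merge_operator.h"

class ConcatMergeOperator : public ROCKSDB_NAMESPACE::MergeOperator {
 public:
  bool FullMergeV2(const MergeOperationInput& merge_in,
                   MergeOperationOutput* merge_out) const override {
    merge_out->new_value.clear();
    if (merge_in.existing_value != nullptr) {
      merge_out->new_value.assign(merge_in.existing_value->data(),
                                  merge_in.existing_value->size());
    }
    for (const auto& operand : merge_in.operand_list) {
      merge_out->new_value.append(operand.data(), operand.size());
    }
    return true;
  }

  // Associativity: adjacent operands can be combined eagerly, so compaction
  // need not preserve every operand separately.
  bool PartialMerge(const ROCKSDB_NAMESPACE::Slice& /*key*/,
                    const ROCKSDB_NAMESPACE::Slice& left,
                    const ROCKSDB_NAMESPACE::Slice& right,
                    std::string* new_value,
                    ROCKSDB_NAMESPACE::Logger* /*logger*/) const override {
    *new_value = left.ToString() + right.ToString();
    return true;
  }

  const char* Name() const override { return "ConcatMergeOperator"; }
};
```
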
1174 | TEST_F(DBRangeDelTest, KeyAtOverlappingEndpointReappears) { | |
1175 | // This test uses a non-associative merge operator since that is a convenient | |
1176 | // way to get compaction to write out files with overlapping user-keys at the | |
1177 | // endpoints. Note, however, overlapping endpoints can also occur with other | |
1178 | // value types (Put, etc.), assuming the right snapshots are present. | |
1179 | const int kFileBytes = 1 << 20; | |
1180 | const int kValueBytes = 1 << 10; | |
1181 | const int kNumFiles = 4; | |
1182 | ||
1183 | Options options = CurrentOptions(); | |
1184 | options.compression = kNoCompression; | |
1185 | options.disable_auto_compactions = true; | |
1186 | options.merge_operator.reset(new MockMergeOperator()); | |
1187 | options.target_file_size_base = kFileBytes; | |
1188 | Reopen(options); | |
1189 | ||
1190 | // Push dummy data to L3 so that our actual test files on L0-L2 |
1191 | // will not be considered "bottommost" level; otherwise compaction |
1192 | // may prevent us from creating overlapping user keys, since |
1193 | // MergeHelper combines operands on the bottommost level. |
1194 | ASSERT_OK(db_->Merge(WriteOptions(), "key", "dummy")); | |
1195 | ASSERT_OK(db_->Flush(FlushOptions())); | |
1196 | MoveFilesToLevel(3); | |
1197 | ||
  Random rnd(301);
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = rnd.RandomString(kValueBytes);
      ASSERT_OK(db_->Merge(WriteOptions(), "key", value));
    }
    if (i == kNumFiles - 1) {
      // Take snapshot to prevent covered merge operands from being dropped by
      // compaction.
      snapshot = db_->GetSnapshot();
      // The DeleteRange is the last write so all merge operands are covered.
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 "key", "key_"));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  std::string value;
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  dbfull()->TEST_CompactRange(0 /* level */, nullptr /* begin */,
                              nullptr /* end */, nullptr /* column_family */,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  // Now we have multiple files at L1 all containing a single user key, thus
  // guaranteeing overlap in the file endpoints.
  ASSERT_GT(NumTableFilesAtLevel(1), 1);

  // Verify no merge operands reappeared after the compaction.
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  // Compact and verify again. It's worthwhile because now the files have
  // tighter endpoints, so we can verify that doesn't mess anything up.
  dbfull()->TEST_CompactRange(1 /* level */, nullptr /* begin */,
                              nullptr /* end */, nullptr /* column_family */,
                              true /* disallow_trivial_move */);
  ASSERT_GT(NumTableFilesAtLevel(2), 1);
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  db_->ReleaseSnapshot(snapshot);
}

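// A sketch (not part of the test above) of how the overlapping L1 endpoints
// could be inspected directly: every L1 file produced by
// KeyAtOverlappingEndpointReappears holds only the user key "key", so
// adjacent files share boundary user keys.
//
//   std::vector<LiveFileMetaData> metadata;
//   db_->GetLiveFilesMetaData(&metadata);
//   for (const auto& md : metadata) {
//     if (md.level == 1) {
//       // Expect md.smallestkey == md.largestkey == "key" for each file.
//     }
//   }
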
TEST_F(DBRangeDelTest, UntruncatedTombstoneDoesNotDeleteNewerKey) {
  // Verify a key newer than a range tombstone cannot be deleted by being
  // compacted to the bottom level (and thus having its seqnum zeroed) before
  // the range tombstone. This used to happen when range tombstones were
  // untruncated on reads such that they extended past their file boundaries.
  //
  // Test summary:
  //
  // - L1 is bottommost.
  // - A couple of snapshots are strategically taken to prevent seqnums from
  //   being zeroed, the range tombstone from being dropped, merge operands
  //   from being dropped, and merge operands from being combined.
  // - The left half of the files in L1 all have the same user key, ensuring
  //   their file boundaries overlap. In the past this would cause range
  //   tombstones to be untruncated.
  // - The right half of the L1 files all have different keys, ensuring no
  //   overlap.
  // - A range tombstone spans all L1 keys, so it is stored in every L1 file.
  // - Keys in the right side of the key-range are overwritten. These are
  //   compacted down to L1 after releasing snapshots such that their seqnums
  //   will be zeroed.
  // - A full range scan is performed. If the tombstone in the left L1 files
  //   were untruncated, it would now cover keys newer than it (but with
  //   zeroed seqnums) in the right L1 files.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  const int kNumFiles = 4;
  const int kMaxKey = kNumFiles * kFileBytes / kValueBytes;
  const int kKeysOverwritten = 10;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.num_levels = 2;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  Random rnd(301);
  // - snapshots[0] prevents merge operands from being combined during
  //   compaction.
  // - snapshots[1] prevents merge operands from being dropped due to the
  //   covering range tombstone.
  const Snapshot* snapshots[] = {nullptr, nullptr};
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = rnd.RandomString(kValueBytes);
      std::string key;
      if (i < kNumFiles / 2) {
        key = Key(0);
      } else {
        key = Key(1 + i * kFileBytes / kValueBytes + j);
      }
      ASSERT_OK(db_->Merge(WriteOptions(), key, value));
    }
    if (i == 0) {
      snapshots[0] = db_->GetSnapshot();
    }
    if (i == kNumFiles - 1) {
      snapshots[1] = db_->GetSnapshot();
      // The DeleteRange is the last write so all merge operands are covered.
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 Key(0), Key(kMaxKey + 1)));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));

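  // Counts the keys visible to a full forward scan at the latest sequence
  // number; a result of zero means the range tombstone covers everything.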
  auto get_key_count = [this]() -> int {
    auto* iter = db_->NewIterator(ReadOptions());
    iter->SeekToFirst();
    int keys_found = 0;
    for (; iter->Valid(); iter->Next()) {
      ++keys_found;
    }
    delete iter;
    return keys_found;
  };

  // All keys should be covered
  ASSERT_EQ(0, get_key_count());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
                              nullptr /* end_key */));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  // Roughly the left half of L1 files should have overlapping boundary keys,
  // while the right half should not.
  ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);

  // Now overwrite a few keys that are in L1 files that definitely don't have
  // overlapping boundary keys.
  for (int i = kMaxKey; i > kMaxKey - kKeysOverwritten; --i) {
    auto value = rnd.RandomString(kValueBytes);
    ASSERT_OK(db_->Merge(WriteOptions(), Key(i), value));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));

  // The overwritten keys are in L0 now, so clearly aren't covered by the
  // range tombstone in L1.
  ASSERT_EQ(kKeysOverwritten, get_key_count());

  // Release snapshots so seqnums can be zeroed when L0->L1 happens.
  db_->ReleaseSnapshot(snapshots[0]);
  db_->ReleaseSnapshot(snapshots[1]);

  auto begin_key_storage = Key(kMaxKey - kKeysOverwritten + 1);
  auto end_key_storage = Key(kMaxKey);
  Slice begin_key(begin_key_storage);
  Slice end_key(end_key_storage);
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin_key, &end_key));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);

  ASSERT_EQ(kKeysOverwritten, get_key_count());
}

TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) {
  // Exposes a bug where we were using
  // `RangeDelPositioningMode::kBackwardTraversal` while scanning merge
  // operands in the forward direction. Confusingly, this case happened during
  // `DBIter::Prev`. It could cause an assertion failure, or reappearing keys.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  // Need multiple keys so we can get results when calling `Prev()` after
  // `SeekToLast()`.
  const int kNumKeys = 3;
  const int kNumFiles = 4;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  Random rnd(301);
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = rnd.RandomString(kValueBytes);
      ASSERT_OK(db_->Merge(WriteOptions(), Key(j % kNumKeys), value));
      if (i == 0 && j == kNumKeys) {
        // Take snapshot to prevent covered merge operands from being dropped
        // or merged by compaction.
        snapshot = db_->GetSnapshot();
        // Do a DeleteRange near the beginning so only the oldest merge
        // operand for each key is covered. This ensures the sequence of
        // events:
        //
        // - `DBIter::Prev()` is called.
        // - After several versions of the same user key are encountered, it
        //   decides to seek using `DBIter::FindValueForCurrentKeyUsingSeek`.
        // - Binary searches to the newest version of the key, which is in
        //   the leftmost file containing the user key.
        // - Scans forwards to collect all merge operands. Eventually reaches
        //   the rightmost file containing the oldest merge operand, which
        //   should be covered by the `DeleteRange`. If `RangeDelAggregator`
        //   were not properly using `kForwardTraversal` here, that operand
        //   would reappear.
        ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                   Key(0), Key(kNumKeys + 1)));
      }
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
                              nullptr /* end_key */));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GT(NumTableFilesAtLevel(1), 1);

  auto* iter = db_->NewIterator(ReadOptions());
  iter->SeekToLast();
  int keys_found = 0;
  for (; iter->Valid(); iter->Prev()) {
    ++keys_found;
  }
  delete iter;
  ASSERT_EQ(kNumKeys, keys_found);

  db_->ReleaseSnapshot(snapshot);
}

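// For reference, the reverse-scan pattern exercised above: SeekToLast()
// positions the iterator at the largest key and Prev() steps toward smaller
// keys, with DBIter resolving each user key's merge operands along the way.
//
//   std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
//   for (it->SeekToLast(); it->Valid(); it->Prev()) {
//     // consume it->key() / it->value()
//   }
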
TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) {
  const int kFileBytes = 1 << 20;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  ASSERT_OK(Put(Key(0), "a"));
  const Snapshot* snapshot = db_->GetSnapshot();

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             Key(0), Key(10)));

  ASSERT_OK(db_->Flush(FlushOptions()));

  ReadOptions read_opts;
  read_opts.snapshot = snapshot;
  auto* iter = db_->NewIterator(read_opts);

  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(Key(0), iter->key());

  iter->Next();
  ASSERT_FALSE(iter->Valid());

  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

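// Note: without pinning the snapshot in ReadOptions, the same read observes
// the tombstone; e.g., in the test above a plain read of Key(0) would return
// NotFound:
//
//   std::string value;
//   ASSERT_TRUE(db_->Get(ReadOptions(), Key(0), &value).IsNotFound());
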
TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeysInImmMemTables) {
  const int kFileBytes = 1 << 20;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  // Block the flush thread so the immutable memtables stay pinned in memory.
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->LoadDependency({
      {"SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator",
       "DBImpl::BGWorkFlush"},
  });
  SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(Put(Key(0), "a"));
  std::unique_ptr<const Snapshot, std::function<void(const Snapshot*)>>
      snapshot(db_->GetSnapshot(),
               [this](const Snapshot* s) { db_->ReleaseSnapshot(s); });

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             Key(0), Key(10)));

  ASSERT_OK(dbfull()->TEST_SwitchMemtable());

  ReadOptions read_opts;
  read_opts.snapshot = snapshot.get();
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));

  TEST_SYNC_POINT("SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator");

  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(Key(0), iter->key());

  iter->Next();
  ASSERT_FALSE(iter->Valid());
}

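// Design note for the test above: wrapping the snapshot in a std::unique_ptr
// with a custom deleter guarantees ReleaseSnapshot() runs even if an ASSERT_*
// failure returns from the test early, unlike the manual ReleaseSnapshot()
// calls in the surrounding tests.
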
TEST_F(DBRangeDelTest, RangeTombstoneWrittenToMinimalSsts) {
  // Adapted from
  // https://github.com/cockroachdb/cockroach/blob/de8b3ea603dd1592d9dc26443c2cc92c356fbc2f/pkg/storage/engine/rocksdb_test.go#L1267-L1398.
  // Regression test for issue where range tombstone was written to more files
  // than necessary when it began exactly at the begin key in the next
  // compaction output file.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 4 << 10;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  // Allow a bit of slack in the size limits; we enforce them more strictly
  // when manually flushing/compacting.
  options.max_compaction_bytes = 2 * kFileBytes;
  options.target_file_size_base = 2 * kFileBytes;
  options.write_buffer_size = 2 * kFileBytes;
  Reopen(options);

  Random rnd(301);
  for (char first_char : {'a', 'b', 'c'}) {
    for (int i = 0; i < kFileBytes / kValueBytes; ++i) {
      std::string key(1, first_char);
      key.append(Key(i));
      std::string value = rnd.RandomString(kValueBytes);
      ASSERT_OK(Put(key, value));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
    MoveFilesToLevel(2);
  }
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(3, NumTableFilesAtLevel(2));

  // Populate the memtable lightly while spanning the whole key-space. The
  // setting of `max_compaction_bytes` will cause the L0->L1 compaction to
  // output multiple files to prevent a large L1->L2 compaction later.
  ASSERT_OK(Put("a", "val"));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             "c" + Key(1), "d"));
  // Our compaction output file cutting logic currently only considers point
  // keys. So, in order for the range tombstone to have a chance at landing at
  // the start of a new file, we need a point key at the range tombstone's
  // start.
  // TODO(ajkr): remove this `Put` after file cutting accounts for range
  // tombstones (#3977).
  ASSERT_OK(Put("c" + Key(1), "value"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Ensure the manual L0->L1 compaction cuts the outputs before the range
  // tombstone and the range tombstone is only placed in the second SST.
  std::string begin_key_storage("c" + Key(1));
  Slice begin_key(begin_key_storage);
  std::string end_key_storage("d");
  Slice end_key(end_key_storage);
  dbfull()->TEST_CompactRange(0 /* level */, &begin_key /* begin */,
                              &end_key /* end */, nullptr /* column_family */,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  std::vector<LiveFileMetaData> all_metadata;
  std::vector<LiveFileMetaData> l1_metadata;
  db_->GetLiveFilesMetaData(&all_metadata);
  for (const auto& metadata : all_metadata) {
    if (metadata.level == 1) {
      l1_metadata.push_back(metadata);
    }
  }
  std::sort(l1_metadata.begin(), l1_metadata.end(),
            [&](const LiveFileMetaData& a, const LiveFileMetaData& b) {
              return options.comparator->Compare(a.smallestkey, b.smallestkey) <
                     0;
            });
  ASSERT_EQ("a", l1_metadata[0].smallestkey);
  ASSERT_EQ("a", l1_metadata[0].largestkey);
  ASSERT_EQ("c" + Key(1), l1_metadata[1].smallestkey);
  ASSERT_EQ("d", l1_metadata[1].largestkey);

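  // Note: `TablePropertiesCollection` is keyed by full file path, while
  // `LiveFileMetaData::name` holds just the file name, so the loop below
  // matches the L1 SST by path suffix.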
  TablePropertiesCollection all_table_props;
  ASSERT_OK(db_->GetPropertiesOfAllTables(&all_table_props));
  int64_t num_range_deletions = 0;
  for (const auto& name_and_table_props : all_table_props) {
    const auto& name = name_and_table_props.first;
    const auto& table_props = name_and_table_props.second;
    // The range tombstone should only be output to the second L1 SST.
    if (name.size() >= l1_metadata[1].name.size() &&
        name.substr(name.size() - l1_metadata[1].name.size())
                .compare(l1_metadata[1].name) == 0) {
      ASSERT_EQ(1, table_props->num_range_deletions);
      ++num_range_deletions;
    } else {
      ASSERT_EQ(0, table_props->num_range_deletions);
    }
  }
  ASSERT_EQ(1, num_range_deletions);
}

TEST_F(DBRangeDelTest, OverlappedTombstones) {
  const int kNumPerFile = 4, kNumFiles = 2;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.max_compaction_bytes = 9 * 1024;
  DestroyAndReopen(options);
  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(rnd.RandomString(3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
    }
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  MoveFilesToLevel(2);
  ASSERT_EQ(2, NumTableFilesAtLevel(2));

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             Key(1), Key(kNumFiles * kNumPerFile + 1)));
  ASSERT_OK(db_->Flush(FlushOptions()));

  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);

  // The tombstone range is not broken up into multiple SSTs, which could
  // incur a large compaction with L2.
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
  dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));
}

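// Contrast with OverlappedKeys below: rewriting the same key range with point
// keys (instead of a single range tombstone) causes the L0->L1 compaction to
// split its output into multiple SSTs, bounding future overlap with the
// grandparent (L2) files.
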
TEST_F(DBRangeDelTest, OverlappedKeys) {
  const int kNumPerFile = 4, kNumFiles = 2;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.max_compaction_bytes = 9 * 1024;
  DestroyAndReopen(options);
  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(rnd.RandomString(3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
    }
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  MoveFilesToLevel(2);
  ASSERT_EQ(2, NumTableFilesAtLevel(2));

  for (int i = 1; i < kNumFiles * kNumPerFile + 1; i++) {
    ASSERT_OK(Put(Key(i), "0x123"));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // The key range is broken up into three SSTs to avoid a future big
  // compaction with the grandparent files.
  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(3, NumTableFilesAtLevel(1));

  dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));
}

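// Rough arithmetic behind the three-way split above (assuming, as the test
// relies on, that compaction cuts an output file once its overlap with the
// grandparent level would exceed max_compaction_bytes): each L2 file holds
// ~12KB (4 values x 3KB), so the ~24KB of overlapping L2 data divided by the
// 9KB limit forces at least ceil(24 / 9) = 3 L1 output files.
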
#endif  // ROCKSDB_LITE

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}