]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/db/version_set_test.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / db / version_set_test.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10#include "db/version_set.h"
20effc67 11
1e59de90
TL
12#include <algorithm>
13
f67539c2 14#include "db/db_impl/db_impl.h"
1e59de90 15#include "db/db_test_util.h"
11fdf7f2 16#include "db/log_writer.h"
1e59de90
TL
17#include "rocksdb/advanced_options.h"
18#include "rocksdb/convenience.h"
19#include "rocksdb/file_system.h"
20effc67 20#include "table/block_based/block_based_table_factory.h"
11fdf7f2 21#include "table/mock_table.h"
1e59de90 22#include "table/unique_id_impl.h"
f67539c2
TL
23#include "test_util/testharness.h"
24#include "test_util/testutil.h"
11fdf7f2 25#include "util/string_util.h"
7c673cae 26
f67539c2 27namespace ROCKSDB_NAMESPACE {
7c673cae
FG
28
29class GenerateLevelFilesBriefTest : public testing::Test {
30 public:
31 std::vector<FileMetaData*> files_;
32 LevelFilesBrief file_level_;
33 Arena arena_;
34
1e59de90 35 GenerateLevelFilesBriefTest() {}
7c673cae 36
494da23a 37 ~GenerateLevelFilesBriefTest() override {
7c673cae
FG
38 for (size_t i = 0; i < files_.size(); i++) {
39 delete files_[i];
40 }
41 }
42
43 void Add(const char* smallest, const char* largest,
44 SequenceNumber smallest_seq = 100,
45 SequenceNumber largest_seq = 100) {
f67539c2
TL
46 FileMetaData* f = new FileMetaData(
47 files_.size() + 1, 0, 0,
48 InternalKey(smallest, smallest_seq, kTypeValue),
49 InternalKey(largest, largest_seq, kTypeValue), smallest_seq,
1e59de90
TL
50 largest_seq, /* marked_for_compact */ false, Temperature::kUnknown,
51 kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
52 kUnknownFileCreationTime, kUnknownFileChecksum,
53 kUnknownFileChecksumFuncName, kNullUniqueId64x2);
7c673cae
FG
54 files_.push_back(f);
55 }
56
57 int Compare() {
58 int diff = 0;
59 for (size_t i = 0; i < files_.size(); i++) {
60 if (file_level_.files[i].fd.GetNumber() != files_[i]->fd.GetNumber()) {
61 diff++;
62 }
63 }
64 return diff;
65 }
66};
67
// With no files added, the generated brief must report zero files and
// Compare() must find no mismatches.
68TEST_F(GenerateLevelFilesBriefTest, Empty) {
69 DoGenerateLevelFilesBrief(&file_level_, files_, &arena_);
70 ASSERT_EQ(0u, file_level_.num_files);
71 ASSERT_EQ(0, Compare());
72}
73
// A single file ("p".."q") must produce a brief with exactly one entry whose
// file number matches the source file.
74TEST_F(GenerateLevelFilesBriefTest, Single) {
75 Add("p", "q");
76 DoGenerateLevelFilesBrief(&file_level_, files_, &arena_);
77 ASSERT_EQ(1u, file_level_.num_files);
78 ASSERT_EQ(0, Compare());
79}
80
// Four files must produce a brief with four entries, each carrying the same
// file number as the file at the same position in files_.
81TEST_F(GenerateLevelFilesBriefTest, Multiple) {
82 Add("150", "200");
83 Add("200", "250");
84 Add("300", "350");
85 Add("400", "450");
86 DoGenerateLevelFilesBrief(&file_level_, files_, &arena_);
87 ASSERT_EQ(4u, file_level_.num_files);
88 ASSERT_EQ(0, Compare());
89}
90
91class CountingLogger : public Logger {
92 public:
93 CountingLogger() : log_count(0) {}
94 using Logger::Logv;
494da23a 95 void Logv(const char* /*format*/, va_list /*ap*/) override { log_count++; }
7c673cae
FG
96 int log_count;
97};
98
99Options GetOptionsWithNumLevels(int num_levels,
100 std::shared_ptr<CountingLogger> logger) {
101 Options opt;
102 opt.num_levels = num_levels;
103 opt.info_log = logger;
104 return opt;
105}
106
20effc67 107class VersionStorageInfoTestBase : public testing::Test {
7c673cae
FG
108 public:
109 const Comparator* ucmp_;
110 InternalKeyComparator icmp_;
111 std::shared_ptr<CountingLogger> logger_;
112 Options options_;
1e59de90 113 ImmutableOptions ioptions_;
7c673cae
FG
114 MutableCFOptions mutable_cf_options_;
115 VersionStorageInfo vstorage_;
116
117 InternalKey GetInternalKey(const char* ukey,
118 SequenceNumber smallest_seq = 100) {
119 return InternalKey(ukey, smallest_seq, kTypeValue);
120 }
121
20effc67
TL
122 explicit VersionStorageInfoTestBase(const Comparator* ucmp)
123 : ucmp_(ucmp),
7c673cae
FG
124 icmp_(ucmp_),
125 logger_(new CountingLogger()),
126 options_(GetOptionsWithNumLevels(6, logger_)),
127 ioptions_(options_),
128 mutable_cf_options_(options_),
20effc67
TL
129 vstorage_(&icmp_, ucmp_, 6, kCompactionStyleLevel,
130 /*src_vstorage=*/nullptr,
131 /*_force_consistency_checks=*/false) {}
7c673cae 132
20effc67
TL
133 ~VersionStorageInfoTestBase() override {
134 for (int i = 0; i < vstorage_.num_levels(); ++i) {
7c673cae
FG
135 for (auto* f : vstorage_.LevelFiles(i)) {
136 if (--f->refs == 0) {
137 delete f;
138 }
139 }
140 }
141 }
142
143 void Add(int level, uint32_t file_number, const char* smallest,
1e59de90
TL
144 const char* largest, uint64_t file_size = 0,
145 uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) {
146 constexpr SequenceNumber dummy_seq = 0;
147
148 Add(level, file_number, GetInternalKey(smallest, dummy_seq),
149 GetInternalKey(largest, dummy_seq), file_size, oldest_blob_file_number);
7c673cae 150 }
11fdf7f2
TL
151
152 void Add(int level, uint32_t file_number, const InternalKey& smallest,
1e59de90
TL
153 const InternalKey& largest, uint64_t file_size = 0,
154 uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) {
11fdf7f2 155 assert(level < vstorage_.num_levels());
f67539c2
TL
156 FileMetaData* f = new FileMetaData(
157 file_number, 0, file_size, smallest, largest, /* smallest_seq */ 0,
158 /* largest_seq */ 0, /* marked_for_compact */ false,
1e59de90
TL
159 Temperature::kUnknown, oldest_blob_file_number,
160 kUnknownOldestAncesterTime, kUnknownFileCreationTime,
161 kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
11fdf7f2 162 f->compensated_file_size = file_size;
11fdf7f2
TL
163 vstorage_.AddFile(level, f);
164 }
165
1e59de90
TL
166 void AddBlob(uint64_t blob_file_number, uint64_t total_blob_count,
167 uint64_t total_blob_bytes,
168 BlobFileMetaData::LinkedSsts linked_ssts,
169 uint64_t garbage_blob_count, uint64_t garbage_blob_bytes) {
170 auto shared_meta = SharedBlobFileMetaData::Create(
171 blob_file_number, total_blob_count, total_blob_bytes,
172 /* checksum_method */ std::string(),
173 /* checksum_value */ std::string());
174 auto meta =
175 BlobFileMetaData::Create(std::move(shared_meta), std::move(linked_ssts),
176 garbage_blob_count, garbage_blob_bytes);
177
178 vstorage_.AddBlobFile(std::move(meta));
179 }
180
181 void UpdateVersionStorageInfo() {
182 vstorage_.PrepareForVersionAppend(ioptions_, mutable_cf_options_);
183 vstorage_.SetFinalized();
184 }
185
11fdf7f2
TL
186 std::string GetOverlappingFiles(int level, const InternalKey& begin,
187 const InternalKey& end) {
188 std::vector<FileMetaData*> inputs;
189 vstorage_.GetOverlappingInputs(level, &begin, &end, &inputs);
190
191 std::string result;
192 for (size_t i = 0; i < inputs.size(); ++i) {
193 if (i > 0) {
194 result += ",";
195 }
196 AppendNumberTo(&result, inputs[i]->fd.GetNumber());
197 }
198 return result;
199 }
7c673cae
FG
200};
201
20effc67
TL
202class VersionStorageInfoTest : public VersionStorageInfoTestBase {
203 public:
204 VersionStorageInfoTest() : VersionStorageInfoTestBase(BytewiseComparator()) {}
205
206 ~VersionStorageInfoTest() override {}
207};
208
7c673cae
FG
// With dynamic level bytes disabled, a base of 10 and a multiplier of 5, the
// per-level limits for L1..L4 are expected to be 10, 50, 250 and 1250, and
// nothing should be logged.
209TEST_F(VersionStorageInfoTest, MaxBytesForLevelStatic) {
210 ioptions_.level_compaction_dynamic_level_bytes = false;
211 mutable_cf_options_.max_bytes_for_level_base = 10;
212 mutable_cf_options_.max_bytes_for_level_multiplier = 5;
7c673cae 213
1e59de90
TL
214 Add(4, 100U, "1", "2", 100U);
215 Add(5, 101U, "1", "2", 100U);
216
217 UpdateVersionStorageInfo();
218
7c673cae
FG
219 ASSERT_EQ(vstorage_.MaxBytesForLevel(1), 10U);
220 ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 50U);
221 ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 250U);
222 ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1250U);
223
224 ASSERT_EQ(0, logger_->log_count);
225}
226
// Dynamic level bytes, base 1000, multiplier 5. With a single 500-byte file
// in L5 the base level is expected to be 5 and nothing should be logged.
1e59de90 227TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic_1) {
7c673cae
FG
228 ioptions_.level_compaction_dynamic_level_bytes = true;
229 mutable_cf_options_.max_bytes_for_level_base = 1000;
230 mutable_cf_options_.max_bytes_for_level_multiplier = 5;
1e59de90 231
7c673cae
FG
232 Add(5, 1U, "1", "2", 500U);
233
1e59de90
TL
234 UpdateVersionStorageInfo();
235
7c673cae
FG
236 ASSERT_EQ(0, logger_->log_count);
237 ASSERT_EQ(vstorage_.base_level(), 5);
1e59de90
TL
238}
239
// Dynamic level bytes, base 1000, multiplier 5. With 500 + 550 bytes in L5
// the base level is expected to be 4 with an L4 limit of 1000, and nothing
// should be logged.
240TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic_2) {
241 ioptions_.level_compaction_dynamic_level_bytes = true;
242 mutable_cf_options_.max_bytes_for_level_base = 1000;
243 mutable_cf_options_.max_bytes_for_level_multiplier = 5;
7c673cae 244
1e59de90 245 Add(5, 1U, "1", "2", 500U);
7c673cae 246 Add(5, 2U, "3", "4", 550U);
1e59de90
TL
247
248 UpdateVersionStorageInfo();
249
7c673cae 250 ASSERT_EQ(0, logger_->log_count);
11fdf7f2 251 ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1000U);
7c673cae 252 ASSERT_EQ(vstorage_.base_level(), 4);
1e59de90
TL
253}
254
// Same setup as MaxBytesForLevelDynamic_2 plus a 550-byte file in L4. The
// base level is still expected to be 4 with an L4 limit of 1000, and nothing
// should be logged.
255TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic_3) {
256 ioptions_.level_compaction_dynamic_level_bytes = true;
257 mutable_cf_options_.max_bytes_for_level_base = 1000;
258 mutable_cf_options_.max_bytes_for_level_multiplier = 5;
7c673cae 259
1e59de90
TL
260 Add(5, 1U, "1", "2", 500U);
261 Add(5, 2U, "3", "4", 550U);
7c673cae 262 Add(4, 3U, "3", "4", 550U);
1e59de90
TL
263
264 UpdateVersionStorageInfo();
265
7c673cae 266 ASSERT_EQ(0, logger_->log_count);
11fdf7f2 267 ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1000U);
7c673cae 268 ASSERT_EQ(vstorage_.base_level(), 4);
1e59de90 269}
7c673cae 270
1e59de90
TL
// Adds 250 + 300 bytes in L3 on top of the MaxBytesForLevelDynamic_3 data.
// Expected outcome: exactly one log message, base level 3, an L3 limit of
// 1000 and an L4 limit of 1005.
271TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic_4) {
272 ioptions_.level_compaction_dynamic_level_bytes = true;
273 mutable_cf_options_.max_bytes_for_level_base = 1000;
274 mutable_cf_options_.max_bytes_for_level_multiplier = 5;
275
276 Add(5, 1U, "1", "2", 500U);
277 Add(5, 2U, "3", "4", 550U);
278 Add(4, 3U, "3", "4", 550U);
7c673cae
FG
279 Add(3, 4U, "3", "4", 250U);
280 Add(3, 5U, "5", "7", 300U);
1e59de90
TL
281
282 UpdateVersionStorageInfo();
283
7c673cae
FG
284 ASSERT_EQ(1, logger_->log_count);
285 ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1005U);
11fdf7f2 286 ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 1000U);
7c673cae 287 ASSERT_EQ(vstorage_.base_level(), 3);
1e59de90 288}
7c673cae 289
1e59de90
TL
// Adds two 5-byte files in L1 on top of the MaxBytesForLevelDynamic_4 data.
// Expected outcome: exactly one log message, base level 1, limits of 1000
// for L1 and 1005 for L2, and limits above 1005 for L3 and L4.
290TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic_5) {
291 ioptions_.level_compaction_dynamic_level_bytes = true;
292 mutable_cf_options_.max_bytes_for_level_base = 1000;
293 mutable_cf_options_.max_bytes_for_level_multiplier = 5;
294
295 Add(5, 1U, "1", "2", 500U);
296 Add(5, 2U, "3", "4", 550U);
297 Add(4, 3U, "3", "4", 550U);
298 Add(3, 4U, "3", "4", 250U);
299 Add(3, 5U, "5", "7", 300U);
7c673cae
FG
300 Add(1, 6U, "3", "4", 5U);
301 Add(1, 7U, "8", "9", 5U);
1e59de90
TL
302
303 UpdateVersionStorageInfo();
304
7c673cae
FG
305 ASSERT_EQ(1, logger_->log_count);
306 ASSERT_GT(vstorage_.MaxBytesForLevel(4), 1005U);
307 ASSERT_GT(vstorage_.MaxBytesForLevel(3), 1005U);
308 ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 1005U);
11fdf7f2 309 ASSERT_EQ(vstorage_.MaxBytesForLevel(1), 1000U);
7c673cae
FG
310 ASSERT_EQ(vstorage_.base_level(), 1);
311}
312
// Dynamic level bytes, base 100, multiplier 2, with data in every level.
// Expected outcome: base level 1, limits of 100/200/400/800 for L1..L4, and
// nothing logged.
313TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicLotsOfData) {
314 ioptions_.level_compaction_dynamic_level_bytes = true;
315 mutable_cf_options_.max_bytes_for_level_base = 100;
316 mutable_cf_options_.max_bytes_for_level_multiplier = 2;
1e59de90 317
7c673cae
FG
318 Add(0, 1U, "1", "2", 50U);
319 Add(1, 2U, "1", "2", 50U);
320 Add(2, 3U, "1", "2", 500U);
321 Add(3, 4U, "1", "2", 500U);
322 Add(4, 5U, "1", "2", 1700U);
323 Add(5, 6U, "1", "2", 500U);
324
1e59de90
TL
325 UpdateVersionStorageInfo();
326
7c673cae
FG
327 ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 800U);
328 ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 400U);
329 ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 200U);
330 ASSERT_EQ(vstorage_.MaxBytesForLevel(1), 100U);
331 ASSERT_EQ(vstorage_.base_level(), 1);
332 ASSERT_EQ(0, logger_->log_count);
333}
334
// Dynamic level bytes with a 10 * kOneGB base (kOneGB is 10^9 here) and a
// multiplier of 10. With 3000 * kOneGB in L5 the expected limits are
// 3000/300/30/10 * kOneGB for L5..L2, the base level is 2, and nothing is
// logged.
335TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicLargeLevel) {
336 uint64_t kOneGB = 1000U * 1000U * 1000U;
337 ioptions_.level_compaction_dynamic_level_bytes = true;
338 mutable_cf_options_.max_bytes_for_level_base = 10U * kOneGB;
339 mutable_cf_options_.max_bytes_for_level_multiplier = 10;
1e59de90 340
7c673cae
FG
341 Add(0, 1U, "1", "2", 50U);
342 Add(3, 4U, "1", "2", 32U * kOneGB);
343 Add(4, 5U, "1", "2", 500U * kOneGB);
344 Add(5, 6U, "1", "2", 3000U * kOneGB);
345
1e59de90
TL
346 UpdateVersionStorageInfo();
347
7c673cae
FG
348 ASSERT_EQ(vstorage_.MaxBytesForLevel(5), 3000U * kOneGB);
349 ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 300U * kOneGB);
350 ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 30U * kOneGB);
11fdf7f2 351 ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 10U * kOneGB);
7c673cae
FG
352 ASSERT_EQ(vstorage_.base_level(), 2);
353 ASSERT_EQ(0, logger_->log_count);
354}
355
494da23a
TL
// Dynamic level bytes, base 40000, multiplier 5, L0 compaction trigger 2,
// with three 10000-byte L0 files. Expected outcome: base level 2, level
// multiplier 5.0, limits of 40000/51450/257250 for L2..L4, nothing logged,
// and L0 as the top-ranked compaction level.
356TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_1) {
357 ioptions_.level_compaction_dynamic_level_bytes = true;
358 mutable_cf_options_.max_bytes_for_level_base = 40000;
359 mutable_cf_options_.max_bytes_for_level_multiplier = 5;
360 mutable_cf_options_.level0_file_num_compaction_trigger = 2;
361
362 Add(0, 1U, "1", "2", 10000U);
363 Add(0, 2U, "1", "2", 10000U);
364 Add(0, 3U, "1", "2", 10000U);
365
366 Add(5, 4U, "1", "2", 1286250U);
367 Add(4, 5U, "1", "2", 200000U);
368 Add(3, 6U, "1", "2", 40000U);
369 Add(2, 7U, "1", "2", 8000U);
370
1e59de90
TL
371 UpdateVersionStorageInfo();
372
494da23a
TL
373 ASSERT_EQ(0, logger_->log_count);
374 ASSERT_EQ(2, vstorage_.base_level());
375 // The level multiplier is expected to equal the configured value of 5.0.
376 ASSERT_EQ(vstorage_.level_multiplier(), 5.0);
494da23a
TL
377 ASSERT_EQ(40000U, vstorage_.MaxBytesForLevel(2));
378 ASSERT_EQ(51450U, vstorage_.MaxBytesForLevel(3));
379 ASSERT_EQ(257250U, vstorage_.MaxBytesForLevel(4));
1e59de90
TL
380
381 vstorage_.ComputeCompactionScore(ioptions_, mutable_cf_options_);
382 // Only L0 hits compaction.
383 ASSERT_EQ(vstorage_.CompactionScoreLevel(0), 0);
494da23a
TL
384}
385
// Dynamic level bytes, base 10000, multiplier 5, L0 compaction trigger 4,
// with three 10000-byte L0 files and L2..L4 slightly over their limits.
// Expected outcome: base level 1, limits of 10000/10290/51450/257250 for
// L1..L4, nothing logged, and compaction ranking L0 first, then L4.
386TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_2) {
387 ioptions_.level_compaction_dynamic_level_bytes = true;
388 mutable_cf_options_.max_bytes_for_level_base = 10000;
389 mutable_cf_options_.max_bytes_for_level_multiplier = 5;
1e59de90 390 mutable_cf_options_.level0_file_num_compaction_trigger = 4;
494da23a
TL
391
392 Add(0, 11U, "1", "2", 10000U);
393 Add(0, 12U, "1", "2", 10000U);
394 Add(0, 13U, "1", "2", 10000U);
395
1e59de90 396 // Level size should be around 10,000, 10,290, 51,450, 257,250
494da23a 397 Add(5, 4U, "1", "2", 1286250U);
1e59de90
TL
398 Add(4, 5U, "1", "2", 258000U); // unadjusted score 1.003
399 Add(3, 6U, "1", "2", 53000U); // unadjusted score 1.03
400 Add(2, 7U, "1", "2", 20000U); // unadjusted score 1.94
401
402 UpdateVersionStorageInfo();
494da23a 403
494da23a 404 ASSERT_EQ(0, logger_->log_count);
1e59de90
TL
405 ASSERT_EQ(1, vstorage_.base_level());
406 ASSERT_EQ(10000U, vstorage_.MaxBytesForLevel(1));
407 ASSERT_EQ(10290U, vstorage_.MaxBytesForLevel(2));
408 ASSERT_EQ(51450U, vstorage_.MaxBytesForLevel(3));
409 ASSERT_EQ(257250U, vstorage_.MaxBytesForLevel(4));
410
411 vstorage_.ComputeCompactionScore(ioptions_, mutable_cf_options_);
412 // Although L2 and l3 have higher unadjusted compaction score, considering
413 // a relatively large L0 being compacted down soon, L4 is picked up for
414 // compaction.
415 // L0 is still picked up for oversizing.
416 ASSERT_EQ(0, vstorage_.CompactionScoreLevel(0));
417 ASSERT_EQ(4, vstorage_.CompactionScoreLevel(1));
494da23a
TL
418}
419
// Dynamic level bytes, base 20000, multiplier 5, L0 compaction trigger 5,
// with four 2500-byte L0 files. Expected outcome: base level 2 with an L2
// limit of 20000, nothing logged, and compaction ranking L3, L2, L4.
420TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_3) {
421 ioptions_.level_compaction_dynamic_level_bytes = true;
1e59de90 422 mutable_cf_options_.max_bytes_for_level_base = 20000;
494da23a 423 mutable_cf_options_.max_bytes_for_level_multiplier = 5;
1e59de90 424 mutable_cf_options_.level0_file_num_compaction_trigger = 5;
494da23a 425
1e59de90
TL
426 Add(0, 11U, "1", "2", 2500U);
427 Add(0, 12U, "1", "2", 2500U);
428 Add(0, 13U, "1", "2", 2500U);
429 Add(0, 14U, "1", "2", 2500U);
494da23a 430
1e59de90 431 // Level size should be around 20,000, 53000, 258000
494da23a 432 Add(5, 4U, "1", "2", 1286250U);
1e59de90
TL
433 Add(4, 5U, "1", "2", 260000U); // Unadjusted score 1.01, adjusted about 4.3
434 Add(3, 6U, "1", "2", 85000U); // Unadjusted score 1.42, adjusted about 11.6
435 Add(2, 7U, "1", "2", 30000); // Unadjusted score 1.5, adjusted about 10.0
436
437 UpdateVersionStorageInfo();
494da23a 438
494da23a
TL
439 ASSERT_EQ(0, logger_->log_count);
440 ASSERT_EQ(2, vstorage_.base_level());
1e59de90
TL
441 ASSERT_EQ(20000U, vstorage_.MaxBytesForLevel(2));
442
443 vstorage_.ComputeCompactionScore(ioptions_, mutable_cf_options_);
444 // Although L2 has higher unadjusted compaction score, considering
445 // a relatively large L0 being compacted down soon, L3 is picked up for
446 // compaction.
447
448 ASSERT_EQ(3, vstorage_.CompactionScoreLevel(0));
449 ASSERT_EQ(2, vstorage_.CompactionScoreLevel(1));
450 ASSERT_EQ(4, vstorage_.CompactionScoreLevel(2));
494da23a
TL
451}
452
7c673cae
FG
// Every file in L1..L4 overlaps the single 10-byte file in L5, so the
// estimate is expected to equal that file's size (10).
453TEST_F(VersionStorageInfoTest, EstimateLiveDataSize) {
454 // Test whether the overlaps are detected as expected
455 Add(1, 1U, "4", "7", 1U); // Perfect overlap with last level
456 Add(2, 2U, "3", "5", 1U); // Partial overlap with last level
457 Add(2, 3U, "6", "8", 1U); // Partial overlap with last level
458 Add(3, 4U, "1", "9", 1U); // Contains range of last level
459 Add(4, 5U, "4", "5", 1U); // Inside range of last level
20effc67
TL
460 Add(4, 6U, "6", "7", 1U); // Inside range of last level
461 Add(5, 7U, "4", "7", 10U);
1e59de90
TL
462
463 UpdateVersionStorageInfo();
464
7c673cae
FG
465 ASSERT_EQ(10U, vstorage_.EstimateLiveDataSize());
466}
467
// Seven 1-byte files spread over L0..L3; three are marked below as ignored
// because of overlap with a deeper level, so the expected estimate is 4.
468TEST_F(VersionStorageInfoTest, EstimateLiveDataSize2) {
469 Add(0, 1U, "9", "9", 1U); // Level 0 is not ordered
20effc67
TL
470 Add(0, 2U, "5", "6", 1U); // Ignored because of [5,6] in l1
471 Add(1, 3U, "1", "2", 1U); // Ignored because of [2,3] in l2
472 Add(1, 4U, "3", "4", 1U); // Ignored because of [2,3] in l2
473 Add(1, 5U, "5", "6", 1U);
474 Add(2, 6U, "2", "3", 1U);
475 Add(3, 7U, "7", "8", 1U);
1e59de90
TL
476
477 UpdateVersionStorageInfo();
478
7c673cae
FG
479 ASSERT_EQ(4U, vstorage_.EstimateLiveDataSize());
480}
481
11fdf7f2
TL
// Checks which L1 files GetOverlappingFiles reports for a series of key
// ranges, covering neighbouring files that meet at a range-deletion
// sentinel, files that share a boundary user key, and disjoint files.
482TEST_F(VersionStorageInfoTest, GetOverlappingInputs) {
483 // Two files that overlap at the range deletion tombstone sentinel.
1e59de90
TL
484 Add(1, 1U, {"a", 0, kTypeValue},
485 {"b", kMaxSequenceNumber, kTypeRangeDeletion}, 1);
11fdf7f2
TL
486 Add(1, 2U, {"b", 0, kTypeValue}, {"c", 0, kTypeValue}, 1);
487 // Two files that overlap at the same user key.
488 Add(1, 3U, {"d", 0, kTypeValue}, {"e", kMaxSequenceNumber, kTypeValue}, 1);
489 Add(1, 4U, {"e", 0, kTypeValue}, {"f", 0, kTypeValue}, 1);
490 // Two files that do not overlap.
491 Add(1, 5U, {"g", 0, kTypeValue}, {"h", 0, kTypeValue}, 1);
492 Add(1, 6U, {"i", 0, kTypeValue}, {"j", 0, kTypeValue}, 1);
1e59de90
TL
493
494 UpdateVersionStorageInfo();
495
496 ASSERT_EQ("1,2",
497 GetOverlappingFiles(1, {"a", 0, kTypeValue}, {"b", 0, kTypeValue}));
498 ASSERT_EQ("1",
499 GetOverlappingFiles(1, {"a", 0, kTypeValue},
500 {"b", kMaxSequenceNumber, kTypeRangeDeletion}));
501 ASSERT_EQ("2", GetOverlappingFiles(1, {"b", kMaxSequenceNumber, kTypeValue},
502 {"c", 0, kTypeValue}));
503 ASSERT_EQ("3,4",
504 GetOverlappingFiles(1, {"d", 0, kTypeValue}, {"e", 0, kTypeValue}));
505 ASSERT_EQ("3",
506 GetOverlappingFiles(1, {"d", 0, kTypeValue},
507 {"e", kMaxSequenceNumber, kTypeRangeDeletion}));
508 ASSERT_EQ("3,4", GetOverlappingFiles(1, {"e", kMaxSequenceNumber, kTypeValue},
509 {"f", 0, kTypeValue}));
510 ASSERT_EQ("3,4",
511 GetOverlappingFiles(1, {"e", 0, kTypeValue}, {"f", 0, kTypeValue}));
512 ASSERT_EQ("5",
513 GetOverlappingFiles(1, {"g", 0, kTypeValue}, {"h", 0, kTypeValue}));
514 ASSERT_EQ("6",
515 GetOverlappingFiles(1, {"i", 0, kTypeValue}, {"j", 0, kTypeValue}));
11fdf7f2
TL
516}
517
20effc67
TL
// After adding files 11 and 12 to L0 and file 7 to L2, each file number must
// resolve to the expected (level, position) location and to non-null
// metadata, while an unknown number (999) must yield an invalid location and
// null metadata.
518TEST_F(VersionStorageInfoTest, FileLocationAndMetaDataByNumber) {
519 Add(0, 11U, "1", "2", 5000U);
520 Add(0, 12U, "1", "2", 5000U);
521
522 Add(2, 7U, "1", "2", 8000U);
523
1e59de90
TL
524 UpdateVersionStorageInfo();
525
20effc67
TL
526 ASSERT_EQ(vstorage_.GetFileLocation(11U),
527 VersionStorageInfo::FileLocation(0, 0));
528 ASSERT_NE(vstorage_.GetFileMetaDataByNumber(11U), nullptr);
529
530 ASSERT_EQ(vstorage_.GetFileLocation(12U),
531 VersionStorageInfo::FileLocation(0, 1));
532 ASSERT_NE(vstorage_.GetFileMetaDataByNumber(12U), nullptr);
533
534 ASSERT_EQ(vstorage_.GetFileLocation(7U),
535 VersionStorageInfo::FileLocation(2, 0));
536 ASSERT_NE(vstorage_.GetFileMetaDataByNumber(7U), nullptr);
537
538 ASSERT_FALSE(vstorage_.GetFileLocation(999U).IsValid());
539 ASSERT_EQ(vstorage_.GetFileMetaDataByNumber(999U), nullptr);
540}
541
1e59de90
TL
// With no SST or blob files present, computing the forced blob GC set must
// leave the list of marked files empty.
542TEST_F(VersionStorageInfoTest, ForcedBlobGCEmpty) {
543 // No SST or blob files in VersionStorageInfo
544 UpdateVersionStorageInfo();
545
546 constexpr double age_cutoff = 0.5;
547 constexpr double force_threshold = 0.75;
548 vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
549
550 ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
551}
552
// Exercises ComputeFilesMarkedForForcedBlobGC with one L0 SST and four blob
// files under several (age_cutoff, force_threshold) pairs. The SST is
// expected to be marked only in the final case (cutoff 1.0, threshold 0.5).
553TEST_F(VersionStorageInfoTest, ForcedBlobGCSingleBatch) {
554 // Test the edge case when all blob files are part of the oldest batch.
555 // We have one L0 SST file #1, and four blob files #10, #11, #12, and #13.
556 // The oldest blob file used by SST #1 is blob file #10.
557
558 constexpr int level = 0;
559
560 constexpr uint64_t sst = 1;
561
562 constexpr uint64_t first_blob = 10;
563 constexpr uint64_t second_blob = 11;
564 constexpr uint64_t third_blob = 12;
565 constexpr uint64_t fourth_blob = 13;
566
567 {
568 constexpr char smallest[] = "bar1";
569 constexpr char largest[] = "foo1";
570 constexpr uint64_t file_size = 1000;
571
572 Add(level, sst, smallest, largest, file_size, first_blob);
573 }
574
575 {
576 constexpr uint64_t total_blob_count = 10;
577 constexpr uint64_t total_blob_bytes = 100000;
578 constexpr uint64_t garbage_blob_count = 2;
579 constexpr uint64_t garbage_blob_bytes = 15000;
580
581 AddBlob(first_blob, total_blob_count, total_blob_bytes,
582 BlobFileMetaData::LinkedSsts{sst}, garbage_blob_count,
583 garbage_blob_bytes);
584 }
585
586 {
587 constexpr uint64_t total_blob_count = 4;
588 constexpr uint64_t total_blob_bytes = 400000;
589 constexpr uint64_t garbage_blob_count = 3;
590 constexpr uint64_t garbage_blob_bytes = 235000;
591
592 AddBlob(second_blob, total_blob_count, total_blob_bytes,
593 BlobFileMetaData::LinkedSsts{}, garbage_blob_count,
594 garbage_blob_bytes);
595 }
596
597 {
598 constexpr uint64_t total_blob_count = 20;
599 constexpr uint64_t total_blob_bytes = 1000000;
600 constexpr uint64_t garbage_blob_count = 8;
601 constexpr uint64_t garbage_blob_bytes = 400000;
602
603 AddBlob(third_blob, total_blob_count, total_blob_bytes,
604 BlobFileMetaData::LinkedSsts{}, garbage_blob_count,
605 garbage_blob_bytes);
606 }
607
608 {
609 constexpr uint64_t total_blob_count = 128;
610 constexpr uint64_t total_blob_bytes = 1000000;
611 constexpr uint64_t garbage_blob_count = 67;
612 constexpr uint64_t garbage_blob_bytes = 600000;
613
614 AddBlob(fourth_blob, total_blob_count, total_blob_bytes,
615 BlobFileMetaData::LinkedSsts{}, garbage_blob_count,
616 garbage_blob_bytes);
617 }
618
619 UpdateVersionStorageInfo();
620
621 assert(vstorage_.num_levels() > 0);
622 const auto& level_files = vstorage_.LevelFiles(level);
623
624 assert(level_files.size() == 1);
625 assert(level_files[0] && level_files[0]->fd.GetNumber() == sst);
626
627 // No blob files eligible for GC due to the age cutoff
628
629 {
630 constexpr double age_cutoff = 0.1;
631 constexpr double force_threshold = 0.0;
632 vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
633
634 ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
635 }
636
637 // Part of the oldest batch of blob files (specifically, #12 and #13) is
638 // ineligible for GC due to the age cutoff
639
640 {
641 constexpr double age_cutoff = 0.5;
642 constexpr double force_threshold = 0.0;
643 vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
644
645 ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
646 }
647
648 // Oldest batch is eligible based on age cutoff but its overall garbage ratio
649 // is below threshold
650
651 {
652 constexpr double age_cutoff = 1.0;
653 constexpr double force_threshold = 0.6;
654 vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
655
656 ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
657 }
658
659 // Oldest batch is eligible based on age cutoff and its overall garbage ratio
660 // meets threshold
661
662 {
663 constexpr double age_cutoff = 1.0;
664 constexpr double force_threshold = 0.5;
665 vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
666
667 auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC();
668 ASSERT_EQ(ssts_to_be_compacted.size(), 1);
669
670 const autovector<std::pair<int, FileMetaData*>>
671 expected_ssts_to_be_compacted{{level, level_files[0]}};
672
673 ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]);
674 }
675}
676
// Exercises ComputeFilesMarkedForForcedBlobGC with three L0 SSTs and four
// blob files under several (age_cutoff, force_threshold) pairs. SSTs 1 and 2
// are expected to be marked only when the threshold is 0.5 and the cutoff is
// 0.5 or 0.75; every other case must leave the marked list empty.
677TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) {
678 // Add three L0 SSTs (1, 2, and 3) and four blob files (10, 11, 12, and 13).
679 // The first two SSTs have the same oldest blob file, namely, the very oldest
680 // one (10), while the third SST's oldest blob file reference points to the
681 // third blob file (12). Thus, the oldest batch of blob files contains the
682 // first two blob files 10 and 11, and assuming they are eligible for GC based
683 // on the age cutoff, compacting away the SSTs 1 and 2 will eliminate them.
684
685 constexpr int level = 0;
686
687 constexpr uint64_t first_sst = 1;
688 constexpr uint64_t second_sst = 2;
689 constexpr uint64_t third_sst = 3;
690
691 constexpr uint64_t first_blob = 10;
692 constexpr uint64_t second_blob = 11;
693 constexpr uint64_t third_blob = 12;
694 constexpr uint64_t fourth_blob = 13;
695
696 {
697 constexpr char smallest[] = "bar1";
698 constexpr char largest[] = "foo1";
699 constexpr uint64_t file_size = 1000;
700
701 Add(level, first_sst, smallest, largest, file_size, first_blob);
702 }
703
704 {
705 constexpr char smallest[] = "bar2";
706 constexpr char largest[] = "foo2";
707 constexpr uint64_t file_size = 2000;
708
709 Add(level, second_sst, smallest, largest, file_size, first_blob);
710 }
711
712 {
713 constexpr char smallest[] = "bar3";
714 constexpr char largest[] = "foo3";
715 constexpr uint64_t file_size = 3000;
716
717 Add(level, third_sst, smallest, largest, file_size, third_blob);
718 }
719
720 {
721 constexpr uint64_t total_blob_count = 10;
722 constexpr uint64_t total_blob_bytes = 100000;
723 constexpr uint64_t garbage_blob_count = 2;
724 constexpr uint64_t garbage_blob_bytes = 15000;
725
726 AddBlob(first_blob, total_blob_count, total_blob_bytes,
727 BlobFileMetaData::LinkedSsts{first_sst, second_sst},
728 garbage_blob_count, garbage_blob_bytes);
729 }
730
731 {
732 constexpr uint64_t total_blob_count = 4;
733 constexpr uint64_t total_blob_bytes = 400000;
734 constexpr uint64_t garbage_blob_count = 3;
735 constexpr uint64_t garbage_blob_bytes = 235000;
736
737 AddBlob(second_blob, total_blob_count, total_blob_bytes,
738 BlobFileMetaData::LinkedSsts{}, garbage_blob_count,
739 garbage_blob_bytes);
740 }
741
742 {
743 constexpr uint64_t total_blob_count = 20;
744 constexpr uint64_t total_blob_bytes = 1000000;
745 constexpr uint64_t garbage_blob_count = 8;
746 constexpr uint64_t garbage_blob_bytes = 123456;
747
748 AddBlob(third_blob, total_blob_count, total_blob_bytes,
749 BlobFileMetaData::LinkedSsts{third_sst}, garbage_blob_count,
750 garbage_blob_bytes);
751 }
752
753 {
754 constexpr uint64_t total_blob_count = 128;
755 constexpr uint64_t total_blob_bytes = 789012345;
756 constexpr uint64_t garbage_blob_count = 67;
757 constexpr uint64_t garbage_blob_bytes = 88888888;
758
759 AddBlob(fourth_blob, total_blob_count, total_blob_bytes,
760 BlobFileMetaData::LinkedSsts{}, garbage_blob_count,
761 garbage_blob_bytes);
762 }
763
764 UpdateVersionStorageInfo();
765
766 assert(vstorage_.num_levels() > 0);
767 const auto& level_files = vstorage_.LevelFiles(level);
768
769 assert(level_files.size() == 3);
770 assert(level_files[0] && level_files[0]->fd.GetNumber() == first_sst);
771 assert(level_files[1] && level_files[1]->fd.GetNumber() == second_sst);
772 assert(level_files[2] && level_files[2]->fd.GetNumber() == third_sst);
773
774 // No blob files eligible for GC due to the age cutoff
775
776 {
777 constexpr double age_cutoff = 0.1;
778 constexpr double force_threshold = 0.0;
779 vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
780
781 ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
782 }
783
784 // Part of the oldest batch of blob files (specifically, the second file) is
785 // ineligible for GC due to the age cutoff
786
787 {
788 constexpr double age_cutoff = 0.25;
789 constexpr double force_threshold = 0.0;
790 vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
791
792 ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
793 }
794
795 // Oldest batch is eligible based on age cutoff but its overall garbage ratio
796 // is below threshold
797
798 {
799 constexpr double age_cutoff = 0.5;
800 constexpr double force_threshold = 0.6;
801 vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
802
803 ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
804 }
805
806 // Oldest batch is eligible based on age cutoff and its overall garbage ratio
807 // meets threshold
808
809 {
810 constexpr double age_cutoff = 0.5;
811 constexpr double force_threshold = 0.5;
812 vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
813
814 auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC();
815 ASSERT_EQ(ssts_to_be_compacted.size(), 2);
816
817 std::sort(ssts_to_be_compacted.begin(), ssts_to_be_compacted.end(),
818 [](const std::pair<int, FileMetaData*>& lhs,
819 const std::pair<int, FileMetaData*>& rhs) {
820 assert(lhs.second);
821 assert(rhs.second);
822 return lhs.second->fd.GetNumber() < rhs.second->fd.GetNumber();
823 });
824
825 const autovector<std::pair<int, FileMetaData*>>
826 expected_ssts_to_be_compacted{{level, level_files[0]},
827 {level, level_files[1]}};
828
829 ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]);
830 ASSERT_EQ(ssts_to_be_compacted[1], expected_ssts_to_be_compacted[1]);
831 }
832
833 // Now try the last two cases again with a greater than necessary age cutoff
834
835 // Oldest batch is eligible based on age cutoff but its overall garbage ratio
836 // is below threshold
837
838 {
839 constexpr double age_cutoff = 0.75;
840 constexpr double force_threshold = 0.6;
841 vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
842
843 ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
844 }
845
846 // Oldest batch is eligible based on age cutoff and its overall garbage ratio
847 // meets threshold
848
849 {
850 constexpr double age_cutoff = 0.75;
851 constexpr double force_threshold = 0.5;
852 vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
853
854 auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC();
855 ASSERT_EQ(ssts_to_be_compacted.size(), 2);
856
857 std::sort(ssts_to_be_compacted.begin(), ssts_to_be_compacted.end(),
858 [](const std::pair<int, FileMetaData*>& lhs,
859 const std::pair<int, FileMetaData*>& rhs) {
860 assert(lhs.second);
861 assert(rhs.second);
862 return lhs.second->fd.GetNumber() < rhs.second->fd.GetNumber();
863 });
864
865 const autovector<std::pair<int, FileMetaData*>>
866 expected_ssts_to_be_compacted{{level, level_files[0]},
867 {level, level_files[1]}};
868
869 ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]);
870 ASSERT_EQ(ssts_to_be_compacted[1], expected_ssts_to_be_compacted[1]);
871 }
872}
873
20effc67
TL
874class VersionStorageInfoTimestampTest : public VersionStorageInfoTestBase {
875 public:
876 VersionStorageInfoTimestampTest()
1e59de90
TL
877 : VersionStorageInfoTestBase(test::BytewiseComparatorWithU64TsWrapper()) {
878 }
20effc67
TL
879 ~VersionStorageInfoTimestampTest() override {}
880 std::string Timestamp(uint64_t ts) const {
881 std::string ret;
882 PutFixed64(&ret, ts);
883 return ret;
884 }
885 std::string PackUserKeyAndTimestamp(const Slice& ukey, uint64_t ts) const {
886 std::string ret;
887 ret.assign(ukey.data(), ukey.size());
888 PutFixed64(&ret, ts);
889 return ret;
890 }
891};
892
893TEST_F(VersionStorageInfoTimestampTest, GetOverlappingInputs) {
894 Add(/*level=*/1, /*file_number=*/1, /*smallest=*/
895 {PackUserKeyAndTimestamp("a", /*ts=*/9), /*s=*/0, kTypeValue},
896 /*largest=*/
897 {PackUserKeyAndTimestamp("a", /*ts=*/8), /*s=*/0, kTypeValue},
898 /*file_size=*/100);
899 Add(/*level=*/1, /*file_number=*/2, /*smallest=*/
900 {PackUserKeyAndTimestamp("a", /*ts=*/5), /*s=*/0, kTypeValue},
901 /*largest=*/
902 {PackUserKeyAndTimestamp("b", /*ts=*/10), /*s=*/0, kTypeValue},
903 /*file_size=*/100);
904 Add(/*level=*/1, /*file_number=*/3, /*smallest=*/
905 {PackUserKeyAndTimestamp("c", /*ts=*/12), /*s=*/0, kTypeValue},
906 /*largest=*/
907 {PackUserKeyAndTimestamp("d", /*ts=*/1), /*s=*/0, kTypeValue},
908 /*file_size=*/100);
1e59de90
TL
909
910 UpdateVersionStorageInfo();
911
20effc67
TL
912 ASSERT_EQ(
913 "1,2",
914 GetOverlappingFiles(
915 /*level=*/1,
916 {PackUserKeyAndTimestamp("a", /*ts=*/12), /*s=*/0, kTypeValue},
917 {PackUserKeyAndTimestamp("a", /*ts=*/11), /*s=*/0, kTypeValue}));
918 ASSERT_EQ("3",
919 GetOverlappingFiles(
920 /*level=*/1,
921 {PackUserKeyAndTimestamp("c", /*ts=*/15), /*s=*/0, kTypeValue},
922 {PackUserKeyAndTimestamp("c", /*ts=*/2), /*s=*/0, kTypeValue}));
923}
11fdf7f2 924
7c673cae
FG
925class FindLevelFileTest : public testing::Test {
926 public:
927 LevelFilesBrief file_level_;
928 bool disjoint_sorted_files_;
929 Arena arena_;
930
1e59de90 931 FindLevelFileTest() : disjoint_sorted_files_(true) {}
7c673cae 932
494da23a 933 ~FindLevelFileTest() override {}
7c673cae
FG
934
935 void LevelFileInit(size_t num = 0) {
936 char* mem = arena_.AllocateAligned(num * sizeof(FdWithKeyRange));
1e59de90 937 file_level_.files = new (mem) FdWithKeyRange[num];
7c673cae
FG
938 file_level_.num_files = 0;
939 }
940
941 void Add(const char* smallest, const char* largest,
942 SequenceNumber smallest_seq = 100,
943 SequenceNumber largest_seq = 100) {
944 InternalKey smallest_key = InternalKey(smallest, smallest_seq, kTypeValue);
945 InternalKey largest_key = InternalKey(largest, largest_seq, kTypeValue);
946
947 Slice smallest_slice = smallest_key.Encode();
948 Slice largest_slice = largest_key.Encode();
949
1e59de90
TL
950 char* mem =
951 arena_.AllocateAligned(smallest_slice.size() + largest_slice.size());
7c673cae
FG
952 memcpy(mem, smallest_slice.data(), smallest_slice.size());
953 memcpy(mem + smallest_slice.size(), largest_slice.data(),
1e59de90 954 largest_slice.size());
7c673cae
FG
955
956 // add to file_level_
957 size_t num = file_level_.num_files;
958 auto& file = file_level_.files[num];
959 file.fd = FileDescriptor(num + 1, 0, 0);
960 file.smallest_key = Slice(mem, smallest_slice.size());
1e59de90 961 file.largest_key = Slice(mem + smallest_slice.size(), largest_slice.size());
7c673cae
FG
962 file_level_.num_files++;
963 }
964
965 int Find(const char* key) {
966 InternalKey target(key, 100, kTypeValue);
967 InternalKeyComparator cmp(BytewiseComparator());
968 return FindFile(cmp, file_level_, target.Encode());
969 }
970
971 bool Overlaps(const char* smallest, const char* largest) {
972 InternalKeyComparator cmp(BytewiseComparator());
973 Slice s(smallest != nullptr ? smallest : "");
974 Slice l(largest != nullptr ? largest : "");
975 return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, file_level_,
976 (smallest != nullptr ? &s : nullptr),
977 (largest != nullptr ? &l : nullptr));
978 }
979};
980
981TEST_F(FindLevelFileTest, LevelEmpty) {
982 LevelFileInit(0);
983
984 ASSERT_EQ(0, Find("foo"));
1e59de90
TL
985 ASSERT_TRUE(!Overlaps("a", "z"));
986 ASSERT_TRUE(!Overlaps(nullptr, "z"));
987 ASSERT_TRUE(!Overlaps("a", nullptr));
988 ASSERT_TRUE(!Overlaps(nullptr, nullptr));
7c673cae
FG
989}
990
991TEST_F(FindLevelFileTest, LevelSingle) {
992 LevelFileInit(1);
993
994 Add("p", "q");
995 ASSERT_EQ(0, Find("a"));
996 ASSERT_EQ(0, Find("p"));
997 ASSERT_EQ(0, Find("p1"));
998 ASSERT_EQ(0, Find("q"));
999 ASSERT_EQ(1, Find("q1"));
1000 ASSERT_EQ(1, Find("z"));
1001
1e59de90
TL
1002 ASSERT_TRUE(!Overlaps("a", "b"));
1003 ASSERT_TRUE(!Overlaps("z1", "z2"));
7c673cae
FG
1004 ASSERT_TRUE(Overlaps("a", "p"));
1005 ASSERT_TRUE(Overlaps("a", "q"));
1006 ASSERT_TRUE(Overlaps("a", "z"));
1007 ASSERT_TRUE(Overlaps("p", "p1"));
1008 ASSERT_TRUE(Overlaps("p", "q"));
1009 ASSERT_TRUE(Overlaps("p", "z"));
1010 ASSERT_TRUE(Overlaps("p1", "p2"));
1011 ASSERT_TRUE(Overlaps("p1", "z"));
1012 ASSERT_TRUE(Overlaps("q", "q"));
1013 ASSERT_TRUE(Overlaps("q", "q1"));
1014
1e59de90
TL
1015 ASSERT_TRUE(!Overlaps(nullptr, "j"));
1016 ASSERT_TRUE(!Overlaps("r", nullptr));
7c673cae
FG
1017 ASSERT_TRUE(Overlaps(nullptr, "p"));
1018 ASSERT_TRUE(Overlaps(nullptr, "p1"));
1019 ASSERT_TRUE(Overlaps("q", nullptr));
1020 ASSERT_TRUE(Overlaps(nullptr, nullptr));
1021}
1022
1023TEST_F(FindLevelFileTest, LevelMultiple) {
1024 LevelFileInit(4);
1025
1026 Add("150", "200");
1027 Add("200", "250");
1028 Add("300", "350");
1029 Add("400", "450");
1030 ASSERT_EQ(0, Find("100"));
1031 ASSERT_EQ(0, Find("150"));
1032 ASSERT_EQ(0, Find("151"));
1033 ASSERT_EQ(0, Find("199"));
1034 ASSERT_EQ(0, Find("200"));
1035 ASSERT_EQ(1, Find("201"));
1036 ASSERT_EQ(1, Find("249"));
1037 ASSERT_EQ(1, Find("250"));
1038 ASSERT_EQ(2, Find("251"));
1039 ASSERT_EQ(2, Find("299"));
1040 ASSERT_EQ(2, Find("300"));
1041 ASSERT_EQ(2, Find("349"));
1042 ASSERT_EQ(2, Find("350"));
1043 ASSERT_EQ(3, Find("351"));
1044 ASSERT_EQ(3, Find("400"));
1045 ASSERT_EQ(3, Find("450"));
1046 ASSERT_EQ(4, Find("451"));
1047
1e59de90
TL
1048 ASSERT_TRUE(!Overlaps("100", "149"));
1049 ASSERT_TRUE(!Overlaps("251", "299"));
1050 ASSERT_TRUE(!Overlaps("451", "500"));
1051 ASSERT_TRUE(!Overlaps("351", "399"));
7c673cae
FG
1052
1053 ASSERT_TRUE(Overlaps("100", "150"));
1054 ASSERT_TRUE(Overlaps("100", "200"));
1055 ASSERT_TRUE(Overlaps("100", "300"));
1056 ASSERT_TRUE(Overlaps("100", "400"));
1057 ASSERT_TRUE(Overlaps("100", "500"));
1058 ASSERT_TRUE(Overlaps("375", "400"));
1059 ASSERT_TRUE(Overlaps("450", "450"));
1060 ASSERT_TRUE(Overlaps("450", "500"));
1061}
1062
1063TEST_F(FindLevelFileTest, LevelMultipleNullBoundaries) {
1064 LevelFileInit(4);
1065
1066 Add("150", "200");
1067 Add("200", "250");
1068 Add("300", "350");
1069 Add("400", "450");
1e59de90
TL
1070 ASSERT_TRUE(!Overlaps(nullptr, "149"));
1071 ASSERT_TRUE(!Overlaps("451", nullptr));
7c673cae
FG
1072 ASSERT_TRUE(Overlaps(nullptr, nullptr));
1073 ASSERT_TRUE(Overlaps(nullptr, "150"));
1074 ASSERT_TRUE(Overlaps(nullptr, "199"));
1075 ASSERT_TRUE(Overlaps(nullptr, "200"));
1076 ASSERT_TRUE(Overlaps(nullptr, "201"));
1077 ASSERT_TRUE(Overlaps(nullptr, "400"));
1078 ASSERT_TRUE(Overlaps(nullptr, "800"));
1079 ASSERT_TRUE(Overlaps("100", nullptr));
1080 ASSERT_TRUE(Overlaps("200", nullptr));
1081 ASSERT_TRUE(Overlaps("449", nullptr));
1082 ASSERT_TRUE(Overlaps("450", nullptr));
1083}
1084
1085TEST_F(FindLevelFileTest, LevelOverlapSequenceChecks) {
1086 LevelFileInit(1);
1087
1088 Add("200", "200", 5000, 3000);
1e59de90
TL
1089 ASSERT_TRUE(!Overlaps("199", "199"));
1090 ASSERT_TRUE(!Overlaps("201", "300"));
7c673cae
FG
1091 ASSERT_TRUE(Overlaps("200", "200"));
1092 ASSERT_TRUE(Overlaps("190", "200"));
1093 ASSERT_TRUE(Overlaps("200", "210"));
1094}
1095
1096TEST_F(FindLevelFileTest, LevelOverlappingFiles) {
1097 LevelFileInit(2);
1098
1099 Add("150", "600");
1100 Add("400", "500");
1101 disjoint_sorted_files_ = false;
1e59de90
TL
1102 ASSERT_TRUE(!Overlaps("100", "149"));
1103 ASSERT_TRUE(!Overlaps("601", "700"));
7c673cae
FG
1104 ASSERT_TRUE(Overlaps("100", "150"));
1105 ASSERT_TRUE(Overlaps("100", "200"));
1106 ASSERT_TRUE(Overlaps("100", "300"));
1107 ASSERT_TRUE(Overlaps("100", "400"));
1108 ASSERT_TRUE(Overlaps("100", "500"));
1109 ASSERT_TRUE(Overlaps("375", "400"));
1110 ASSERT_TRUE(Overlaps("450", "450"));
1111 ASSERT_TRUE(Overlaps("450", "500"));
1112 ASSERT_TRUE(Overlaps("450", "700"));
1113 ASSERT_TRUE(Overlaps("600", "700"));
1114}
1115
494da23a 1116class VersionSetTestBase {
11fdf7f2 1117 public:
494da23a
TL
1118 const static std::string kColumnFamilyName1;
1119 const static std::string kColumnFamilyName2;
1120 const static std::string kColumnFamilyName3;
f67539c2 1121 int num_initial_edits_;
494da23a 1122
20effc67
TL
1123 explicit VersionSetTestBase(const std::string& name)
1124 : env_(nullptr),
1125 dbname_(test::PerThreadDBPath(name)),
1126 options_(),
1127 db_options_(options_),
1128 cf_options_(options_),
1e59de90 1129 immutable_options_(db_options_, cf_options_),
11fdf7f2
TL
1130 mutable_cf_options_(cf_options_),
1131 table_cache_(NewLRUCache(50000, 16)),
1132 write_buffer_manager_(db_options_.db_write_buffer_size),
11fdf7f2
TL
1133 shutting_down_(false),
1134 mock_table_factory_(std::make_shared<mock::MockTableFactory>()) {
1e59de90
TL
1135 EXPECT_OK(test::CreateEnvFromSystem(ConfigOptions(), &env_, &env_guard_));
1136 if (env_ == Env::Default() && getenv("MEM_ENV")) {
20effc67
TL
1137 env_guard_.reset(NewMemEnv(Env::Default()));
1138 env_ = env_guard_.get();
20effc67
TL
1139 }
1140 EXPECT_NE(nullptr, env_);
f67539c2 1141
20effc67
TL
1142 fs_ = env_->GetFileSystem();
1143 EXPECT_OK(fs_->CreateDirIfMissing(dbname_, IOOptions(), nullptr));
1144
1145 options_.env = env_;
f67539c2
TL
1146 db_options_.env = env_;
1147 db_options_.fs = fs_;
1e59de90
TL
1148 immutable_options_.env = env_;
1149 immutable_options_.fs = fs_;
1150 immutable_options_.clock = env_->GetSystemClock().get();
20effc67
TL
1151
1152 versions_.reset(
1153 new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
1154 &write_buffer_manager_, &write_controller_,
1e59de90
TL
1155 /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
1156 /*db_id*/ "", /*db_session_id*/ ""));
20effc67
TL
1157 reactive_versions_ = std::make_shared<ReactiveVersionSet>(
1158 dbname_, &db_options_, env_options_, table_cache_.get(),
1159 &write_buffer_manager_, &write_controller_, nullptr);
11fdf7f2
TL
1160 db_options_.db_paths.emplace_back(dbname_,
1161 std::numeric_limits<uint64_t>::max());
1162 }
1163
20effc67
TL
1164 virtual ~VersionSetTestBase() {
1165 if (getenv("KEEP_DB")) {
1166 fprintf(stdout, "DB is still at %s\n", dbname_.c_str());
1167 } else {
1168 Options options;
1169 options.env = env_;
1170 EXPECT_OK(DestroyDB(dbname_, options));
1171 }
1172 }
1173
1174 protected:
1175 virtual void PrepareManifest(
1176 std::vector<ColumnFamilyDescriptor>* column_families,
1177 SequenceNumber* last_seqno, std::unique_ptr<log::Writer>* log_writer) {
494da23a
TL
1178 assert(column_families != nullptr);
1179 assert(last_seqno != nullptr);
1180 assert(log_writer != nullptr);
11fdf7f2 1181 VersionEdit new_db;
f67539c2 1182 if (db_options_.write_dbid_to_manifest) {
20effc67
TL
1183 DBOptions tmp_db_options;
1184 tmp_db_options.env = env_;
1185 std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_));
f67539c2
TL
1186 std::string db_id;
1187 impl->GetDbIdentityFromIdentityFile(&db_id);
1188 new_db.SetDBId(db_id);
1189 }
11fdf7f2
TL
1190 new_db.SetLogNumber(0);
1191 new_db.SetNextFile(2);
1192 new_db.SetLastSequence(0);
1193
494da23a
TL
1194 const std::vector<std::string> cf_names = {
1195 kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2,
1196 kColumnFamilyName3};
11fdf7f2
TL
1197 const int kInitialNumOfCfs = static_cast<int>(cf_names.size());
1198 autovector<VersionEdit> new_cfs;
1199 uint64_t last_seq = 1;
1200 uint32_t cf_id = 1;
1201 for (int i = 1; i != kInitialNumOfCfs; ++i) {
1202 VersionEdit new_cf;
1203 new_cf.AddColumnFamily(cf_names[i]);
1204 new_cf.SetColumnFamily(cf_id++);
1205 new_cf.SetLogNumber(0);
1206 new_cf.SetNextFile(2);
1207 new_cf.SetLastSequence(last_seq++);
1208 new_cfs.emplace_back(new_cf);
1209 }
494da23a 1210 *last_seqno = last_seq;
f67539c2 1211 num_initial_edits_ = static_cast<int>(new_cfs.size() + 1);
1e59de90 1212 std::unique_ptr<WritableFileWriter> file_writer;
11fdf7f2 1213 const std::string manifest = DescriptorFileName(dbname_, 1);
1e59de90
TL
1214 const auto& fs = env_->GetFileSystem();
1215 Status s = WritableFileWriter::Create(
1216 fs, manifest, fs->OptimizeForManifestWrite(env_options_), &file_writer,
1217 nullptr);
11fdf7f2 1218 ASSERT_OK(s);
11fdf7f2 1219 {
494da23a 1220 log_writer->reset(new log::Writer(std::move(file_writer), 0, false));
11fdf7f2
TL
1221 std::string record;
1222 new_db.EncodeTo(&record);
494da23a 1223 s = (*log_writer)->AddRecord(record);
11fdf7f2 1224 for (const auto& e : new_cfs) {
494da23a 1225 record.clear();
11fdf7f2 1226 e.EncodeTo(&record);
494da23a 1227 s = (*log_writer)->AddRecord(record);
11fdf7f2
TL
1228 ASSERT_OK(s);
1229 }
1230 }
1231 ASSERT_OK(s);
11fdf7f2 1232
11fdf7f2
TL
1233 cf_options_.table_factory = mock_table_factory_;
1234 for (const auto& cf_name : cf_names) {
494da23a 1235 column_families->emplace_back(cf_name, cf_options_);
11fdf7f2 1236 }
494da23a
TL
1237 }
1238
1239 // Create DB with 3 column families.
1240 void NewDB() {
494da23a
TL
1241 SequenceNumber last_seqno;
1242 std::unique_ptr<log::Writer> log_writer;
f67539c2 1243 SetIdentityFile(env_, dbname_);
20effc67 1244 PrepareManifest(&column_families_, &last_seqno, &log_writer);
494da23a
TL
1245 log_writer.reset();
1246 // Make "CURRENT" file point to the new manifest file.
20effc67 1247 Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
494da23a 1248 ASSERT_OK(s);
11fdf7f2 1249
20effc67
TL
1250 EXPECT_OK(versions_->Recover(column_families_, false));
1251 EXPECT_EQ(column_families_.size(),
11fdf7f2 1252 versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
11fdf7f2
TL
1253 }
1254
1e59de90
TL
1255 void ReopenDB() {
1256 versions_.reset(
1257 new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
1258 &write_buffer_manager_, &write_controller_,
1259 /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
1260 /*db_id*/ "", /*db_session_id*/ ""));
1261 EXPECT_OK(versions_->Recover(column_families_, false));
1262 }
1263
20effc67
TL
1264 void VerifyManifest(std::string* manifest_path) const {
1265 assert(manifest_path != nullptr);
1266 uint64_t manifest_file_number = 0;
1267 Status s = versions_->GetCurrentManifestPath(
1268 dbname_, fs_.get(), manifest_path, &manifest_file_number);
1269 ASSERT_OK(s);
1270 ASSERT_EQ(1, manifest_file_number);
1271 }
1272
1273 Status LogAndApplyToDefaultCF(VersionEdit& edit) {
1274 mutex_.Lock();
1275 Status s =
1276 versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(),
1e59de90 1277 mutable_cf_options_, &edit, &mutex_, nullptr);
20effc67
TL
1278 mutex_.Unlock();
1279 return s;
1280 }
1281
1282 Status LogAndApplyToDefaultCF(
1283 const autovector<std::unique_ptr<VersionEdit>>& edits) {
1284 autovector<VersionEdit*> vedits;
1285 for (auto& e : edits) {
1286 vedits.push_back(e.get());
1287 }
1288 mutex_.Lock();
1289 Status s =
1290 versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(),
1e59de90 1291 mutable_cf_options_, vedits, &mutex_, nullptr);
20effc67
TL
1292 mutex_.Unlock();
1293 return s;
1294 }
1295
1296 void CreateNewManifest() {
1297 constexpr FSDirectory* db_directory = nullptr;
1298 constexpr bool new_descriptor_log = true;
1299 mutex_.Lock();
1300 VersionEdit dummy;
1301 ASSERT_OK(versions_->LogAndApply(
1302 versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_,
1303 &dummy, &mutex_, db_directory, new_descriptor_log));
1304 mutex_.Unlock();
1305 }
1306
1e59de90
TL
1307 ColumnFamilyData* CreateColumnFamily(const std::string& cf_name,
1308 const ColumnFamilyOptions& cf_options) {
1309 VersionEdit new_cf;
1310 new_cf.AddColumnFamily(cf_name);
1311 uint32_t new_id = versions_->GetColumnFamilySet()->GetNextColumnFamilyID();
1312 new_cf.SetColumnFamily(new_id);
1313 new_cf.SetLogNumber(0);
1314 new_cf.SetComparatorName(cf_options.comparator->Name());
1315 Status s;
1316 mutex_.Lock();
1317 s = versions_->LogAndApply(/*column_family_data=*/nullptr,
1318 MutableCFOptions(cf_options), &new_cf, &mutex_,
1319 /*db_directory=*/nullptr,
1320 /*new_descriptor_log=*/false, &cf_options);
1321 mutex_.Unlock();
1322 EXPECT_OK(s);
1323 ColumnFamilyData* cfd =
1324 versions_->GetColumnFamilySet()->GetColumnFamily(cf_name);
1325 EXPECT_NE(nullptr, cfd);
1326 return cfd;
1327 }
1328
20effc67 1329 Env* mem_env_;
11fdf7f2 1330 Env* env_;
20effc67 1331 std::shared_ptr<Env> env_guard_;
f67539c2 1332 std::shared_ptr<FileSystem> fs_;
11fdf7f2
TL
1333 const std::string dbname_;
1334 EnvOptions env_options_;
20effc67 1335 Options options_;
11fdf7f2
TL
1336 ImmutableDBOptions db_options_;
1337 ColumnFamilyOptions cf_options_;
1e59de90 1338 ImmutableOptions immutable_options_;
11fdf7f2
TL
1339 MutableCFOptions mutable_cf_options_;
1340 std::shared_ptr<Cache> table_cache_;
1341 WriteController write_controller_;
1342 WriteBufferManager write_buffer_manager_;
494da23a 1343 std::shared_ptr<VersionSet> versions_;
f67539c2 1344 std::shared_ptr<ReactiveVersionSet> reactive_versions_;
11fdf7f2
TL
1345 InstrumentedMutex mutex_;
1346 std::atomic<bool> shutting_down_;
1347 std::shared_ptr<mock::MockTableFactory> mock_table_factory_;
20effc67 1348 std::vector<ColumnFamilyDescriptor> column_families_;
11fdf7f2
TL
1349};
1350
494da23a
TL
1351const std::string VersionSetTestBase::kColumnFamilyName1 = "alice";
1352const std::string VersionSetTestBase::kColumnFamilyName2 = "bob";
1353const std::string VersionSetTestBase::kColumnFamilyName3 = "charles";
1354
1355class VersionSetTest : public VersionSetTestBase, public testing::Test {
1356 public:
20effc67 1357 VersionSetTest() : VersionSetTestBase("version_set_test") {}
494da23a
TL
1358};
1359
1360TEST_F(VersionSetTest, SameColumnFamilyGroupCommit) {
11fdf7f2
TL
1361 NewDB();
1362 const int kGroupSize = 5;
494da23a
TL
1363 autovector<VersionEdit> edits;
1364 for (int i = 0; i != kGroupSize; ++i) {
1365 edits.emplace_back(VersionEdit());
1366 }
1367 autovector<ColumnFamilyData*> cfds;
1368 autovector<const MutableCFOptions*> all_mutable_cf_options;
1369 autovector<autovector<VersionEdit*>> edit_lists;
11fdf7f2 1370 for (int i = 0; i != kGroupSize; ++i) {
494da23a
TL
1371 cfds.emplace_back(versions_->GetColumnFamilySet()->GetDefault());
1372 all_mutable_cf_options.emplace_back(&mutable_cf_options_);
1373 autovector<VersionEdit*> edit_list;
1374 edit_list.emplace_back(&edits[i]);
1375 edit_lists.emplace_back(edit_list);
11fdf7f2
TL
1376 }
1377
494da23a
TL
1378 SyncPoint::GetInstance()->DisableProcessing();
1379 SyncPoint::GetInstance()->ClearAllCallBacks();
11fdf7f2
TL
1380 int count = 0;
1381 SyncPoint::GetInstance()->SetCallBack(
1382 "VersionSet::ProcessManifestWrites:SameColumnFamily", [&](void* arg) {
1383 uint32_t* cf_id = reinterpret_cast<uint32_t*>(arg);
f67539c2 1384 EXPECT_EQ(0u, *cf_id);
11fdf7f2
TL
1385 ++count;
1386 });
1387 SyncPoint::GetInstance()->EnableProcessing();
1388 mutex_.Lock();
1e59de90
TL
1389 Status s = versions_->LogAndApply(cfds, all_mutable_cf_options, edit_lists,
1390 &mutex_, nullptr);
11fdf7f2
TL
1391 mutex_.Unlock();
1392 EXPECT_OK(s);
1393 EXPECT_EQ(kGroupSize - 1, count);
1394}
494da23a 1395
20effc67
TL
1396TEST_F(VersionSetTest, PersistBlobFileStateInNewManifest) {
1397 // Initialize the database and add a couple of blob files, one with some
1398 // garbage in it, and one without any garbage.
1399 NewDB();
1400
1401 assert(versions_);
1402 assert(versions_->GetColumnFamilySet());
1403
1404 ColumnFamilyData* const cfd = versions_->GetColumnFamilySet()->GetDefault();
1405 assert(cfd);
1406
1407 Version* const version = cfd->current();
1408 assert(version);
1409
1410 VersionStorageInfo* const storage_info = version->storage_info();
1411 assert(storage_info);
1412
1413 {
1414 constexpr uint64_t blob_file_number = 123;
1415 constexpr uint64_t total_blob_count = 456;
1416 constexpr uint64_t total_blob_bytes = 77777777;
1417 constexpr char checksum_method[] = "SHA1";
1418 constexpr char checksum_value[] =
1e59de90
TL
1419 "\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c"
1420 "\x52\x5c\xbd";
20effc67
TL
1421
1422 auto shared_meta = SharedBlobFileMetaData::Create(
1423 blob_file_number, total_blob_count, total_blob_bytes, checksum_method,
1424 checksum_value);
1425
1426 constexpr uint64_t garbage_blob_count = 89;
1427 constexpr uint64_t garbage_blob_bytes = 1000000;
1428
1429 auto meta = BlobFileMetaData::Create(
1430 std::move(shared_meta), BlobFileMetaData::LinkedSsts(),
1431 garbage_blob_count, garbage_blob_bytes);
1432
1433 storage_info->AddBlobFile(std::move(meta));
1434 }
1435
1436 {
1437 constexpr uint64_t blob_file_number = 234;
1438 constexpr uint64_t total_blob_count = 555;
1439 constexpr uint64_t total_blob_bytes = 66666;
1440 constexpr char checksum_method[] = "CRC32";
1e59de90 1441 constexpr char checksum_value[] = "\x3d\x87\xff\x57";
20effc67
TL
1442
1443 auto shared_meta = SharedBlobFileMetaData::Create(
1444 blob_file_number, total_blob_count, total_blob_bytes, checksum_method,
1445 checksum_value);
1446
1447 constexpr uint64_t garbage_blob_count = 0;
1448 constexpr uint64_t garbage_blob_bytes = 0;
1449
1450 auto meta = BlobFileMetaData::Create(
1451 std::move(shared_meta), BlobFileMetaData::LinkedSsts(),
1452 garbage_blob_count, garbage_blob_bytes);
1453
1454 storage_info->AddBlobFile(std::move(meta));
1455 }
1456
1457 // Force the creation of a new manifest file and make sure metadata for
1458 // the blob files is re-persisted.
1459 size_t addition_encoded = 0;
1460 SyncPoint::GetInstance()->SetCallBack(
1461 "BlobFileAddition::EncodeTo::CustomFields",
1462 [&](void* /* arg */) { ++addition_encoded; });
1463
1464 size_t garbage_encoded = 0;
1465 SyncPoint::GetInstance()->SetCallBack(
1466 "BlobFileGarbage::EncodeTo::CustomFields",
1467 [&](void* /* arg */) { ++garbage_encoded; });
1468 SyncPoint::GetInstance()->EnableProcessing();
1469
1470 CreateNewManifest();
1471
1472 ASSERT_EQ(addition_encoded, 2);
1473 ASSERT_EQ(garbage_encoded, 1);
1474
1475 SyncPoint::GetInstance()->DisableProcessing();
1476 SyncPoint::GetInstance()->ClearAllCallBacks();
1477}
1478
1479TEST_F(VersionSetTest, AddLiveBlobFiles) {
1480 // Initialize the database and add a blob file.
1481 NewDB();
1482
1483 assert(versions_);
1484 assert(versions_->GetColumnFamilySet());
1485
1486 ColumnFamilyData* const cfd = versions_->GetColumnFamilySet()->GetDefault();
1487 assert(cfd);
1488
1489 Version* const first_version = cfd->current();
1490 assert(first_version);
1491
1492 VersionStorageInfo* const first_storage_info = first_version->storage_info();
1493 assert(first_storage_info);
1494
1495 constexpr uint64_t first_blob_file_number = 234;
1496 constexpr uint64_t first_total_blob_count = 555;
1497 constexpr uint64_t first_total_blob_bytes = 66666;
1498 constexpr char first_checksum_method[] = "CRC32";
1e59de90 1499 constexpr char first_checksum_value[] = "\x3d\x87\xff\x57";
20effc67
TL
1500
1501 auto first_shared_meta = SharedBlobFileMetaData::Create(
1502 first_blob_file_number, first_total_blob_count, first_total_blob_bytes,
1503 first_checksum_method, first_checksum_value);
1504
1505 constexpr uint64_t garbage_blob_count = 0;
1506 constexpr uint64_t garbage_blob_bytes = 0;
1507
1508 auto first_meta = BlobFileMetaData::Create(
1509 std::move(first_shared_meta), BlobFileMetaData::LinkedSsts(),
1510 garbage_blob_count, garbage_blob_bytes);
1511
1512 first_storage_info->AddBlobFile(first_meta);
1513
1514 // Reference the version so it stays alive even after the following version
1515 // edit.
1516 first_version->Ref();
1517
1518 // Get live files directly from version.
1519 std::vector<uint64_t> version_table_files;
1520 std::vector<uint64_t> version_blob_files;
1521
1522 first_version->AddLiveFiles(&version_table_files, &version_blob_files);
1523
1524 ASSERT_EQ(version_blob_files.size(), 1);
1525 ASSERT_EQ(version_blob_files[0], first_blob_file_number);
1526
1527 // Create a new version containing an additional blob file.
1528 versions_->TEST_CreateAndAppendVersion(cfd);
1529
1530 Version* const second_version = cfd->current();
1531 assert(second_version);
1532 assert(second_version != first_version);
1533
1534 VersionStorageInfo* const second_storage_info =
1535 second_version->storage_info();
1536 assert(second_storage_info);
1537
1538 constexpr uint64_t second_blob_file_number = 456;
1539 constexpr uint64_t second_total_blob_count = 100;
1540 constexpr uint64_t second_total_blob_bytes = 2000000;
1541 constexpr char second_checksum_method[] = "CRC32B";
1e59de90 1542 constexpr char second_checksum_value[] = "\x6d\xbd\xf2\x3a";
20effc67
TL
1543
1544 auto second_shared_meta = SharedBlobFileMetaData::Create(
1545 second_blob_file_number, second_total_blob_count, second_total_blob_bytes,
1546 second_checksum_method, second_checksum_value);
1547
1548 auto second_meta = BlobFileMetaData::Create(
1549 std::move(second_shared_meta), BlobFileMetaData::LinkedSsts(),
1550 garbage_blob_count, garbage_blob_bytes);
1551
1552 second_storage_info->AddBlobFile(std::move(first_meta));
1553 second_storage_info->AddBlobFile(std::move(second_meta));
1554
1555 // Get all live files from version set. Note that the result contains
1556 // duplicates.
1557 std::vector<uint64_t> all_table_files;
1558 std::vector<uint64_t> all_blob_files;
1559
1560 versions_->AddLiveFiles(&all_table_files, &all_blob_files);
1561
1562 ASSERT_EQ(all_blob_files.size(), 3);
1563 ASSERT_EQ(all_blob_files[0], first_blob_file_number);
1564 ASSERT_EQ(all_blob_files[1], first_blob_file_number);
1565 ASSERT_EQ(all_blob_files[2], second_blob_file_number);
1566
1567 // Clean up previous version.
1568 first_version->Unref();
1569}
1570
1571TEST_F(VersionSetTest, ObsoleteBlobFile) {
1572 // Initialize the database and add a blob file that is entirely garbage
1573 // and thus can immediately be marked obsolete.
1574 NewDB();
1575
1576 VersionEdit edit;
1577
1578 constexpr uint64_t blob_file_number = 234;
1579 constexpr uint64_t total_blob_count = 555;
1580 constexpr uint64_t total_blob_bytes = 66666;
1581 constexpr char checksum_method[] = "CRC32";
1e59de90 1582 constexpr char checksum_value[] = "\x3d\x87\xff\x57";
20effc67
TL
1583
1584 edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes,
1585 checksum_method, checksum_value);
1586
1587 edit.AddBlobFileGarbage(blob_file_number, total_blob_count, total_blob_bytes);
1588
1589 mutex_.Lock();
1590 Status s =
1591 versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(),
1e59de90 1592 mutable_cf_options_, &edit, &mutex_, nullptr);
20effc67
TL
1593 mutex_.Unlock();
1594
1595 ASSERT_OK(s);
1596
1597 // Make sure blob files from the pending number range are not returned
1598 // as obsolete.
1599 {
1600 std::vector<ObsoleteFileInfo> table_files;
1601 std::vector<ObsoleteBlobFileInfo> blob_files;
1602 std::vector<std::string> manifest_files;
1603 constexpr uint64_t min_pending_output = blob_file_number;
1604
1605 versions_->GetObsoleteFiles(&table_files, &blob_files, &manifest_files,
1606 min_pending_output);
1607
1608 ASSERT_TRUE(blob_files.empty());
1609 }
1610
1611 // Make sure the blob file is returned as obsolete if it's not in the pending
1612 // range.
1613 {
1614 std::vector<ObsoleteFileInfo> table_files;
1615 std::vector<ObsoleteBlobFileInfo> blob_files;
1616 std::vector<std::string> manifest_files;
1617 constexpr uint64_t min_pending_output = blob_file_number + 1;
1618
1619 versions_->GetObsoleteFiles(&table_files, &blob_files, &manifest_files,
1620 min_pending_output);
1621
1622 ASSERT_EQ(blob_files.size(), 1);
1623 ASSERT_EQ(blob_files[0].GetBlobFileNumber(), blob_file_number);
1624 }
1625
1626 // Make sure it's not returned a second time.
1627 {
1628 std::vector<ObsoleteFileInfo> table_files;
1629 std::vector<ObsoleteBlobFileInfo> blob_files;
1630 std::vector<std::string> manifest_files;
1631 constexpr uint64_t min_pending_output = blob_file_number + 1;
1632
1633 versions_->GetObsoleteFiles(&table_files, &blob_files, &manifest_files,
1634 min_pending_output);
1635
1636 ASSERT_TRUE(blob_files.empty());
1637 }
1638}
1639
1640TEST_F(VersionSetTest, WalEditsNotAppliedToVersion) {
1641 NewDB();
1642
1643 constexpr uint64_t kNumWals = 5;
1644
1645 autovector<std::unique_ptr<VersionEdit>> edits;
1646 // Add some WALs.
1647 for (uint64_t i = 1; i <= kNumWals; i++) {
1648 edits.emplace_back(new VersionEdit);
1649 // WAL's size equals its log number.
1650 edits.back()->AddWal(i, WalMetadata(i));
1651 }
1652 // Delete the first half of the WALs.
1653 edits.emplace_back(new VersionEdit);
1654 edits.back()->DeleteWalsBefore(kNumWals / 2 + 1);
1655
1656 autovector<Version*> versions;
1657 SyncPoint::GetInstance()->SetCallBack(
1658 "VersionSet::ProcessManifestWrites:NewVersion",
1659 [&](void* arg) { versions.push_back(reinterpret_cast<Version*>(arg)); });
1660 SyncPoint::GetInstance()->EnableProcessing();
1661
1e59de90 1662 ASSERT_OK(LogAndApplyToDefaultCF(edits));
20effc67
TL
1663
1664 SyncPoint::GetInstance()->DisableProcessing();
1665 SyncPoint::GetInstance()->ClearAllCallBacks();
1666
1667 // Since the edits are all WAL edits, no version should be created.
1668 ASSERT_EQ(versions.size(), 1);
1669 ASSERT_EQ(versions[0], nullptr);
1670}
1671
1672// Similar to WalEditsNotAppliedToVersion, but contains a non-WAL edit.
1673TEST_F(VersionSetTest, NonWalEditsAppliedToVersion) {
1674 NewDB();
1675
1676 const std::string kDBId = "db_db";
1677 constexpr uint64_t kNumWals = 5;
1678
1679 autovector<std::unique_ptr<VersionEdit>> edits;
1680 // Add some WALs.
1681 for (uint64_t i = 1; i <= kNumWals; i++) {
1682 edits.emplace_back(new VersionEdit);
1683 // WAL's size equals its log number.
1684 edits.back()->AddWal(i, WalMetadata(i));
1685 }
1686 // Delete the first half of the WALs.
1687 edits.emplace_back(new VersionEdit);
1688 edits.back()->DeleteWalsBefore(kNumWals / 2 + 1);
1689 edits.emplace_back(new VersionEdit);
1690 edits.back()->SetDBId(kDBId);
1691
1692 autovector<Version*> versions;
1693 SyncPoint::GetInstance()->SetCallBack(
1694 "VersionSet::ProcessManifestWrites:NewVersion",
1695 [&](void* arg) { versions.push_back(reinterpret_cast<Version*>(arg)); });
1696 SyncPoint::GetInstance()->EnableProcessing();
1697
1e59de90 1698 ASSERT_OK(LogAndApplyToDefaultCF(edits));
20effc67
TL
1699
1700 SyncPoint::GetInstance()->DisableProcessing();
1701 SyncPoint::GetInstance()->ClearAllCallBacks();
1702
1703 // Since the edits are all WAL edits, no version should be created.
1704 ASSERT_EQ(versions.size(), 1);
1705 ASSERT_NE(versions[0], nullptr);
1706}
1707
1708TEST_F(VersionSetTest, WalAddition) {
1709 NewDB();
1710
1711 constexpr WalNumber kLogNumber = 10;
1712 constexpr uint64_t kSizeInBytes = 111;
1713
1714 // A WAL is just created.
1715 {
1716 VersionEdit edit;
1717 edit.AddWal(kLogNumber);
1718
1719 ASSERT_OK(LogAndApplyToDefaultCF(edit));
1720
1721 const auto& wals = versions_->GetWalSet().GetWals();
1722 ASSERT_EQ(wals.size(), 1);
1723 ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
1724 ASSERT_FALSE(wals.at(kLogNumber).HasSyncedSize());
1725 }
1726
1727 // The WAL is synced for several times before closing.
1728 {
1729 for (uint64_t size_delta = 100; size_delta > 0; size_delta /= 2) {
1730 uint64_t size = kSizeInBytes - size_delta;
1731 WalMetadata wal(size);
1732 VersionEdit edit;
1733 edit.AddWal(kLogNumber, wal);
1734
1735 ASSERT_OK(LogAndApplyToDefaultCF(edit));
1736
1737 const auto& wals = versions_->GetWalSet().GetWals();
1738 ASSERT_EQ(wals.size(), 1);
1739 ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
1740 ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize());
1741 ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), size);
1742 }
1743 }
1744
1745 // The WAL is closed.
1746 {
1747 WalMetadata wal(kSizeInBytes);
1748 VersionEdit edit;
1749 edit.AddWal(kLogNumber, wal);
1750
1751 ASSERT_OK(LogAndApplyToDefaultCF(edit));
1752
1753 const auto& wals = versions_->GetWalSet().GetWals();
1754 ASSERT_EQ(wals.size(), 1);
1755 ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
1756 ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize());
1757 ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), kSizeInBytes);
1758 }
1759
1760 // Recover a new VersionSet.
1761 {
1762 std::unique_ptr<VersionSet> new_versions(
1763 new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
1764 &write_buffer_manager_, &write_controller_,
1e59de90
TL
1765 /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
1766 /*db_id*/ "", /*db_session_id*/ ""));
20effc67
TL
1767 ASSERT_OK(new_versions->Recover(column_families_, /*read_only=*/false));
1768 const auto& wals = new_versions->GetWalSet().GetWals();
1769 ASSERT_EQ(wals.size(), 1);
1770 ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
1771 ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize());
1772 ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), kSizeInBytes);
1773 }
1774}
1775
1776TEST_F(VersionSetTest, WalCloseWithoutSync) {
1777 NewDB();
1778
1779 constexpr WalNumber kLogNumber = 10;
1780 constexpr uint64_t kSizeInBytes = 111;
1781 constexpr uint64_t kSyncedSizeInBytes = kSizeInBytes / 2;
1782
1783 // A WAL is just created.
1784 {
1785 VersionEdit edit;
1786 edit.AddWal(kLogNumber);
1787
1788 ASSERT_OK(LogAndApplyToDefaultCF(edit));
1789
1790 const auto& wals = versions_->GetWalSet().GetWals();
1791 ASSERT_EQ(wals.size(), 1);
1792 ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
1793 ASSERT_FALSE(wals.at(kLogNumber).HasSyncedSize());
1794 }
1795
1796 // The WAL is synced before closing.
1797 {
1798 WalMetadata wal(kSyncedSizeInBytes);
1799 VersionEdit edit;
1800 edit.AddWal(kLogNumber, wal);
1801
1802 ASSERT_OK(LogAndApplyToDefaultCF(edit));
1803
1804 const auto& wals = versions_->GetWalSet().GetWals();
1805 ASSERT_EQ(wals.size(), 1);
1806 ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
1807 ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize());
1808 ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), kSyncedSizeInBytes);
1809 }
1810
1811 // A new WAL with larger log number is created,
1812 // implicitly marking the current WAL closed.
1813 {
1814 VersionEdit edit;
1815 edit.AddWal(kLogNumber + 1);
1816 ASSERT_OK(LogAndApplyToDefaultCF(edit));
1817
1818 const auto& wals = versions_->GetWalSet().GetWals();
1819 ASSERT_EQ(wals.size(), 2);
1820 ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
1821 ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize());
1822 ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), kSyncedSizeInBytes);
1823 ASSERT_TRUE(wals.find(kLogNumber + 1) != wals.end());
1824 ASSERT_FALSE(wals.at(kLogNumber + 1).HasSyncedSize());
1825 }
1826
1827 // Recover a new VersionSet.
1828 {
1829 std::unique_ptr<VersionSet> new_versions(
1830 new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
1831 &write_buffer_manager_, &write_controller_,
1e59de90
TL
1832 /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
1833 /*db_id*/ "", /*db_session_id*/ ""));
20effc67
TL
1834 ASSERT_OK(new_versions->Recover(column_families_, false));
1835 const auto& wals = new_versions->GetWalSet().GetWals();
1836 ASSERT_EQ(wals.size(), 2);
1837 ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
1838 ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize());
1839 ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), kSyncedSizeInBytes);
1840 }
1841}
1842
1843TEST_F(VersionSetTest, WalDeletion) {
1844 NewDB();
1845
1846 constexpr WalNumber kClosedLogNumber = 10;
1847 constexpr WalNumber kNonClosedLogNumber = 20;
1848 constexpr uint64_t kSizeInBytes = 111;
1849
1850 // Add a non-closed and a closed WAL.
1851 {
1852 VersionEdit edit;
1853 edit.AddWal(kClosedLogNumber, WalMetadata(kSizeInBytes));
1854 edit.AddWal(kNonClosedLogNumber);
1855
1856 ASSERT_OK(LogAndApplyToDefaultCF(edit));
1857
1858 const auto& wals = versions_->GetWalSet().GetWals();
1859 ASSERT_EQ(wals.size(), 2);
1860 ASSERT_TRUE(wals.find(kNonClosedLogNumber) != wals.end());
1861 ASSERT_TRUE(wals.find(kClosedLogNumber) != wals.end());
1862 ASSERT_FALSE(wals.at(kNonClosedLogNumber).HasSyncedSize());
1863 ASSERT_TRUE(wals.at(kClosedLogNumber).HasSyncedSize());
1864 ASSERT_EQ(wals.at(kClosedLogNumber).GetSyncedSizeInBytes(), kSizeInBytes);
1865 }
1866
1867 // Delete the closed WAL.
1868 {
1869 VersionEdit edit;
1870 edit.DeleteWalsBefore(kNonClosedLogNumber);
1871
1872 ASSERT_OK(LogAndApplyToDefaultCF(edit));
1873
1874 const auto& wals = versions_->GetWalSet().GetWals();
1875 ASSERT_EQ(wals.size(), 1);
1876 ASSERT_TRUE(wals.find(kNonClosedLogNumber) != wals.end());
1877 ASSERT_FALSE(wals.at(kNonClosedLogNumber).HasSyncedSize());
1878 }
1879
1880 // Recover a new VersionSet, only the non-closed WAL should show up.
1881 {
1882 std::unique_ptr<VersionSet> new_versions(
1883 new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
1884 &write_buffer_manager_, &write_controller_,
1e59de90
TL
1885 /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
1886 /*db_id*/ "", /*db_session_id*/ ""));
20effc67
TL
1887 ASSERT_OK(new_versions->Recover(column_families_, false));
1888 const auto& wals = new_versions->GetWalSet().GetWals();
1889 ASSERT_EQ(wals.size(), 1);
1890 ASSERT_TRUE(wals.find(kNonClosedLogNumber) != wals.end());
1891 ASSERT_FALSE(wals.at(kNonClosedLogNumber).HasSyncedSize());
1892 }
1893
1894 // Force the creation of a new MANIFEST file,
1895 // only the non-closed WAL should be written to the new MANIFEST.
1896 {
1897 std::vector<WalAddition> wal_additions;
1898 SyncPoint::GetInstance()->SetCallBack(
1899 "VersionSet::WriteCurrentStateToManifest:SaveWal", [&](void* arg) {
1900 VersionEdit* edit = reinterpret_cast<VersionEdit*>(arg);
1901 ASSERT_TRUE(edit->IsWalAddition());
1902 for (auto& addition : edit->GetWalAdditions()) {
1903 wal_additions.push_back(addition);
1904 }
1905 });
1906 SyncPoint::GetInstance()->EnableProcessing();
1907
1908 CreateNewManifest();
1909
1910 SyncPoint::GetInstance()->DisableProcessing();
1911 SyncPoint::GetInstance()->ClearAllCallBacks();
1912
1913 ASSERT_EQ(wal_additions.size(), 1);
1914 ASSERT_EQ(wal_additions[0].GetLogNumber(), kNonClosedLogNumber);
1915 ASSERT_FALSE(wal_additions[0].GetMetadata().HasSyncedSize());
1916 }
1917
1918 // Recover from the new MANIFEST, only the non-closed WAL should show up.
1919 {
1920 std::unique_ptr<VersionSet> new_versions(
1921 new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
1922 &write_buffer_manager_, &write_controller_,
1e59de90
TL
1923 /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
1924 /*db_id*/ "", /*db_session_id*/ ""));
20effc67
TL
1925 ASSERT_OK(new_versions->Recover(column_families_, false));
1926 const auto& wals = new_versions->GetWalSet().GetWals();
1927 ASSERT_EQ(wals.size(), 1);
1928 ASSERT_TRUE(wals.find(kNonClosedLogNumber) != wals.end());
1929 ASSERT_FALSE(wals.at(kNonClosedLogNumber).HasSyncedSize());
1930 }
1931}
1932
1933TEST_F(VersionSetTest, WalCreateTwice) {
1934 NewDB();
1935
1936 constexpr WalNumber kLogNumber = 10;
1937
1938 VersionEdit edit;
1939 edit.AddWal(kLogNumber);
1940
1941 ASSERT_OK(LogAndApplyToDefaultCF(edit));
1942
1943 Status s = LogAndApplyToDefaultCF(edit);
1944 ASSERT_TRUE(s.IsCorruption());
1945 ASSERT_TRUE(s.ToString().find("WAL 10 is created more than once") !=
1946 std::string::npos)
1947 << s.ToString();
1948}
1949
1950TEST_F(VersionSetTest, WalCreateAfterClose) {
1951 NewDB();
1952
1953 constexpr WalNumber kLogNumber = 10;
1954 constexpr uint64_t kSizeInBytes = 111;
1955
1956 {
1957 // Add a closed WAL.
1958 VersionEdit edit;
1959 edit.AddWal(kLogNumber);
1960 WalMetadata wal(kSizeInBytes);
1961 edit.AddWal(kLogNumber, wal);
1962
1963 ASSERT_OK(LogAndApplyToDefaultCF(edit));
1964 }
1965
1966 {
1967 // Create the same WAL again.
1968 VersionEdit edit;
1969 edit.AddWal(kLogNumber);
1970
1971 Status s = LogAndApplyToDefaultCF(edit);
1972 ASSERT_TRUE(s.IsCorruption());
1973 ASSERT_TRUE(s.ToString().find("WAL 10 is created more than once") !=
1974 std::string::npos)
1975 << s.ToString();
1976 }
1977}
1978
1979TEST_F(VersionSetTest, AddWalWithSmallerSize) {
1980 NewDB();
1e59de90 1981 assert(versions_);
20effc67
TL
1982
1983 constexpr WalNumber kLogNumber = 10;
1984 constexpr uint64_t kSizeInBytes = 111;
1985
1986 {
1987 // Add a closed WAL.
1988 VersionEdit edit;
1989 WalMetadata wal(kSizeInBytes);
1990 edit.AddWal(kLogNumber, wal);
1991
1992 ASSERT_OK(LogAndApplyToDefaultCF(edit));
1993 }
1e59de90
TL
1994 // Copy for future comparison.
1995 const std::map<WalNumber, WalMetadata> wals1 =
1996 versions_->GetWalSet().GetWals();
20effc67
TL
1997
1998 {
1999 // Add the same WAL with smaller synced size.
2000 VersionEdit edit;
2001 WalMetadata wal(kSizeInBytes / 2);
2002 edit.AddWal(kLogNumber, wal);
2003
2004 Status s = LogAndApplyToDefaultCF(edit);
1e59de90 2005 ASSERT_OK(s);
20effc67 2006 }
1e59de90
TL
2007 const std::map<WalNumber, WalMetadata> wals2 =
2008 versions_->GetWalSet().GetWals();
2009 ASSERT_EQ(wals1, wals2);
20effc67
TL
2010}
2011
2012TEST_F(VersionSetTest, DeleteWalsBeforeNonExistingWalNumber) {
2013 NewDB();
2014
2015 constexpr WalNumber kLogNumber0 = 10;
2016 constexpr WalNumber kLogNumber1 = 20;
2017 constexpr WalNumber kNonExistingNumber = 15;
2018 constexpr uint64_t kSizeInBytes = 111;
2019
2020 {
2021 // Add closed WALs.
2022 VersionEdit edit;
2023 WalMetadata wal(kSizeInBytes);
2024 edit.AddWal(kLogNumber0, wal);
2025 edit.AddWal(kLogNumber1, wal);
2026
2027 ASSERT_OK(LogAndApplyToDefaultCF(edit));
2028 }
2029
2030 {
2031 // Delete WALs before a non-existing WAL.
2032 VersionEdit edit;
2033 edit.DeleteWalsBefore(kNonExistingNumber);
2034
2035 ASSERT_OK(LogAndApplyToDefaultCF(edit));
2036 }
2037
2038 // Recover a new VersionSet, WAL0 is deleted, WAL1 is not.
2039 {
2040 std::unique_ptr<VersionSet> new_versions(
2041 new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
2042 &write_buffer_manager_, &write_controller_,
1e59de90
TL
2043 /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
2044 /*db_id*/ "", /*db_session_id*/ ""));
20effc67
TL
2045 ASSERT_OK(new_versions->Recover(column_families_, false));
2046 const auto& wals = new_versions->GetWalSet().GetWals();
2047 ASSERT_EQ(wals.size(), 1);
2048 ASSERT_TRUE(wals.find(kLogNumber1) != wals.end());
2049 }
2050}
2051
2052TEST_F(VersionSetTest, DeleteAllWals) {
2053 NewDB();
2054
2055 constexpr WalNumber kMaxLogNumber = 10;
2056 constexpr uint64_t kSizeInBytes = 111;
2057
2058 {
2059 // Add a closed WAL.
2060 VersionEdit edit;
2061 WalMetadata wal(kSizeInBytes);
2062 edit.AddWal(kMaxLogNumber, wal);
2063
2064 ASSERT_OK(LogAndApplyToDefaultCF(edit));
2065 }
2066
2067 {
2068 VersionEdit edit;
2069 edit.DeleteWalsBefore(kMaxLogNumber + 10);
2070
2071 ASSERT_OK(LogAndApplyToDefaultCF(edit));
2072 }
2073
2074 // Recover a new VersionSet, all WALs are deleted.
2075 {
2076 std::unique_ptr<VersionSet> new_versions(
2077 new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
2078 &write_buffer_manager_, &write_controller_,
1e59de90
TL
2079 /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
2080 /*db_id*/ "", /*db_session_id*/ ""));
20effc67
TL
2081 ASSERT_OK(new_versions->Recover(column_families_, false));
2082 const auto& wals = new_versions->GetWalSet().GetWals();
2083 ASSERT_EQ(wals.size(), 0);
2084 }
2085}
2086
2087TEST_F(VersionSetTest, AtomicGroupWithWalEdits) {
2088 NewDB();
2089
2090 constexpr int kAtomicGroupSize = 7;
2091 constexpr uint64_t kNumWals = 5;
2092 const std::string kDBId = "db_db";
2093
2094 int remaining = kAtomicGroupSize;
2095 autovector<std::unique_ptr<VersionEdit>> edits;
2096 // Add 5 WALs.
2097 for (uint64_t i = 1; i <= kNumWals; i++) {
2098 edits.emplace_back(new VersionEdit);
2099 // WAL's size equals its log number.
2100 edits.back()->AddWal(i, WalMetadata(i));
2101 edits.back()->MarkAtomicGroup(--remaining);
2102 }
2103 // One edit with the min log number set.
2104 edits.emplace_back(new VersionEdit);
2105 edits.back()->SetDBId(kDBId);
2106 edits.back()->MarkAtomicGroup(--remaining);
2107 // Delete the first added 4 WALs.
2108 edits.emplace_back(new VersionEdit);
2109 edits.back()->DeleteWalsBefore(kNumWals);
2110 edits.back()->MarkAtomicGroup(--remaining);
2111 ASSERT_EQ(remaining, 0);
2112
1e59de90 2113 ASSERT_OK(LogAndApplyToDefaultCF(edits));
20effc67
TL
2114
2115 // Recover a new VersionSet, the min log number and the last WAL should be
2116 // kept.
2117 {
2118 std::unique_ptr<VersionSet> new_versions(
2119 new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
2120 &write_buffer_manager_, &write_controller_,
1e59de90
TL
2121 /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
2122 /*db_id*/ "", /*db_session_id*/ ""));
20effc67
TL
2123 std::string db_id;
2124 ASSERT_OK(
2125 new_versions->Recover(column_families_, /*read_only=*/false, &db_id));
2126
2127 ASSERT_EQ(db_id, kDBId);
2128
2129 const auto& wals = new_versions->GetWalSet().GetWals();
2130 ASSERT_EQ(wals.size(), 1);
2131 ASSERT_TRUE(wals.find(kNumWals) != wals.end());
2132 ASSERT_TRUE(wals.at(kNumWals).HasSyncedSize());
2133 ASSERT_EQ(wals.at(kNumWals).GetSyncedSizeInBytes(), kNumWals);
2134 }
2135}
2136
1e59de90
TL
2137class VersionSetWithTimestampTest : public VersionSetTest {
2138 public:
2139 static const std::string kNewCfName;
2140
2141 explicit VersionSetWithTimestampTest() : VersionSetTest() {}
2142
2143 void SetUp() override {
2144 NewDB();
2145 Options options;
2146 options.comparator = test::BytewiseComparatorWithU64TsWrapper();
2147 cfd_ = CreateColumnFamily(kNewCfName, options);
2148 EXPECT_NE(nullptr, cfd_);
2149 EXPECT_NE(nullptr, cfd_->GetLatestMutableCFOptions());
2150 column_families_.emplace_back(kNewCfName, options);
2151 }
2152
2153 void TearDown() override {
2154 for (auto* e : edits_) {
2155 delete e;
2156 }
2157 edits_.clear();
2158 }
2159
2160 void GenVersionEditsToSetFullHistoryTsLow(
2161 const std::vector<uint64_t>& ts_lbs) {
2162 for (const auto ts_lb : ts_lbs) {
2163 VersionEdit* edit = new VersionEdit;
2164 edit->SetColumnFamily(cfd_->GetID());
2165 std::string ts_str = test::EncodeInt(ts_lb);
2166 edit->SetFullHistoryTsLow(ts_str);
2167 edits_.emplace_back(edit);
2168 }
2169 }
2170
2171 void VerifyFullHistoryTsLow(uint64_t expected_ts_low) {
2172 std::unique_ptr<VersionSet> vset(
2173 new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
2174 &write_buffer_manager_, &write_controller_,
2175 /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
2176 /*db_id*/ "", /*db_session_id*/ ""));
2177 ASSERT_OK(vset->Recover(column_families_, /*read_only=*/false,
2178 /*db_id=*/nullptr));
2179 for (auto* cfd : *(vset->GetColumnFamilySet())) {
2180 ASSERT_NE(nullptr, cfd);
2181 if (cfd->GetName() == kNewCfName) {
2182 ASSERT_EQ(test::EncodeInt(expected_ts_low), cfd->GetFullHistoryTsLow());
2183 } else {
2184 ASSERT_TRUE(cfd->GetFullHistoryTsLow().empty());
2185 }
2186 }
2187 }
2188
2189 void DoTest(const std::vector<uint64_t>& ts_lbs) {
2190 if (ts_lbs.empty()) {
2191 return;
2192 }
2193
2194 GenVersionEditsToSetFullHistoryTsLow(ts_lbs);
2195
2196 Status s;
2197 mutex_.Lock();
2198 s = versions_->LogAndApply(cfd_, *(cfd_->GetLatestMutableCFOptions()),
2199 edits_, &mutex_, nullptr);
2200 mutex_.Unlock();
2201 ASSERT_OK(s);
2202 VerifyFullHistoryTsLow(*std::max_element(ts_lbs.begin(), ts_lbs.end()));
2203 }
2204
2205 protected:
2206 ColumnFamilyData* cfd_{nullptr};
2207 // edits_ must contain and own pointers to heap-alloc VersionEdit objects.
2208 autovector<VersionEdit*> edits_;
2209};
2210
2211const std::string VersionSetWithTimestampTest::kNewCfName("new_cf");
2212
2213TEST_F(VersionSetWithTimestampTest, SetFullHistoryTsLbOnce) {
2214 constexpr uint64_t kTsLow = 100;
2215 DoTest({kTsLow});
2216}
2217
2218// Simulate the application increasing full_history_ts_low.
2219TEST_F(VersionSetWithTimestampTest, IncreaseFullHistoryTsLb) {
2220 const std::vector<uint64_t> ts_lbs = {100, 101, 102, 103};
2221 DoTest(ts_lbs);
2222}
2223
2224// Simulate the application trying to decrease full_history_ts_low
2225// unsuccessfully. If the application calls public API sequentially to
2226// decrease the lower bound ts, RocksDB will return an InvalidArgument
2227// status before involving VersionSet. Only when multiple threads trying
2228// to decrease the lower bound concurrently will this case ever happen. Even
2229// so, the lower bound cannot be decreased. The application will be notified
2230// via return value of the API.
2231TEST_F(VersionSetWithTimestampTest, TryDecreaseFullHistoryTsLb) {
2232 const std::vector<uint64_t> ts_lbs = {103, 102, 101, 100};
2233 DoTest(ts_lbs);
2234}
2235
f67539c2
TL
2236class VersionSetAtomicGroupTest : public VersionSetTestBase,
2237 public testing::Test {
2238 public:
20effc67
TL
2239 VersionSetAtomicGroupTest()
2240 : VersionSetTestBase("version_set_atomic_group_test") {}
494da23a 2241
f67539c2
TL
2242 void SetUp() override {
2243 PrepareManifest(&column_families_, &last_seqno_, &log_writer_);
2244 SetupTestSyncPoints();
494da23a 2245 }
494da23a 2246
f67539c2
TL
2247 void SetupValidAtomicGroup(int atomic_group_size) {
2248 edits_.resize(atomic_group_size);
2249 int remaining = atomic_group_size;
2250 for (size_t i = 0; i != edits_.size(); ++i) {
2251 edits_[i].SetLogNumber(0);
2252 edits_[i].SetNextFile(2);
2253 edits_[i].MarkAtomicGroup(--remaining);
2254 edits_[i].SetLastSequence(last_seqno_++);
2255 }
20effc67 2256 ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr));
f67539c2 2257 }
494da23a 2258
f67539c2
TL
2259 void SetupIncompleteTrailingAtomicGroup(int atomic_group_size) {
2260 edits_.resize(atomic_group_size);
2261 int remaining = atomic_group_size;
2262 for (size_t i = 0; i != edits_.size(); ++i) {
2263 edits_[i].SetLogNumber(0);
2264 edits_[i].SetNextFile(2);
2265 edits_[i].MarkAtomicGroup(--remaining);
2266 edits_[i].SetLastSequence(last_seqno_++);
2267 }
20effc67 2268 ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr));
f67539c2 2269 }
494da23a 2270
f67539c2
TL
2271 void SetupCorruptedAtomicGroup(int atomic_group_size) {
2272 edits_.resize(atomic_group_size);
2273 int remaining = atomic_group_size;
2274 for (size_t i = 0; i != edits_.size(); ++i) {
2275 edits_[i].SetLogNumber(0);
2276 edits_[i].SetNextFile(2);
2277 if (i != ((size_t)atomic_group_size / 2)) {
2278 edits_[i].MarkAtomicGroup(--remaining);
2279 }
2280 edits_[i].SetLastSequence(last_seqno_++);
2281 }
20effc67 2282 ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr));
f67539c2 2283 }
494da23a 2284
f67539c2
TL
2285 void SetupIncorrectAtomicGroup(int atomic_group_size) {
2286 edits_.resize(atomic_group_size);
2287 int remaining = atomic_group_size;
2288 for (size_t i = 0; i != edits_.size(); ++i) {
2289 edits_[i].SetLogNumber(0);
2290 edits_[i].SetNextFile(2);
2291 if (i != 1) {
2292 edits_[i].MarkAtomicGroup(--remaining);
2293 } else {
2294 edits_[i].MarkAtomicGroup(remaining--);
2295 }
2296 edits_[i].SetLastSequence(last_seqno_++);
2297 }
20effc67 2298 ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr));
f67539c2 2299 }
494da23a 2300
f67539c2
TL
2301 void SetupTestSyncPoints() {
2302 SyncPoint::GetInstance()->DisableProcessing();
2303 SyncPoint::GetInstance()->ClearAllCallBacks();
2304 SyncPoint::GetInstance()->SetCallBack(
2305 "AtomicGroupReadBuffer::AddEdit:FirstInAtomicGroup", [&](void* arg) {
2306 VersionEdit* e = reinterpret_cast<VersionEdit*>(arg);
2307 EXPECT_EQ(edits_.front().DebugString(),
2308 e->DebugString()); // compare based on value
2309 first_in_atomic_group_ = true;
2310 });
2311 SyncPoint::GetInstance()->SetCallBack(
2312 "AtomicGroupReadBuffer::AddEdit:LastInAtomicGroup", [&](void* arg) {
2313 VersionEdit* e = reinterpret_cast<VersionEdit*>(arg);
2314 EXPECT_EQ(edits_.back().DebugString(),
2315 e->DebugString()); // compare based on value
2316 EXPECT_TRUE(first_in_atomic_group_);
2317 last_in_atomic_group_ = true;
2318 });
20effc67
TL
2319 SyncPoint::GetInstance()->SetCallBack(
2320 "VersionEditHandlerBase::Iterate:Finish", [&](void* arg) {
1e59de90 2321 num_recovered_edits_ = *reinterpret_cast<size_t*>(arg);
20effc67 2322 });
f67539c2
TL
2323 SyncPoint::GetInstance()->SetCallBack(
2324 "AtomicGroupReadBuffer::AddEdit:AtomicGroup",
2325 [&](void* /* arg */) { ++num_edits_in_atomic_group_; });
2326 SyncPoint::GetInstance()->SetCallBack(
2327 "AtomicGroupReadBuffer::AddEdit:AtomicGroupMixedWithNormalEdits",
2328 [&](void* arg) {
2329 corrupted_edit_ = *reinterpret_cast<VersionEdit*>(arg);
2330 });
2331 SyncPoint::GetInstance()->SetCallBack(
2332 "AtomicGroupReadBuffer::AddEdit:IncorrectAtomicGroupSize",
2333 [&](void* arg) {
2334 edit_with_incorrect_group_size_ =
2335 *reinterpret_cast<VersionEdit*>(arg);
2336 });
2337 SyncPoint::GetInstance()->EnableProcessing();
2338 }
494da23a 2339
f67539c2
TL
2340 void AddNewEditsToLog(int num_edits) {
2341 for (int i = 0; i < num_edits; i++) {
2342 std::string record;
2343 edits_[i].EncodeTo(&record);
2344 ASSERT_OK(log_writer_->AddRecord(record));
2345 }
494da23a 2346 }
f67539c2
TL
2347
2348 void TearDown() override {
2349 SyncPoint::GetInstance()->DisableProcessing();
2350 SyncPoint::GetInstance()->ClearAllCallBacks();
2351 log_writer_.reset();
494da23a 2352 }
494da23a 2353
f67539c2
TL
2354 protected:
2355 std::vector<ColumnFamilyDescriptor> column_families_;
2356 SequenceNumber last_seqno_;
2357 std::vector<VersionEdit> edits_;
2358 bool first_in_atomic_group_ = false;
2359 bool last_in_atomic_group_ = false;
2360 int num_edits_in_atomic_group_ = 0;
1e59de90 2361 size_t num_recovered_edits_ = 0;
f67539c2
TL
2362 VersionEdit corrupted_edit_;
2363 VersionEdit edit_with_incorrect_group_size_;
2364 std::unique_ptr<log::Writer> log_writer_;
2365};
494da23a 2366
f67539c2
TL
2367TEST_F(VersionSetAtomicGroupTest, HandleValidAtomicGroupWithVersionSetRecover) {
2368 const int kAtomicGroupSize = 3;
2369 SetupValidAtomicGroup(kAtomicGroupSize);
2370 AddNewEditsToLog(kAtomicGroupSize);
2371 EXPECT_OK(versions_->Recover(column_families_, false));
2372 EXPECT_EQ(column_families_.size(),
2373 versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
2374 EXPECT_TRUE(first_in_atomic_group_);
2375 EXPECT_TRUE(last_in_atomic_group_);
2376 EXPECT_EQ(num_initial_edits_ + kAtomicGroupSize, num_recovered_edits_);
f67539c2 2377}
494da23a 2378
f67539c2
TL
2379TEST_F(VersionSetAtomicGroupTest,
2380 HandleValidAtomicGroupWithReactiveVersionSetRecover) {
2381 const int kAtomicGroupSize = 3;
2382 SetupValidAtomicGroup(kAtomicGroupSize);
2383 AddNewEditsToLog(kAtomicGroupSize);
2384 std::unique_ptr<log::FragmentBufferedReader> manifest_reader;
2385 std::unique_ptr<log::Reader::Reporter> manifest_reporter;
2386 std::unique_ptr<Status> manifest_reader_status;
2387 EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader,
2388 &manifest_reporter,
2389 &manifest_reader_status));
2390 EXPECT_EQ(column_families_.size(),
2391 reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
2392 EXPECT_TRUE(first_in_atomic_group_);
2393 EXPECT_TRUE(last_in_atomic_group_);
2394 // The recover should clean up the replay buffer.
2395 EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == 0);
2396 EXPECT_TRUE(reactive_versions_->replay_buffer().size() == 0);
2397 EXPECT_EQ(num_initial_edits_ + kAtomicGroupSize, num_recovered_edits_);
f67539c2 2398}
494da23a 2399
f67539c2
TL
2400TEST_F(VersionSetAtomicGroupTest,
2401 HandleValidAtomicGroupWithReactiveVersionSetReadAndApply) {
2402 const int kAtomicGroupSize = 3;
2403 SetupValidAtomicGroup(kAtomicGroupSize);
2404 std::unique_ptr<log::FragmentBufferedReader> manifest_reader;
2405 std::unique_ptr<log::Reader::Reporter> manifest_reporter;
2406 std::unique_ptr<Status> manifest_reader_status;
2407 EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader,
2408 &manifest_reporter,
2409 &manifest_reader_status));
1e59de90 2410 EXPECT_EQ(num_initial_edits_, num_recovered_edits_);
f67539c2
TL
2411 AddNewEditsToLog(kAtomicGroupSize);
2412 InstrumentedMutex mu;
2413 std::unordered_set<ColumnFamilyData*> cfds_changed;
2414 mu.Lock();
1e59de90
TL
2415 EXPECT_OK(reactive_versions_->ReadAndApply(
2416 &mu, &manifest_reader, manifest_reader_status.get(), &cfds_changed));
f67539c2
TL
2417 mu.Unlock();
2418 EXPECT_TRUE(first_in_atomic_group_);
2419 EXPECT_TRUE(last_in_atomic_group_);
2420 // The recover should clean up the replay buffer.
2421 EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == 0);
2422 EXPECT_TRUE(reactive_versions_->replay_buffer().size() == 0);
1e59de90 2423 EXPECT_EQ(kAtomicGroupSize, num_recovered_edits_);
f67539c2 2424}
494da23a 2425
f67539c2
TL
2426TEST_F(VersionSetAtomicGroupTest,
2427 HandleIncompleteTrailingAtomicGroupWithVersionSetRecover) {
2428 const int kAtomicGroupSize = 4;
2429 const int kNumberOfPersistedVersionEdits = kAtomicGroupSize - 1;
2430 SetupIncompleteTrailingAtomicGroup(kAtomicGroupSize);
2431 AddNewEditsToLog(kNumberOfPersistedVersionEdits);
2432 EXPECT_OK(versions_->Recover(column_families_, false));
2433 EXPECT_EQ(column_families_.size(),
494da23a 2434 versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
f67539c2
TL
2435 EXPECT_TRUE(first_in_atomic_group_);
2436 EXPECT_FALSE(last_in_atomic_group_);
2437 EXPECT_EQ(kNumberOfPersistedVersionEdits, num_edits_in_atomic_group_);
2438 EXPECT_EQ(num_initial_edits_, num_recovered_edits_);
494da23a
TL
2439}
2440
f67539c2
TL
2441TEST_F(VersionSetAtomicGroupTest,
2442 HandleIncompleteTrailingAtomicGroupWithReactiveVersionSetRecover) {
494da23a 2443 const int kAtomicGroupSize = 4;
f67539c2
TL
2444 const int kNumberOfPersistedVersionEdits = kAtomicGroupSize - 1;
2445 SetupIncompleteTrailingAtomicGroup(kAtomicGroupSize);
2446 AddNewEditsToLog(kNumberOfPersistedVersionEdits);
2447 std::unique_ptr<log::FragmentBufferedReader> manifest_reader;
2448 std::unique_ptr<log::Reader::Reporter> manifest_reporter;
2449 std::unique_ptr<Status> manifest_reader_status;
2450 EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader,
2451 &manifest_reporter,
2452 &manifest_reader_status));
2453 EXPECT_EQ(column_families_.size(),
2454 reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
2455 EXPECT_TRUE(first_in_atomic_group_);
2456 EXPECT_FALSE(last_in_atomic_group_);
2457 EXPECT_EQ(kNumberOfPersistedVersionEdits, num_edits_in_atomic_group_);
2458 // Reactive version set should store the edits in the replay buffer.
2459 EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() ==
2460 kNumberOfPersistedVersionEdits);
2461 EXPECT_TRUE(reactive_versions_->replay_buffer().size() == kAtomicGroupSize);
2462 // Write the last record. The reactive version set should now apply all
2463 // edits.
2464 std::string last_record;
2465 edits_[kAtomicGroupSize - 1].EncodeTo(&last_record);
2466 EXPECT_OK(log_writer_->AddRecord(last_record));
2467 InstrumentedMutex mu;
2468 std::unordered_set<ColumnFamilyData*> cfds_changed;
2469 mu.Lock();
1e59de90
TL
2470 EXPECT_OK(reactive_versions_->ReadAndApply(
2471 &mu, &manifest_reader, manifest_reader_status.get(), &cfds_changed));
f67539c2
TL
2472 mu.Unlock();
2473 // Reactive version set should be empty now.
2474 EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == 0);
2475 EXPECT_TRUE(reactive_versions_->replay_buffer().size() == 0);
2476 EXPECT_EQ(num_initial_edits_, num_recovered_edits_);
f67539c2 2477}
494da23a 2478
f67539c2
TL
2479TEST_F(VersionSetAtomicGroupTest,
2480 HandleIncompleteTrailingAtomicGroupWithReactiveVersionSetReadAndApply) {
2481 const int kAtomicGroupSize = 4;
2482 const int kNumberOfPersistedVersionEdits = kAtomicGroupSize - 1;
2483 SetupIncompleteTrailingAtomicGroup(kAtomicGroupSize);
2484 std::unique_ptr<log::FragmentBufferedReader> manifest_reader;
2485 std::unique_ptr<log::Reader::Reporter> manifest_reporter;
2486 std::unique_ptr<Status> manifest_reader_status;
2487 // No edits in an atomic group.
2488 EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader,
2489 &manifest_reporter,
2490 &manifest_reader_status));
2491 EXPECT_EQ(column_families_.size(),
2492 reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
1e59de90 2493 EXPECT_EQ(num_initial_edits_, num_recovered_edits_);
f67539c2
TL
2494 // Write a few edits in an atomic group.
2495 AddNewEditsToLog(kNumberOfPersistedVersionEdits);
2496 InstrumentedMutex mu;
2497 std::unordered_set<ColumnFamilyData*> cfds_changed;
2498 mu.Lock();
1e59de90
TL
2499 EXPECT_OK(reactive_versions_->ReadAndApply(
2500 &mu, &manifest_reader, manifest_reader_status.get(), &cfds_changed));
f67539c2
TL
2501 mu.Unlock();
2502 EXPECT_TRUE(first_in_atomic_group_);
2503 EXPECT_FALSE(last_in_atomic_group_);
2504 EXPECT_EQ(kNumberOfPersistedVersionEdits, num_edits_in_atomic_group_);
2505 // Reactive version set should store the edits in the replay buffer.
2506 EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() ==
2507 kNumberOfPersistedVersionEdits);
2508 EXPECT_TRUE(reactive_versions_->replay_buffer().size() == kAtomicGroupSize);
f67539c2 2509}
494da23a 2510
f67539c2
TL
2511TEST_F(VersionSetAtomicGroupTest,
2512 HandleCorruptedAtomicGroupWithVersionSetRecover) {
2513 const int kAtomicGroupSize = 4;
2514 SetupCorruptedAtomicGroup(kAtomicGroupSize);
2515 AddNewEditsToLog(kAtomicGroupSize);
2516 EXPECT_NOK(versions_->Recover(column_families_, false));
2517 EXPECT_EQ(column_families_.size(),
494da23a 2518 versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
f67539c2
TL
2519 EXPECT_EQ(edits_[kAtomicGroupSize / 2].DebugString(),
2520 corrupted_edit_.DebugString());
494da23a
TL
2521}
2522
f67539c2
TL
2523TEST_F(VersionSetAtomicGroupTest,
2524 HandleCorruptedAtomicGroupWithReactiveVersionSetRecover) {
2525 const int kAtomicGroupSize = 4;
2526 SetupCorruptedAtomicGroup(kAtomicGroupSize);
2527 AddNewEditsToLog(kAtomicGroupSize);
2528 std::unique_ptr<log::FragmentBufferedReader> manifest_reader;
2529 std::unique_ptr<log::Reader::Reporter> manifest_reporter;
2530 std::unique_ptr<Status> manifest_reader_status;
2531 EXPECT_NOK(reactive_versions_->Recover(column_families_, &manifest_reader,
2532 &manifest_reporter,
2533 &manifest_reader_status));
2534 EXPECT_EQ(column_families_.size(),
2535 reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
2536 EXPECT_EQ(edits_[kAtomicGroupSize / 2].DebugString(),
2537 corrupted_edit_.DebugString());
2538}
494da23a 2539
f67539c2
TL
2540TEST_F(VersionSetAtomicGroupTest,
2541 HandleCorruptedAtomicGroupWithReactiveVersionSetReadAndApply) {
494da23a 2542 const int kAtomicGroupSize = 4;
f67539c2
TL
2543 SetupCorruptedAtomicGroup(kAtomicGroupSize);
2544 InstrumentedMutex mu;
2545 std::unordered_set<ColumnFamilyData*> cfds_changed;
2546 std::unique_ptr<log::FragmentBufferedReader> manifest_reader;
2547 std::unique_ptr<log::Reader::Reporter> manifest_reporter;
2548 std::unique_ptr<Status> manifest_reader_status;
2549 EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader,
2550 &manifest_reporter,
2551 &manifest_reader_status));
2552 // Write the corrupted edits.
2553 AddNewEditsToLog(kAtomicGroupSize);
2554 mu.Lock();
1e59de90
TL
2555 EXPECT_NOK(reactive_versions_->ReadAndApply(
2556 &mu, &manifest_reader, manifest_reader_status.get(), &cfds_changed));
f67539c2
TL
2557 mu.Unlock();
2558 EXPECT_EQ(edits_[kAtomicGroupSize / 2].DebugString(),
2559 corrupted_edit_.DebugString());
2560}
494da23a 2561
f67539c2
TL
2562TEST_F(VersionSetAtomicGroupTest,
2563 HandleIncorrectAtomicGroupSizeWithVersionSetRecover) {
2564 const int kAtomicGroupSize = 4;
2565 SetupIncorrectAtomicGroup(kAtomicGroupSize);
2566 AddNewEditsToLog(kAtomicGroupSize);
2567 EXPECT_NOK(versions_->Recover(column_families_, false));
2568 EXPECT_EQ(column_families_.size(),
2569 versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
2570 EXPECT_EQ(edits_[1].DebugString(),
2571 edit_with_incorrect_group_size_.DebugString());
2572}
494da23a 2573
f67539c2
TL
2574TEST_F(VersionSetAtomicGroupTest,
2575 HandleIncorrectAtomicGroupSizeWithReactiveVersionSetRecover) {
2576 const int kAtomicGroupSize = 4;
2577 SetupIncorrectAtomicGroup(kAtomicGroupSize);
2578 AddNewEditsToLog(kAtomicGroupSize);
2579 std::unique_ptr<log::FragmentBufferedReader> manifest_reader;
2580 std::unique_ptr<log::Reader::Reporter> manifest_reporter;
2581 std::unique_ptr<Status> manifest_reader_status;
2582 EXPECT_NOK(reactive_versions_->Recover(column_families_, &manifest_reader,
2583 &manifest_reporter,
2584 &manifest_reader_status));
2585 EXPECT_EQ(column_families_.size(),
2586 reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
2587 EXPECT_EQ(edits_[1].DebugString(),
2588 edit_with_incorrect_group_size_.DebugString());
2589}
494da23a 2590
f67539c2
TL
2591TEST_F(VersionSetAtomicGroupTest,
2592 HandleIncorrectAtomicGroupSizeWithReactiveVersionSetReadAndApply) {
2593 const int kAtomicGroupSize = 4;
2594 SetupIncorrectAtomicGroup(kAtomicGroupSize);
2595 InstrumentedMutex mu;
2596 std::unordered_set<ColumnFamilyData*> cfds_changed;
2597 std::unique_ptr<log::FragmentBufferedReader> manifest_reader;
2598 std::unique_ptr<log::Reader::Reporter> manifest_reporter;
2599 std::unique_ptr<Status> manifest_reader_status;
2600 EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader,
2601 &manifest_reporter,
2602 &manifest_reader_status));
2603 AddNewEditsToLog(kAtomicGroupSize);
2604 mu.Lock();
1e59de90
TL
2605 EXPECT_NOK(reactive_versions_->ReadAndApply(
2606 &mu, &manifest_reader, manifest_reader_status.get(), &cfds_changed));
f67539c2
TL
2607 mu.Unlock();
2608 EXPECT_EQ(edits_[1].DebugString(),
2609 edit_with_incorrect_group_size_.DebugString());
494da23a
TL
2610}
2611
2612class VersionSetTestDropOneCF : public VersionSetTestBase,
2613 public testing::TestWithParam<std::string> {
2614 public:
20effc67
TL
2615 VersionSetTestDropOneCF()
2616 : VersionSetTestBase("version_set_test_drop_one_cf") {}
494da23a
TL
2617};
2618
2619// This test simulates the following execution sequence
2620// Time thread1 bg_flush_thr
2621// | Prepare version edits (e1,e2,e3) for atomic
2622// | flush cf1, cf2, cf3
2623// | Enqueue e to drop cfi
2624// | to manifest_writers_
2625// | Enqueue (e1,e2,e3) to manifest_writers_
2626// |
2627// | Apply e,
2628// | cfi.IsDropped() is true
2629// | Apply (e1,e2,e3),
2630// | since cfi.IsDropped() == true, we need to
2631// | drop ei and write the rest to MANIFEST.
2632// V
2633//
2634// Repeat the test for i = 1, 2, 3 to simulate dropping the first, middle and
2635// last column family in an atomic group.
2636TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) {
2637 std::vector<ColumnFamilyDescriptor> column_families;
2638 SequenceNumber last_seqno;
2639 std::unique_ptr<log::Writer> log_writer;
2640 PrepareManifest(&column_families, &last_seqno, &log_writer);
20effc67 2641 Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
494da23a
TL
2642 ASSERT_OK(s);
2643
2644 EXPECT_OK(versions_->Recover(column_families, false /* read_only */));
2645 EXPECT_EQ(column_families.size(),
2646 versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
2647
2648 const int kAtomicGroupSize = 3;
2649 const std::vector<std::string> non_default_cf_names = {
2650 kColumnFamilyName1, kColumnFamilyName2, kColumnFamilyName3};
2651
2652 // Drop one column family
2653 VersionEdit drop_cf_edit;
2654 drop_cf_edit.DropColumnFamily();
2655 const std::string cf_to_drop_name(GetParam());
2656 auto cfd_to_drop =
2657 versions_->GetColumnFamilySet()->GetColumnFamily(cf_to_drop_name);
2658 ASSERT_NE(nullptr, cfd_to_drop);
2659 // Increase its refcount because cfd_to_drop is used later, and we need to
2660 // prevent it from being deleted.
2661 cfd_to_drop->Ref();
2662 drop_cf_edit.SetColumnFamily(cfd_to_drop->GetID());
2663 mutex_.Lock();
2664 s = versions_->LogAndApply(cfd_to_drop,
2665 *cfd_to_drop->GetLatestMutableCFOptions(),
1e59de90 2666 &drop_cf_edit, &mutex_, nullptr);
494da23a
TL
2667 mutex_.Unlock();
2668 ASSERT_OK(s);
2669
2670 std::vector<VersionEdit> edits(kAtomicGroupSize);
2671 uint32_t remaining = kAtomicGroupSize;
2672 size_t i = 0;
2673 autovector<ColumnFamilyData*> cfds;
2674 autovector<const MutableCFOptions*> mutable_cf_options_list;
2675 autovector<autovector<VersionEdit*>> edit_lists;
2676 for (const auto& cf_name : non_default_cf_names) {
2677 auto cfd = (cf_name != cf_to_drop_name)
2678 ? versions_->GetColumnFamilySet()->GetColumnFamily(cf_name)
2679 : cfd_to_drop;
2680 ASSERT_NE(nullptr, cfd);
2681 cfds.push_back(cfd);
2682 mutable_cf_options_list.emplace_back(cfd->GetLatestMutableCFOptions());
2683 edits[i].SetColumnFamily(cfd->GetID());
2684 edits[i].SetLogNumber(0);
2685 edits[i].SetNextFile(2);
2686 edits[i].MarkAtomicGroup(--remaining);
2687 edits[i].SetLastSequence(last_seqno++);
2688 autovector<VersionEdit*> tmp_edits;
2689 tmp_edits.push_back(&edits[i]);
2690 edit_lists.emplace_back(tmp_edits);
2691 ++i;
2692 }
2693 int called = 0;
2694 SyncPoint::GetInstance()->DisableProcessing();
2695 SyncPoint::GetInstance()->ClearAllCallBacks();
2696 SyncPoint::GetInstance()->SetCallBack(
2697 "VersionSet::ProcessManifestWrites:CheckOneAtomicGroup", [&](void* arg) {
2698 std::vector<VersionEdit*>* tmp_edits =
2699 reinterpret_cast<std::vector<VersionEdit*>*>(arg);
2700 EXPECT_EQ(kAtomicGroupSize - 1, tmp_edits->size());
2701 for (const auto e : *tmp_edits) {
2702 bool found = false;
2703 for (const auto& e2 : edits) {
2704 if (&e2 == e) {
2705 found = true;
2706 break;
2707 }
2708 }
2709 ASSERT_TRUE(found);
2710 }
2711 ++called;
2712 });
2713 SyncPoint::GetInstance()->EnableProcessing();
2714 mutex_.Lock();
1e59de90
TL
2715 s = versions_->LogAndApply(cfds, mutable_cf_options_list, edit_lists, &mutex_,
2716 nullptr);
494da23a
TL
2717 mutex_.Unlock();
2718 ASSERT_OK(s);
2719 ASSERT_EQ(1, called);
1e59de90 2720 cfd_to_drop->UnrefAndTryDelete();
494da23a
TL
2721}
2722
2723INSTANTIATE_TEST_CASE_P(
2724 AtomicGroup, VersionSetTestDropOneCF,
2725 testing::Values(VersionSetTestBase::kColumnFamilyName1,
2726 VersionSetTestBase::kColumnFamilyName2,
2727 VersionSetTestBase::kColumnFamilyName3));
20effc67
TL
2728
2729class EmptyDefaultCfNewManifest : public VersionSetTestBase,
2730 public testing::Test {
2731 public:
2732 EmptyDefaultCfNewManifest() : VersionSetTestBase("version_set_new_db_test") {}
2733 // Emulate DBImpl::NewDB()
2734 void PrepareManifest(std::vector<ColumnFamilyDescriptor>* /*column_families*/,
2735 SequenceNumber* /*last_seqno*/,
2736 std::unique_ptr<log::Writer>* log_writer) override {
2737 assert(log_writer != nullptr);
2738 VersionEdit new_db;
2739 new_db.SetLogNumber(0);
20effc67 2740 const std::string manifest_path = DescriptorFileName(dbname_, 1);
1e59de90
TL
2741 const auto& fs = env_->GetFileSystem();
2742 std::unique_ptr<WritableFileWriter> file_writer;
2743 Status s = WritableFileWriter::Create(
2744 fs, manifest_path, fs->OptimizeForManifestWrite(env_options_),
2745 &file_writer, nullptr);
20effc67 2746 ASSERT_OK(s);
20effc67
TL
2747 log_writer->reset(new log::Writer(std::move(file_writer), 0, true));
2748 std::string record;
2749 ASSERT_TRUE(new_db.EncodeTo(&record));
2750 s = (*log_writer)->AddRecord(record);
2751 ASSERT_OK(s);
2752 // Create new column family
2753 VersionEdit new_cf;
2754 new_cf.AddColumnFamily(VersionSetTestBase::kColumnFamilyName1);
2755 new_cf.SetColumnFamily(1);
2756 new_cf.SetLastSequence(2);
2757 new_cf.SetNextFile(2);
2758 record.clear();
2759 ASSERT_TRUE(new_cf.EncodeTo(&record));
2760 s = (*log_writer)->AddRecord(record);
2761 ASSERT_OK(s);
2762 }
2763
2764 protected:
2765 bool write_dbid_to_manifest_ = false;
2766 std::unique_ptr<log::Writer> log_writer_;
2767};
2768
2769// Create db, create column family. Cf creation will switch to a new MANIFEST.
2770// Then reopen db, trying to recover.
2771TEST_F(EmptyDefaultCfNewManifest, Recover) {
2772 PrepareManifest(nullptr, nullptr, &log_writer_);
2773 log_writer_.reset();
2774 Status s =
2775 SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
2776 ASSERT_OK(s);
2777 std::string manifest_path;
2778 VerifyManifest(&manifest_path);
2779 std::vector<ColumnFamilyDescriptor> column_families;
2780 column_families.emplace_back(kDefaultColumnFamilyName, cf_options_);
2781 column_families.emplace_back(VersionSetTestBase::kColumnFamilyName1,
2782 cf_options_);
2783 std::string db_id;
2784 bool has_missing_table_file = false;
2785 s = versions_->TryRecoverFromOneManifest(
2786 manifest_path, column_families, false, &db_id, &has_missing_table_file);
2787 ASSERT_OK(s);
2788 ASSERT_FALSE(has_missing_table_file);
2789}
2790
2791class VersionSetTestEmptyDb
2792 : public VersionSetTestBase,
2793 public testing::TestWithParam<
2794 std::tuple<bool, bool, std::vector<std::string>>> {
2795 public:
2796 static const std::string kUnknownColumnFamilyName;
2797 VersionSetTestEmptyDb() : VersionSetTestBase("version_set_test_empty_db") {}
2798
2799 protected:
2800 void PrepareManifest(std::vector<ColumnFamilyDescriptor>* /*column_families*/,
2801 SequenceNumber* /*last_seqno*/,
2802 std::unique_ptr<log::Writer>* log_writer) override {
2803 assert(nullptr != log_writer);
2804 VersionEdit new_db;
2805 if (db_options_.write_dbid_to_manifest) {
2806 DBOptions tmp_db_options;
2807 tmp_db_options.env = env_;
2808 std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_));
2809 std::string db_id;
2810 impl->GetDbIdentityFromIdentityFile(&db_id);
2811 new_db.SetDBId(db_id);
2812 }
2813 const std::string manifest_path = DescriptorFileName(dbname_, 1);
1e59de90
TL
2814 const auto& fs = env_->GetFileSystem();
2815 std::unique_ptr<WritableFileWriter> file_writer;
2816 Status s = WritableFileWriter::Create(
2817 fs, manifest_path, fs->OptimizeForManifestWrite(env_options_),
2818 &file_writer, nullptr);
20effc67 2819 ASSERT_OK(s);
20effc67
TL
2820 {
2821 log_writer->reset(new log::Writer(std::move(file_writer), 0, false));
2822 std::string record;
2823 new_db.EncodeTo(&record);
2824 s = (*log_writer)->AddRecord(record);
2825 ASSERT_OK(s);
2826 }
2827 }
2828
2829 std::unique_ptr<log::Writer> log_writer_;
2830};
2831
2832const std::string VersionSetTestEmptyDb::kUnknownColumnFamilyName = "unknown";
2833
2834TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest0) {
2835 db_options_.write_dbid_to_manifest = std::get<0>(GetParam());
2836 PrepareManifest(nullptr, nullptr, &log_writer_);
2837 log_writer_.reset();
2838 Status s =
2839 SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
2840 ASSERT_OK(s);
2841
2842 std::string manifest_path;
2843 VerifyManifest(&manifest_path);
2844
2845 bool read_only = std::get<1>(GetParam());
2846 const std::vector<std::string> cf_names = std::get<2>(GetParam());
2847
2848 std::vector<ColumnFamilyDescriptor> column_families;
2849 for (const auto& cf_name : cf_names) {
2850 column_families.emplace_back(cf_name, cf_options_);
2851 }
2852
2853 std::string db_id;
2854 bool has_missing_table_file = false;
2855 s = versions_->TryRecoverFromOneManifest(manifest_path, column_families,
2856 read_only, &db_id,
2857 &has_missing_table_file);
2858 auto iter =
2859 std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName);
2860 if (iter == cf_names.end()) {
2861 ASSERT_TRUE(s.IsInvalidArgument());
2862 } else {
1e59de90 2863 ASSERT_NE(s.ToString().find(manifest_path), std::string::npos);
20effc67
TL
2864 ASSERT_TRUE(s.IsCorruption());
2865 }
2866}
2867
2868TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest1) {
2869 db_options_.write_dbid_to_manifest = std::get<0>(GetParam());
2870 PrepareManifest(nullptr, nullptr, &log_writer_);
2871 // Only a subset of column families in the MANIFEST.
2872 VersionEdit new_cf1;
2873 new_cf1.AddColumnFamily(VersionSetTestBase::kColumnFamilyName1);
2874 new_cf1.SetColumnFamily(1);
2875 Status s;
2876 {
2877 std::string record;
2878 new_cf1.EncodeTo(&record);
2879 s = log_writer_->AddRecord(record);
2880 ASSERT_OK(s);
2881 }
2882 log_writer_.reset();
2883 s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
2884 ASSERT_OK(s);
2885
2886 std::string manifest_path;
2887 VerifyManifest(&manifest_path);
2888
2889 bool read_only = std::get<1>(GetParam());
2890 const std::vector<std::string>& cf_names = std::get<2>(GetParam());
2891 std::vector<ColumnFamilyDescriptor> column_families;
2892 for (const auto& cf_name : cf_names) {
2893 column_families.emplace_back(cf_name, cf_options_);
2894 }
2895 std::string db_id;
2896 bool has_missing_table_file = false;
2897 s = versions_->TryRecoverFromOneManifest(manifest_path, column_families,
2898 read_only, &db_id,
2899 &has_missing_table_file);
2900 auto iter =
2901 std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName);
2902 if (iter == cf_names.end()) {
2903 ASSERT_TRUE(s.IsInvalidArgument());
2904 } else {
1e59de90 2905 ASSERT_NE(s.ToString().find(manifest_path), std::string::npos);
20effc67
TL
2906 ASSERT_TRUE(s.IsCorruption());
2907 }
2908}
2909
2910TEST_P(VersionSetTestEmptyDb, OpenFromInCompleteManifest2) {
2911 db_options_.write_dbid_to_manifest = std::get<0>(GetParam());
2912 PrepareManifest(nullptr, nullptr, &log_writer_);
2913 // Write all column families but no log_number, next_file_number and
2914 // last_sequence.
2915 const std::vector<std::string> all_cf_names = {
2916 kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2,
2917 kColumnFamilyName3};
2918 uint32_t cf_id = 1;
2919 Status s;
2920 for (size_t i = 1; i != all_cf_names.size(); ++i) {
2921 VersionEdit new_cf;
2922 new_cf.AddColumnFamily(all_cf_names[i]);
2923 new_cf.SetColumnFamily(cf_id++);
2924 std::string record;
2925 ASSERT_TRUE(new_cf.EncodeTo(&record));
2926 s = log_writer_->AddRecord(record);
2927 ASSERT_OK(s);
2928 }
2929 log_writer_.reset();
2930 s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
2931 ASSERT_OK(s);
2932
2933 std::string manifest_path;
2934 VerifyManifest(&manifest_path);
2935
2936 bool read_only = std::get<1>(GetParam());
2937 const std::vector<std::string>& cf_names = std::get<2>(GetParam());
2938 std::vector<ColumnFamilyDescriptor> column_families;
2939 for (const auto& cf_name : cf_names) {
2940 column_families.emplace_back(cf_name, cf_options_);
2941 }
2942 std::string db_id;
2943 bool has_missing_table_file = false;
2944 s = versions_->TryRecoverFromOneManifest(manifest_path, column_families,
2945 read_only, &db_id,
2946 &has_missing_table_file);
2947 auto iter =
2948 std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName);
2949 if (iter == cf_names.end()) {
2950 ASSERT_TRUE(s.IsInvalidArgument());
2951 } else {
1e59de90 2952 ASSERT_NE(s.ToString().find(manifest_path), std::string::npos);
20effc67
TL
2953 ASSERT_TRUE(s.IsCorruption());
2954 }
2955}
2956
2957TEST_P(VersionSetTestEmptyDb, OpenManifestWithUnknownCF) {
2958 db_options_.write_dbid_to_manifest = std::get<0>(GetParam());
2959 PrepareManifest(nullptr, nullptr, &log_writer_);
2960 // Write all column families but no log_number, next_file_number and
2961 // last_sequence.
2962 const std::vector<std::string> all_cf_names = {
2963 kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2,
2964 kColumnFamilyName3};
2965 uint32_t cf_id = 1;
2966 Status s;
2967 for (size_t i = 1; i != all_cf_names.size(); ++i) {
2968 VersionEdit new_cf;
2969 new_cf.AddColumnFamily(all_cf_names[i]);
2970 new_cf.SetColumnFamily(cf_id++);
2971 std::string record;
2972 ASSERT_TRUE(new_cf.EncodeTo(&record));
2973 s = log_writer_->AddRecord(record);
2974 ASSERT_OK(s);
2975 }
2976 {
2977 VersionEdit tmp_edit;
2978 tmp_edit.SetColumnFamily(4);
2979 tmp_edit.SetLogNumber(0);
2980 tmp_edit.SetNextFile(2);
2981 tmp_edit.SetLastSequence(0);
2982 std::string record;
2983 ASSERT_TRUE(tmp_edit.EncodeTo(&record));
2984 s = log_writer_->AddRecord(record);
2985 ASSERT_OK(s);
2986 }
2987 log_writer_.reset();
2988 s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
2989 ASSERT_OK(s);
2990
2991 std::string manifest_path;
2992 VerifyManifest(&manifest_path);
2993
2994 bool read_only = std::get<1>(GetParam());
2995 const std::vector<std::string>& cf_names = std::get<2>(GetParam());
2996 std::vector<ColumnFamilyDescriptor> column_families;
2997 for (const auto& cf_name : cf_names) {
2998 column_families.emplace_back(cf_name, cf_options_);
2999 }
3000 std::string db_id;
3001 bool has_missing_table_file = false;
3002 s = versions_->TryRecoverFromOneManifest(manifest_path, column_families,
3003 read_only, &db_id,
3004 &has_missing_table_file);
3005 auto iter =
3006 std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName);
3007 if (iter == cf_names.end()) {
3008 ASSERT_TRUE(s.IsInvalidArgument());
3009 } else {
1e59de90 3010 ASSERT_NE(s.ToString().find(manifest_path), std::string::npos);
20effc67
TL
3011 ASSERT_TRUE(s.IsCorruption());
3012 }
3013}
3014
3015TEST_P(VersionSetTestEmptyDb, OpenCompleteManifest) {
3016 db_options_.write_dbid_to_manifest = std::get<0>(GetParam());
3017 PrepareManifest(nullptr, nullptr, &log_writer_);
3018 // Write all column families but no log_number, next_file_number and
3019 // last_sequence.
3020 const std::vector<std::string> all_cf_names = {
3021 kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2,
3022 kColumnFamilyName3};
3023 uint32_t cf_id = 1;
3024 Status s;
3025 for (size_t i = 1; i != all_cf_names.size(); ++i) {
3026 VersionEdit new_cf;
3027 new_cf.AddColumnFamily(all_cf_names[i]);
3028 new_cf.SetColumnFamily(cf_id++);
3029 std::string record;
3030 ASSERT_TRUE(new_cf.EncodeTo(&record));
3031 s = log_writer_->AddRecord(record);
3032 ASSERT_OK(s);
3033 }
3034 {
3035 VersionEdit tmp_edit;
3036 tmp_edit.SetLogNumber(0);
3037 tmp_edit.SetNextFile(2);
3038 tmp_edit.SetLastSequence(0);
3039 std::string record;
3040 ASSERT_TRUE(tmp_edit.EncodeTo(&record));
3041 s = log_writer_->AddRecord(record);
3042 ASSERT_OK(s);
3043 }
3044 log_writer_.reset();
3045 s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
3046 ASSERT_OK(s);
3047
3048 std::string manifest_path;
3049 VerifyManifest(&manifest_path);
3050
3051 bool read_only = std::get<1>(GetParam());
3052 const std::vector<std::string>& cf_names = std::get<2>(GetParam());
3053 std::vector<ColumnFamilyDescriptor> column_families;
3054 for (const auto& cf_name : cf_names) {
3055 column_families.emplace_back(cf_name, cf_options_);
3056 }
3057 std::string db_id;
3058 bool has_missing_table_file = false;
3059 s = versions_->TryRecoverFromOneManifest(manifest_path, column_families,
3060 read_only, &db_id,
3061 &has_missing_table_file);
3062 auto iter =
3063 std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName);
3064 if (iter == cf_names.end()) {
3065 ASSERT_TRUE(s.IsInvalidArgument());
3066 } else if (read_only) {
3067 ASSERT_OK(s);
3068 ASSERT_FALSE(has_missing_table_file);
3069 } else if (cf_names.size() == all_cf_names.size()) {
3070 ASSERT_OK(s);
3071 ASSERT_FALSE(has_missing_table_file);
3072 } else if (cf_names.size() < all_cf_names.size()) {
3073 ASSERT_TRUE(s.IsInvalidArgument());
3074 } else {
3075 ASSERT_OK(s);
3076 ASSERT_FALSE(has_missing_table_file);
3077 ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetColumnFamily(
3078 kUnknownColumnFamilyName);
3079 ASSERT_EQ(nullptr, cfd);
3080 }
3081}
3082
3083INSTANTIATE_TEST_CASE_P(
3084 BestEffortRecovery, VersionSetTestEmptyDb,
3085 testing::Combine(
3086 /*write_dbid_to_manifest=*/testing::Bool(),
3087 /*read_only=*/testing::Bool(),
3088 /*cf_names=*/
3089 testing::Values(
3090 std::vector<std::string>(),
3091 std::vector<std::string>({kDefaultColumnFamilyName}),
3092 std::vector<std::string>({VersionSetTestBase::kColumnFamilyName1,
3093 VersionSetTestBase::kColumnFamilyName2,
3094 VersionSetTestBase::kColumnFamilyName3}),
3095 std::vector<std::string>({kDefaultColumnFamilyName,
3096 VersionSetTestBase::kColumnFamilyName1}),
3097 std::vector<std::string>({kDefaultColumnFamilyName,
3098 VersionSetTestBase::kColumnFamilyName1,
3099 VersionSetTestBase::kColumnFamilyName2,
3100 VersionSetTestBase::kColumnFamilyName3}),
3101 std::vector<std::string>(
3102 {kDefaultColumnFamilyName,
3103 VersionSetTestBase::kColumnFamilyName1,
3104 VersionSetTestBase::kColumnFamilyName2,
3105 VersionSetTestBase::kColumnFamilyName3,
3106 VersionSetTestEmptyDb::kUnknownColumnFamilyName}))));
3107
3108class VersionSetTestMissingFiles : public VersionSetTestBase,
3109 public testing::Test {
3110 public:
3111 VersionSetTestMissingFiles()
3112 : VersionSetTestBase("version_set_test_missing_files"),
3113 block_based_table_options_(),
3114 table_factory_(std::make_shared<BlockBasedTableFactory>(
3115 block_based_table_options_)),
3116 internal_comparator_(
3117 std::make_shared<InternalKeyComparator>(options_.comparator)) {}
3118
3119 protected:
3120 void PrepareManifest(std::vector<ColumnFamilyDescriptor>* column_families,
3121 SequenceNumber* last_seqno,
3122 std::unique_ptr<log::Writer>* log_writer) override {
3123 assert(column_families != nullptr);
3124 assert(last_seqno != nullptr);
3125 assert(log_writer != nullptr);
3126 const std::string manifest = DescriptorFileName(dbname_, 1);
1e59de90
TL
3127 const auto& fs = env_->GetFileSystem();
3128 std::unique_ptr<WritableFileWriter> file_writer;
3129 Status s = WritableFileWriter::Create(
3130 fs, manifest, fs->OptimizeForManifestWrite(env_options_), &file_writer,
3131 nullptr);
20effc67 3132 ASSERT_OK(s);
20effc67
TL
3133 log_writer->reset(new log::Writer(std::move(file_writer), 0, false));
3134 VersionEdit new_db;
3135 if (db_options_.write_dbid_to_manifest) {
3136 DBOptions tmp_db_options;
3137 tmp_db_options.env = env_;
3138 std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_));
3139 std::string db_id;
3140 impl->GetDbIdentityFromIdentityFile(&db_id);
3141 new_db.SetDBId(db_id);
3142 }
3143 {
3144 std::string record;
3145 ASSERT_TRUE(new_db.EncodeTo(&record));
3146 s = (*log_writer)->AddRecord(record);
3147 ASSERT_OK(s);
3148 }
3149 const std::vector<std::string> cf_names = {
3150 kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2,
3151 kColumnFamilyName3};
3152 uint32_t cf_id = 1; // default cf id is 0
3153 cf_options_.table_factory = table_factory_;
3154 for (const auto& cf_name : cf_names) {
3155 column_families->emplace_back(cf_name, cf_options_);
3156 if (cf_name == kDefaultColumnFamilyName) {
3157 continue;
3158 }
3159 VersionEdit new_cf;
3160 new_cf.AddColumnFamily(cf_name);
3161 new_cf.SetColumnFamily(cf_id);
3162 std::string record;
3163 ASSERT_TRUE(new_cf.EncodeTo(&record));
3164 s = (*log_writer)->AddRecord(record);
3165 ASSERT_OK(s);
3166
3167 VersionEdit cf_files;
3168 cf_files.SetColumnFamily(cf_id);
3169 cf_files.SetLogNumber(0);
3170 record.clear();
3171 ASSERT_TRUE(cf_files.EncodeTo(&record));
3172 s = (*log_writer)->AddRecord(record);
3173 ASSERT_OK(s);
3174 ++cf_id;
3175 }
3176 SequenceNumber seq = 2;
3177 {
3178 VersionEdit edit;
3179 edit.SetNextFile(7);
3180 edit.SetLastSequence(seq);
3181 std::string record;
3182 ASSERT_TRUE(edit.EncodeTo(&record));
3183 s = (*log_writer)->AddRecord(record);
3184 ASSERT_OK(s);
3185 }
3186 *last_seqno = seq + 1;
3187 }
3188
3189 struct SstInfo {
3190 uint64_t file_number;
3191 std::string column_family;
3192 std::string key; // the only key
3193 int level = 0;
3194 SstInfo(uint64_t file_num, const std::string& cf_name,
3195 const std::string& _key)
3196 : SstInfo(file_num, cf_name, _key, 0) {}
3197 SstInfo(uint64_t file_num, const std::string& cf_name,
3198 const std::string& _key, int lvl)
3199 : file_number(file_num),
3200 column_family(cf_name),
3201 key(_key),
3202 level(lvl) {}
3203 };
3204
3205 // Create dummy sst, return their metadata. Note that only file name and size
3206 // are used.
3207 void CreateDummyTableFiles(const std::vector<SstInfo>& file_infos,
3208 std::vector<FileMetaData>* file_metas) {
3209 assert(file_metas != nullptr);
3210 for (const auto& info : file_infos) {
3211 uint64_t file_num = info.file_number;
3212 std::string fname = MakeTableFileName(dbname_, file_num);
3213 std::unique_ptr<FSWritableFile> file;
3214 Status s = fs_->NewWritableFile(fname, FileOptions(), &file, nullptr);
3215 ASSERT_OK(s);
1e59de90
TL
3216 std::unique_ptr<WritableFileWriter> fwriter(new WritableFileWriter(
3217 std::move(file), fname, FileOptions(), env_->GetSystemClock().get()));
3218 IntTblPropCollectorFactories int_tbl_prop_collector_factories;
20effc67
TL
3219
3220 std::unique_ptr<TableBuilder> builder(table_factory_->NewTableBuilder(
3221 TableBuilderOptions(
1e59de90 3222 immutable_options_, mutable_cf_options_, *internal_comparator_,
20effc67 3223 &int_tbl_prop_collector_factories, kNoCompression,
1e59de90
TL
3224 CompressionOptions(),
3225 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
3226 info.column_family, info.level),
20effc67
TL
3227 fwriter.get()));
3228 InternalKey ikey(info.key, 0, ValueType::kTypeValue);
3229 builder->Add(ikey.Encode(), "value");
3230 ASSERT_OK(builder->Finish());
1e59de90 3231 ASSERT_OK(fwriter->Flush());
20effc67
TL
3232 uint64_t file_size = 0;
3233 s = fs_->GetFileSize(fname, IOOptions(), &file_size, nullptr);
3234 ASSERT_OK(s);
3235 ASSERT_NE(0, file_size);
1e59de90
TL
3236 file_metas->emplace_back(file_num, /*file_path_id=*/0, file_size, ikey,
3237 ikey, 0, 0, false, Temperature::kUnknown, 0, 0,
3238 0, kUnknownFileChecksum,
3239 kUnknownFileChecksumFuncName, kNullUniqueId64x2);
20effc67
TL
3240 }
3241 }
3242
3243 // This method updates last_sequence_.
3244 void WriteFileAdditionAndDeletionToManifest(
3245 uint32_t cf, const std::vector<std::pair<int, FileMetaData>>& added_files,
3246 const std::vector<std::pair<int, uint64_t>>& deleted_files) {
3247 VersionEdit edit;
3248 edit.SetColumnFamily(cf);
3249 for (const auto& elem : added_files) {
3250 int level = elem.first;
3251 edit.AddFile(level, elem.second);
3252 }
3253 for (const auto& elem : deleted_files) {
3254 int level = elem.first;
3255 edit.DeleteFile(level, elem.second);
3256 }
3257 edit.SetLastSequence(last_seqno_);
3258 ++last_seqno_;
3259 assert(log_writer_.get() != nullptr);
3260 std::string record;
3261 ASSERT_TRUE(edit.EncodeTo(&record));
3262 Status s = log_writer_->AddRecord(record);
3263 ASSERT_OK(s);
3264 }
3265
3266 BlockBasedTableOptions block_based_table_options_;
3267 std::shared_ptr<TableFactory> table_factory_;
3268 std::shared_ptr<InternalKeyComparator> internal_comparator_;
3269 std::vector<ColumnFamilyDescriptor> column_families_;
3270 SequenceNumber last_seqno_;
3271 std::unique_ptr<log::Writer> log_writer_;
3272};
3273
3274TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) {
3275 std::vector<SstInfo> existing_files = {
3276 SstInfo(100, kDefaultColumnFamilyName, "a"),
3277 SstInfo(102, kDefaultColumnFamilyName, "b"),
3278 SstInfo(103, kDefaultColumnFamilyName, "c"),
3279 SstInfo(107, kDefaultColumnFamilyName, "d"),
3280 SstInfo(110, kDefaultColumnFamilyName, "e")};
3281 std::vector<FileMetaData> file_metas;
3282 CreateDummyTableFiles(existing_files, &file_metas);
3283
3284 PrepareManifest(&column_families_, &last_seqno_, &log_writer_);
3285 std::vector<std::pair<int, FileMetaData>> added_files;
3286 for (uint64_t file_num = 10; file_num < 15; ++file_num) {
3287 std::string smallest_ukey = "a";
3288 std::string largest_ukey = "b";
3289 InternalKey smallest_ikey(smallest_ukey, 1, ValueType::kTypeValue);
3290 InternalKey largest_ikey(largest_ukey, 1, ValueType::kTypeValue);
1e59de90
TL
3291 FileMetaData meta = FileMetaData(
3292 file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
3293 largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
3294 kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
20effc67
TL
3295 added_files.emplace_back(0, meta);
3296 }
3297 WriteFileAdditionAndDeletionToManifest(
3298 /*cf=*/0, added_files, std::vector<std::pair<int, uint64_t>>());
3299 std::vector<std::pair<int, uint64_t>> deleted_files;
3300 deleted_files.emplace_back(0, 10);
3301 WriteFileAdditionAndDeletionToManifest(
3302 /*cf=*/0, std::vector<std::pair<int, FileMetaData>>(), deleted_files);
3303 log_writer_.reset();
3304 Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
3305 ASSERT_OK(s);
3306 std::string manifest_path;
3307 VerifyManifest(&manifest_path);
3308 std::string db_id;
3309 bool has_missing_table_file = false;
3310 s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_,
3311 /*read_only=*/false, &db_id,
3312 &has_missing_table_file);
3313 ASSERT_OK(s);
3314 ASSERT_TRUE(has_missing_table_file);
3315 for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) {
3316 VersionStorageInfo* vstorage = cfd->current()->storage_info();
3317 const std::vector<FileMetaData*>& files = vstorage->LevelFiles(0);
3318 ASSERT_TRUE(files.empty());
3319 }
3320}
3321
3322TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) {
3323 std::vector<SstInfo> existing_files = {
3324 SstInfo(100, kDefaultColumnFamilyName, "a"),
3325 SstInfo(102, kDefaultColumnFamilyName, "b"),
3326 SstInfo(103, kDefaultColumnFamilyName, "c"),
3327 SstInfo(107, kDefaultColumnFamilyName, "d"),
3328 SstInfo(110, kDefaultColumnFamilyName, "e")};
3329 std::vector<FileMetaData> file_metas;
3330 CreateDummyTableFiles(existing_files, &file_metas);
3331
3332 PrepareManifest(&column_families_, &last_seqno_, &log_writer_);
3333 std::vector<std::pair<int, FileMetaData>> added_files;
3334 for (size_t i = 3; i != 5; ++i) {
3335 added_files.emplace_back(0, file_metas[i]);
3336 }
3337 WriteFileAdditionAndDeletionToManifest(
3338 /*cf=*/0, added_files, std::vector<std::pair<int, uint64_t>>());
3339
3340 added_files.clear();
3341 for (uint64_t file_num = 120; file_num < 130; ++file_num) {
3342 std::string smallest_ukey = "a";
3343 std::string largest_ukey = "b";
3344 InternalKey smallest_ikey(smallest_ukey, 1, ValueType::kTypeValue);
3345 InternalKey largest_ikey(largest_ukey, 1, ValueType::kTypeValue);
1e59de90
TL
3346 FileMetaData meta = FileMetaData(
3347 file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
3348 largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
3349 kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
20effc67
TL
3350 added_files.emplace_back(0, meta);
3351 }
3352 WriteFileAdditionAndDeletionToManifest(
3353 /*cf=*/0, added_files, std::vector<std::pair<int, uint64_t>>());
3354 log_writer_.reset();
3355 Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
3356 ASSERT_OK(s);
3357 std::string manifest_path;
3358 VerifyManifest(&manifest_path);
3359 std::string db_id;
3360 bool has_missing_table_file = false;
3361 s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_,
3362 /*read_only=*/false, &db_id,
3363 &has_missing_table_file);
3364 ASSERT_OK(s);
3365 ASSERT_TRUE(has_missing_table_file);
3366 for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) {
3367 VersionStorageInfo* vstorage = cfd->current()->storage_info();
3368 const std::vector<FileMetaData*>& files = vstorage->LevelFiles(0);
3369 if (cfd->GetName() == kDefaultColumnFamilyName) {
3370 ASSERT_EQ(2, files.size());
3371 for (const auto* fmeta : files) {
3372 if (fmeta->fd.GetNumber() != 107 && fmeta->fd.GetNumber() != 110) {
3373 ASSERT_FALSE(true);
3374 }
3375 }
3376 } else {
3377 ASSERT_TRUE(files.empty());
3378 }
3379 }
3380}
3381
3382TEST_F(VersionSetTestMissingFiles, NoFileMissing) {
3383 std::vector<SstInfo> existing_files = {
3384 SstInfo(100, kDefaultColumnFamilyName, "a"),
3385 SstInfo(102, kDefaultColumnFamilyName, "b"),
3386 SstInfo(103, kDefaultColumnFamilyName, "c"),
3387 SstInfo(107, kDefaultColumnFamilyName, "d"),
3388 SstInfo(110, kDefaultColumnFamilyName, "e")};
3389 std::vector<FileMetaData> file_metas;
3390 CreateDummyTableFiles(existing_files, &file_metas);
3391
3392 PrepareManifest(&column_families_, &last_seqno_, &log_writer_);
3393 std::vector<std::pair<int, FileMetaData>> added_files;
3394 for (const auto& meta : file_metas) {
3395 added_files.emplace_back(0, meta);
3396 }
3397 WriteFileAdditionAndDeletionToManifest(
3398 /*cf=*/0, added_files, std::vector<std::pair<int, uint64_t>>());
3399 std::vector<std::pair<int, uint64_t>> deleted_files;
3400 deleted_files.emplace_back(/*level=*/0, 100);
3401 WriteFileAdditionAndDeletionToManifest(
3402 /*cf=*/0, std::vector<std::pair<int, FileMetaData>>(), deleted_files);
3403 log_writer_.reset();
3404 Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
3405 ASSERT_OK(s);
3406 std::string manifest_path;
3407 VerifyManifest(&manifest_path);
3408 std::string db_id;
3409 bool has_missing_table_file = false;
3410 s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_,
3411 /*read_only=*/false, &db_id,
3412 &has_missing_table_file);
3413 ASSERT_OK(s);
3414 ASSERT_FALSE(has_missing_table_file);
3415 for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) {
3416 VersionStorageInfo* vstorage = cfd->current()->storage_info();
3417 const std::vector<FileMetaData*>& files = vstorage->LevelFiles(0);
3418 if (cfd->GetName() == kDefaultColumnFamilyName) {
3419 ASSERT_EQ(existing_files.size() - deleted_files.size(), files.size());
3420 bool has_deleted_file = false;
3421 for (const auto* fmeta : files) {
3422 if (fmeta->fd.GetNumber() == 100) {
3423 has_deleted_file = true;
3424 break;
3425 }
3426 }
3427 ASSERT_FALSE(has_deleted_file);
3428 } else {
3429 ASSERT_TRUE(files.empty());
3430 }
3431 }
3432}
3433
1e59de90
TL
3434TEST_F(VersionSetTestMissingFiles, MinLogNumberToKeep2PC) {
3435 db_options_.allow_2pc = true;
3436 NewDB();
3437
3438 SstInfo sst(100, kDefaultColumnFamilyName, "a");
3439 std::vector<FileMetaData> file_metas;
3440 CreateDummyTableFiles({sst}, &file_metas);
3441
3442 constexpr WalNumber kMinWalNumberToKeep2PC = 10;
3443 VersionEdit edit;
3444 edit.AddFile(0, file_metas[0]);
3445 edit.SetMinLogNumberToKeep(kMinWalNumberToKeep2PC);
3446 ASSERT_OK(LogAndApplyToDefaultCF(edit));
3447 ASSERT_EQ(versions_->min_log_number_to_keep(), kMinWalNumberToKeep2PC);
3448
3449 for (int i = 0; i < 3; i++) {
3450 CreateNewManifest();
3451 ReopenDB();
3452 ASSERT_EQ(versions_->min_log_number_to_keep(), kMinWalNumberToKeep2PC);
3453 }
3454}
3455
3456class ChargeFileMetadataTest : public DBTestBase {
3457 public:
3458 ChargeFileMetadataTest()
3459 : DBTestBase("charge_file_metadata_test", /*env_do_fsync=*/true) {}
3460};
3461
3462class ChargeFileMetadataTestWithParam
3463 : public ChargeFileMetadataTest,
3464 public testing::WithParamInterface<CacheEntryRoleOptions::Decision> {
3465 public:
3466 ChargeFileMetadataTestWithParam() {}
3467};
3468
3469#ifndef ROCKSDB_LITE
// Instantiates the ChargeFileMetadataTestWithParam tests with two parameter
// values: CacheEntryRoleOptions::Decision::kEnabled and
// CacheEntryRoleOptions::Decision::kDisabled. The first macro argument is the
// instantiation prefix, the second is the fixture being instantiated.
3470INSTANTIATE_TEST_CASE_P(
3471 ChargeFileMetadataTestWithParam, ChargeFileMetadataTestWithParam,
3472 ::testing::Values(CacheEntryRoleOptions::Decision::kEnabled,
3473 CacheEntryRoleOptions::Decision::kDisabled));
3474
3475TEST_P(ChargeFileMetadataTestWithParam, Basic) {
3476 Options options;
3477 BlockBasedTableOptions table_options;
3478 CacheEntryRoleOptions::Decision charge_file_metadata = GetParam();
3479 table_options.cache_usage_options.options_overrides.insert(
3480 {CacheEntryRole::kFileMetadata, {/*.charged = */ charge_file_metadata}});
3481 std::shared_ptr<TargetCacheChargeTrackingCache<CacheEntryRole::kFileMetadata>>
3482 file_metadata_charge_only_cache = std::make_shared<
3483 TargetCacheChargeTrackingCache<CacheEntryRole::kFileMetadata>>(
3484 NewLRUCache(
3485 4 * CacheReservationManagerImpl<
3486 CacheEntryRole::kFileMetadata>::GetDummyEntrySize(),
3487 0 /* num_shard_bits */, true /* strict_capacity_limit */));
3488 table_options.block_cache = file_metadata_charge_only_cache;
3489 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3490 options.create_if_missing = true;
3491 options.disable_auto_compactions = true;
3492 DestroyAndReopen(options);
3493
3494 // Create 128 file metadata, each of which is roughly 1024 bytes.
3495 // This results in 1 *
3496 // CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>::GetDummyEntrySize()
3497 // cache reservation for file metadata.
3498 for (int i = 1; i <= 128; ++i) {
3499 ASSERT_OK(Put(std::string(1024, 'a'), "va"));
3500 ASSERT_OK(Put("b", "vb"));
3501 ASSERT_OK(Flush());
3502 }
3503 if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) {
3504 EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(),
3505 1 * CacheReservationManagerImpl<
3506 CacheEntryRole::kFileMetadata>::GetDummyEntrySize());
3507
3508 } else {
3509 EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), 0);
3510 }
3511
3512 // Create another 128 file metadata.
3513 // This increases the file metadata cache reservation to 2 *
3514 // CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>::GetDummyEntrySize().
3515 for (int i = 1; i <= 128; ++i) {
3516 ASSERT_OK(Put(std::string(1024, 'a'), "vva"));
3517 ASSERT_OK(Put("b", "vvb"));
3518 ASSERT_OK(Flush());
3519 }
3520 if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) {
3521 EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(),
3522 2 * CacheReservationManagerImpl<
3523 CacheEntryRole::kFileMetadata>::GetDummyEntrySize());
3524 } else {
3525 EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), 0);
3526 }
3527 // Compaction will create 1 new file metadata, obsolete and delete all 256
3528 // file metadata above. This results in 1 *
3529 // CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>::GetDummyEntrySize()
3530 // cache reservation for file metadata.
3531 SyncPoint::GetInstance()->LoadDependency(
3532 {{"DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles",
3533 "ChargeFileMetadataTestWithParam::"
3534 "PreVerifyingCacheReservationRelease"}});
3535 SyncPoint::GetInstance()->EnableProcessing();
3536 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
3537 ASSERT_EQ("0,1", FilesPerLevel(0));
3538 TEST_SYNC_POINT(
3539 "ChargeFileMetadataTestWithParam::PreVerifyingCacheReservationRelease");
3540 if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) {
3541 EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(),
3542 1 * CacheReservationManagerImpl<
3543 CacheEntryRole::kFileMetadata>::GetDummyEntrySize());
3544 } else {
3545 EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), 0);
3546 }
3547 SyncPoint::GetInstance()->DisableProcessing();
3548
3549 // Destroying the db will delete the remaining 1 new file metadata
3550 // This results in no cache reservation for file metadata.
3551 Destroy(options);
3552 EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(),
3553 0 * CacheReservationManagerImpl<
3554 CacheEntryRole::kFileMetadata>::GetDummyEntrySize());
3555
3556 // Reopen the db with a smaller cache in order to test failure in allocating
3557 // file metadata due to memory limit based on cache capacity
3558 file_metadata_charge_only_cache = std::make_shared<
3559 TargetCacheChargeTrackingCache<CacheEntryRole::kFileMetadata>>(
3560 NewLRUCache(1 * CacheReservationManagerImpl<
3561 CacheEntryRole::kFileMetadata>::GetDummyEntrySize(),
3562 0 /* num_shard_bits */, true /* strict_capacity_limit */));
3563 table_options.block_cache = file_metadata_charge_only_cache;
3564 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3565 Reopen(options);
3566 ASSERT_OK(Put(std::string(1024, 'a'), "va"));
3567 ASSERT_OK(Put("b", "vb"));
3568 Status s = Flush();
3569 if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) {
3570 EXPECT_TRUE(s.IsMemoryLimit());
3571 EXPECT_TRUE(s.ToString().find(
3572 kCacheEntryRoleToCamelString[static_cast<std::uint32_t>(
3573 CacheEntryRole::kFileMetadata)]) != std::string::npos);
3574 EXPECT_TRUE(s.ToString().find("memory limit based on cache capacity") !=
3575 std::string::npos);
3576 } else {
3577 EXPECT_TRUE(s.ok());
3578 }
3579}
3580#endif // ROCKSDB_LITE
f67539c2 3581} // namespace ROCKSDB_NAMESPACE
7c673cae
FG
3582
3583int main(int argc, char** argv) {
1e59de90 3584 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
7c673cae
FG
3585 ::testing::InitGoogleTest(&argc, argv);
3586 return RUN_ALL_TESTS();
3587}