]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/db/db_sst_test.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / db / db_sst_test.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10#include "db/db_test_util.h"
1e59de90 11#include "env/mock_env.h"
f67539c2 12#include "file/sst_file_manager_impl.h"
7c673cae
FG
13#include "port/port.h"
14#include "port/stack_trace.h"
1e59de90 15#include "rocksdb/cache.h"
7c673cae 16#include "rocksdb/sst_file_manager.h"
1e59de90 17#include "rocksdb/table.h"
20effc67 18#include "util/random.h"
7c673cae 19
f67539c2 20namespace ROCKSDB_NAMESPACE {
7c673cae
FG
21
22class DBSSTTest : public DBTestBase {
23 public:
1e59de90 24 DBSSTTest() : DBTestBase("db_sst_test", /*env_do_fsync=*/true) {}
7c673cae
FG
25};
26
11fdf7f2
TL
27#ifndef ROCKSDB_LITE
28// A class which remembers the name of each flushed file.
29class FlushedFileCollector : public EventListener {
30 public:
31 FlushedFileCollector() {}
494da23a 32 ~FlushedFileCollector() override {}
11fdf7f2 33
494da23a 34 void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override {
11fdf7f2
TL
35 std::lock_guard<std::mutex> lock(mutex_);
36 flushed_files_.push_back(info.file_path);
37 }
38
39 std::vector<std::string> GetFlushedFiles() {
40 std::lock_guard<std::mutex> lock(mutex_);
41 std::vector<std::string> result;
42 for (auto fname : flushed_files_) {
43 result.push_back(fname);
44 }
45 return result;
46 }
47 void ClearFlushedFiles() {
48 std::lock_guard<std::mutex> lock(mutex_);
49 flushed_files_.clear();
50 }
51
52 private:
53 std::vector<std::string> flushed_files_;
54 std::mutex mutex_;
55};
56#endif // ROCKSDB_LITE
57
7c673cae
FG
58TEST_F(DBSSTTest, DontDeletePendingOutputs) {
59 Options options;
60 options.env = env_;
61 options.create_if_missing = true;
62 DestroyAndReopen(options);
63
64 // Every time we write to a table file, call FOF/POF with full DB scan. This
65 // will make sure our pending_outputs_ protection work correctly
66 std::function<void()> purge_obsolete_files_function = [&]() {
67 JobContext job_context(0);
68 dbfull()->TEST_LockMutex();
69 dbfull()->FindObsoleteFiles(&job_context, true /*force*/);
70 dbfull()->TEST_UnlockMutex();
71 dbfull()->PurgeObsoleteFiles(job_context);
72 job_context.Clean();
73 };
74
75 env_->table_write_callback_ = &purge_obsolete_files_function;
76
77 for (int i = 0; i < 2; ++i) {
78 ASSERT_OK(Put("a", "begin"));
79 ASSERT_OK(Put("z", "end"));
80 ASSERT_OK(Flush());
81 }
82
83 // If pending output guard does not work correctly, PurgeObsoleteFiles() will
84 // delete the file that Compaction is trying to create, causing this: error
85 // db/db_test.cc:975: IO error:
86 // /tmp/rocksdbtest-1552237650/db_test/000009.sst: No such file or directory
87 Compact("a", "b");
88}
89
90// 1 Create some SST files by inserting K-V pairs into DB
91// 2 Close DB and change suffix from ".sst" to ".ldb" for every other SST file
92// 3 Open DB and check if all key can be read
93TEST_F(DBSSTTest, SSTsWithLdbSuffixHandling) {
94 Options options = CurrentOptions();
95 options.write_buffer_size = 110 << 10; // 110KB
96 options.num_levels = 4;
97 DestroyAndReopen(options);
98
99 Random rnd(301);
100 int key_id = 0;
101 for (int i = 0; i < 10; ++i) {
102 GenerateNewFile(&rnd, &key_id, false);
103 }
1e59de90 104 ASSERT_OK(Flush());
7c673cae
FG
105 Close();
106 int const num_files = GetSstFileCount(dbname_);
107 ASSERT_GT(num_files, 0);
108
f67539c2
TL
109 Reopen(options);
110 std::vector<std::string> values;
111 values.reserve(key_id);
112 for (int k = 0; k < key_id; ++k) {
113 values.push_back(Get(Key(k)));
114 }
115 Close();
116
7c673cae 117 std::vector<std::string> filenames;
11fdf7f2 118 GetSstFiles(env_, dbname_, &filenames);
7c673cae
FG
119 int num_ldb_files = 0;
120 for (size_t i = 0; i < filenames.size(); ++i) {
121 if (i & 1) {
122 continue;
123 }
124 std::string const rdb_name = dbname_ + "/" + filenames[i];
125 std::string const ldb_name = Rocks2LevelTableFileName(rdb_name);
126 ASSERT_TRUE(env_->RenameFile(rdb_name, ldb_name).ok());
127 ++num_ldb_files;
128 }
129 ASSERT_GT(num_ldb_files, 0);
130 ASSERT_EQ(num_files, GetSstFileCount(dbname_));
131
132 Reopen(options);
133 for (int k = 0; k < key_id; ++k) {
f67539c2 134 ASSERT_EQ(values[k], Get(Key(k)));
7c673cae
FG
135 }
136 Destroy(options);
137}
138
f67539c2
TL
139// Check that we don't crash when opening DB with
140// DBOptions::skip_checking_sst_file_sizes_on_db_open = true.
141TEST_F(DBSSTTest, SkipCheckingSSTFileSizesOnDBOpen) {
142 ASSERT_OK(Put("pika", "choo"));
143 ASSERT_OK(Flush());
144
145 // Just open the DB with the option set to true and check that we don't crash.
146 Options options;
20effc67 147 options.env = env_;
f67539c2
TL
148 options.skip_checking_sst_file_sizes_on_db_open = true;
149 Reopen(options);
150
151 ASSERT_EQ("choo", Get("pika"));
152}
153
7c673cae
FG
154#ifndef ROCKSDB_LITE
155TEST_F(DBSSTTest, DontDeleteMovedFile) {
156 // This test triggers move compaction and verifies that the file is not
157 // deleted when it's part of move compaction
158 Options options = CurrentOptions();
159 options.env = env_;
160 options.create_if_missing = true;
161 options.max_bytes_for_level_base = 1024 * 1024; // 1 MB
162 options.level0_file_num_compaction_trigger =
163 2; // trigger compaction when we have 2 files
164 DestroyAndReopen(options);
165
166 Random rnd(301);
167 // Create two 1MB sst files
168 for (int i = 0; i < 2; ++i) {
169 // Create 1MB sst file
170 for (int j = 0; j < 100; ++j) {
20effc67 171 ASSERT_OK(Put(Key(i * 50 + j), rnd.RandomString(10 * 1024)));
7c673cae
FG
172 }
173 ASSERT_OK(Flush());
174 }
175 // this should execute both L0->L1 and L1->(move)->L2 compactions
1e59de90 176 ASSERT_OK(dbfull()->TEST_WaitForCompact());
7c673cae
FG
177 ASSERT_EQ("0,0,1", FilesPerLevel(0));
178
179 // If the moved file is actually deleted (the move-safeguard in
180 // ~Version::Version() is not there), we get this failure:
181 // Corruption: Can't access /000009.sst
182 Reopen(options);
183}
184
185// This reproduces a bug where we don't delete a file because when it was
186// supposed to be deleted, it was blocked by pending_outputs
187// Consider:
188// 1. current file_number is 13
189// 2. compaction (1) starts, blocks deletion of all files starting with 13
190// (pending outputs)
191// 3. file 13 is created by compaction (2)
192// 4. file 13 is consumed by compaction (3) and file 15 was created. Since file
193// 13 has no references, it is put into VersionSet::obsolete_files_
194// 5. FindObsoleteFiles() gets file 13 from VersionSet::obsolete_files_. File 13
195// is deleted from obsolete_files_ set.
196// 6. PurgeObsoleteFiles() tries to delete file 13, but this file is blocked by
197// pending outputs since compaction (1) is still running. It is not deleted and
198// it is not present in obsolete_files_ anymore. Therefore, we never delete it.
199TEST_F(DBSSTTest, DeleteObsoleteFilesPendingOutputs) {
200 Options options = CurrentOptions();
201 options.env = env_;
202 options.write_buffer_size = 2 * 1024 * 1024; // 2 MB
203 options.max_bytes_for_level_base = 1024 * 1024; // 1 MB
204 options.level0_file_num_compaction_trigger =
205 2; // trigger compaction when we have 2 files
206 options.max_background_flushes = 2;
207 options.max_background_compactions = 2;
208
209 OnFileDeletionListener* listener = new OnFileDeletionListener();
210 options.listeners.emplace_back(listener);
211
212 Reopen(options);
213
214 Random rnd(301);
215 // Create two 1MB sst files
216 for (int i = 0; i < 2; ++i) {
217 // Create 1MB sst file
218 for (int j = 0; j < 100; ++j) {
20effc67 219 ASSERT_OK(Put(Key(i * 50 + j), rnd.RandomString(10 * 1024)));
7c673cae
FG
220 }
221 ASSERT_OK(Flush());
222 }
223 // this should execute both L0->L1 and L1->(move)->L2 compactions
1e59de90 224 ASSERT_OK(dbfull()->TEST_WaitForCompact());
7c673cae
FG
225 ASSERT_EQ("0,0,1", FilesPerLevel(0));
226
227 test::SleepingBackgroundTask blocking_thread;
228 port::Mutex mutex_;
229 bool already_blocked(false);
230
231 // block the flush
232 std::function<void()> block_first_time = [&]() {
233 bool blocking = false;
234 {
235 MutexLock l(&mutex_);
236 if (!already_blocked) {
237 blocking = true;
238 already_blocked = true;
239 }
240 }
241 if (blocking) {
242 blocking_thread.DoSleep();
243 }
244 };
245 env_->table_write_callback_ = &block_first_time;
246 // Insert 2.5MB data, which should trigger a flush because we exceed
247 // write_buffer_size. The flush will be blocked with block_first_time
248 // pending_file is protecting all the files created after
249 for (int j = 0; j < 256; ++j) {
20effc67 250 ASSERT_OK(Put(Key(j), rnd.RandomString(10 * 1024)));
7c673cae
FG
251 }
252 blocking_thread.WaitUntilSleeping();
253
254 ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr));
255
256 ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
257 std::vector<LiveFileMetaData> metadata;
258 db_->GetLiveFilesMetaData(&metadata);
259 ASSERT_EQ(metadata.size(), 1U);
260 auto file_on_L2 = metadata[0].name;
261 listener->SetExpectedFileName(dbname_ + file_on_L2);
262
263 ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr,
264 true /* disallow trivial move */));
265 ASSERT_EQ("0,0,0,0,1", FilesPerLevel(0));
266
267 // finish the flush!
268 blocking_thread.WakeUp();
269 blocking_thread.WaitUntilDone();
1e59de90 270 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
7c673cae 271 // File just flushed is too big for L0 and L1 so gets moved to L2.
1e59de90 272 ASSERT_OK(dbfull()->TEST_WaitForCompact());
7c673cae
FG
273 ASSERT_EQ("0,0,1,0,1", FilesPerLevel(0));
274
275 metadata.clear();
276 db_->GetLiveFilesMetaData(&metadata);
277 ASSERT_EQ(metadata.size(), 2U);
278
279 // This file should have been deleted during last compaction
280 ASSERT_EQ(Status::NotFound(), env_->FileExists(dbname_ + file_on_L2));
281 listener->VerifyMatchedCount(1);
282}
283
1e59de90
TL
284// Test that producing an empty .sst file does not write it out to
285// disk, and that the DeleteFile() env method is not called for
286// removing the non-existing file later.
287TEST_F(DBSSTTest, DeleteFileNotCalledForNotCreatedSSTFile) {
288 Options options = CurrentOptions();
289 options.env = env_;
290
291 OnFileDeletionListener* listener = new OnFileDeletionListener();
292 options.listeners.emplace_back(listener);
293
294 Reopen(options);
295
296 // Flush the empty database.
297 ASSERT_OK(Flush());
298 ASSERT_EQ("", FilesPerLevel(0));
299
300 // We expect no .sst files.
301 std::vector<LiveFileMetaData> metadata;
302 db_->GetLiveFilesMetaData(&metadata);
303 ASSERT_EQ(metadata.size(), 0U);
304
305 // We expect no file deletions.
306 listener->VerifyMatchedCount(0);
307}
308
309// Test that producing a non-empty .sst file does write it out to
310// disk, and that the DeleteFile() env method is not called for removing
311// the file later.
312TEST_F(DBSSTTest, DeleteFileNotCalledForCreatedSSTFile) {
313 Options options = CurrentOptions();
314 options.env = env_;
315
316 OnFileDeletionListener* listener = new OnFileDeletionListener();
317 options.listeners.emplace_back(listener);
318
319 Reopen(options);
320
321 ASSERT_OK(Put("pika", "choo"));
322
323 // Flush the non-empty database.
324 ASSERT_OK(Flush());
325 ASSERT_EQ("1", FilesPerLevel(0));
326
327 // We expect 1 .sst files.
328 std::vector<LiveFileMetaData> metadata;
329 db_->GetLiveFilesMetaData(&metadata);
330 ASSERT_EQ(metadata.size(), 1U);
331
332 // We expect no file deletions.
333 listener->VerifyMatchedCount(0);
334}
335
7c673cae
FG
336TEST_F(DBSSTTest, DBWithSstFileManager) {
337 std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
338 auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
339
340 int files_added = 0;
341 int files_deleted = 0;
342 int files_moved = 0;
f67539c2 343 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
11fdf7f2 344 "SstFileManagerImpl::OnAddFile", [&](void* /*arg*/) { files_added++; });
f67539c2
TL
345 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
346 "SstFileManagerImpl::OnDeleteFile",
347 [&](void* /*arg*/) { files_deleted++; });
348 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
11fdf7f2 349 "SstFileManagerImpl::OnMoveFile", [&](void* /*arg*/) { files_moved++; });
f67539c2 350 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae
FG
351
352 Options options = CurrentOptions();
353 options.sst_file_manager = sst_file_manager;
354 DestroyAndReopen(options);
355
356 Random rnd(301);
357 for (int i = 0; i < 25; i++) {
358 GenerateNewRandomFile(&rnd);
359 ASSERT_OK(Flush());
1e59de90
TL
360 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
361 ASSERT_OK(dbfull()->TEST_WaitForCompact());
7c673cae 362 // Verify that we are tracking all sst files in dbname_
20effc67 363 std::unordered_map<std::string, uint64_t> files_in_db;
1e59de90 364 ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db));
20effc67 365 ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
7c673cae
FG
366 }
367 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1e59de90 368 ASSERT_OK(dbfull()->TEST_WaitForCompact());
7c673cae 369
20effc67 370 std::unordered_map<std::string, uint64_t> files_in_db;
1e59de90 371 ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db));
7c673cae
FG
372 // Verify that we are tracking all sst files in dbname_
373 ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
374 // Verify the total files size
375 uint64_t total_files_size = 0;
376 for (auto& file_to_size : files_in_db) {
377 total_files_size += file_to_size.second;
378 }
379 ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
380 // We flushed at least 25 files
381 ASSERT_GE(files_added, 25);
382 // Compaction must have deleted some files
383 ASSERT_GT(files_deleted, 0);
384 // No files were moved
385 ASSERT_EQ(files_moved, 0);
386
387 Close();
388 Reopen(options);
389 ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
390 ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
391
392 // Verify that we track all the files again after the DB is closed and opened
393 Close();
394 sst_file_manager.reset(NewSstFileManager(env_));
395 options.sst_file_manager = sst_file_manager;
396 sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
397
398 Reopen(options);
399 ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
400 ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
401
f67539c2 402 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7c673cae
FG
403}
404
1e59de90
TL
405TEST_F(DBSSTTest, DBWithSstFileManagerForBlobFiles) {
406 std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
407 auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
408
409 int files_added = 0;
410 int files_deleted = 0;
411 int files_moved = 0;
412 int files_scheduled_to_delete = 0;
413
414 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
415 "SstFileManagerImpl::OnAddFile", [&](void* arg) {
416 const std::string* const file_path =
417 static_cast<const std::string*>(arg);
418 if (file_path->find(".blob") != std::string::npos) {
419 files_added++;
420 }
421 });
422
423 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
424 "SstFileManagerImpl::OnDeleteFile", [&](void* arg) {
425 const std::string* const file_path =
426 static_cast<const std::string*>(arg);
427 if (file_path->find(".blob") != std::string::npos) {
428 files_deleted++;
429 }
430 });
431
432 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
433 "SstFileManagerImpl::ScheduleFileDeletion", [&](void* arg) {
434 assert(arg);
435 const std::string* const file_path =
436 static_cast<const std::string*>(arg);
437 if (file_path->find(".blob") != std::string::npos) {
438 ++files_scheduled_to_delete;
439 }
440 });
441
442 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
443 "SstFileManagerImpl::OnMoveFile", [&](void* /*arg*/) { files_moved++; });
444 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
445
446 Options options = CurrentOptions();
447 options.sst_file_manager = sst_file_manager;
448 options.enable_blob_files = true;
449 options.blob_file_size = 32; // create one blob per file
450 DestroyAndReopen(options);
451 Random rnd(301);
452
453 for (int i = 0; i < 10; i++) {
454 ASSERT_OK(Put("Key_" + std::to_string(i), "Value_" + std::to_string(i)));
455 ASSERT_OK(Flush());
456 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
457 ASSERT_OK(dbfull()->TEST_WaitForCompact());
458
459 // Verify that we are tracking all sst and blob files in dbname_
460 std::unordered_map<std::string, uint64_t> files_in_db;
461 ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db));
462 ASSERT_OK(GetAllDataFiles(kBlobFile, &files_in_db));
463 ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
464 }
465
466 std::vector<uint64_t> blob_files = GetBlobFileNumbers();
467 ASSERT_EQ(files_added, blob_files.size());
468 // No blob file is obsoleted.
469 ASSERT_EQ(files_deleted, 0);
470 ASSERT_EQ(files_scheduled_to_delete, 0);
471 // No files were moved.
472 ASSERT_EQ(files_moved, 0);
473
474 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
475 ASSERT_OK(dbfull()->TEST_WaitForCompact());
476
477 std::unordered_map<std::string, uint64_t> files_in_db;
478 ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db));
479 ASSERT_OK(GetAllDataFiles(kBlobFile, &files_in_db));
480
481 // Verify that we are tracking all sst and blob files in dbname_
482 ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
483 // Verify the total files size
484 uint64_t total_files_size = 0;
485 for (auto& file_to_size : files_in_db) {
486 total_files_size += file_to_size.second;
487 }
488 ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
489 Close();
490
491 Reopen(options);
492 ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
493 ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
494
495 // Verify that we track all the files again after the DB is closed and opened.
496 Close();
497
498 sst_file_manager.reset(NewSstFileManager(env_));
499 options.sst_file_manager = sst_file_manager;
500 sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
501
502 Reopen(options);
503
504 ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
505 ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
506
507 // Destroy DB and it will remove all the blob files from sst file manager and
508 // blob files deletion will go through ScheduleFileDeletion.
509 ASSERT_EQ(files_deleted, 0);
510 ASSERT_EQ(files_scheduled_to_delete, 0);
511 Close();
512 ASSERT_OK(DestroyDB(dbname_, options));
513 ASSERT_EQ(files_deleted, blob_files.size());
514 ASSERT_EQ(files_scheduled_to_delete, blob_files.size());
515
516 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
517 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
518}
519
520TEST_F(DBSSTTest, DBWithSstFileManagerForBlobFilesWithGC) {
521 std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
522 auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
523 Options options = CurrentOptions();
524 options.sst_file_manager = sst_file_manager;
525 options.enable_blob_files = true;
526 options.blob_file_size = 32; // create one blob per file
527 options.disable_auto_compactions = true;
528 options.enable_blob_garbage_collection = true;
529 options.blob_garbage_collection_age_cutoff = 0.5;
530
531 int files_added = 0;
532 int files_deleted = 0;
533 int files_moved = 0;
534 int files_scheduled_to_delete = 0;
535
536 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
537 "SstFileManagerImpl::OnAddFile", [&](void* arg) {
538 const std::string* const file_path =
539 static_cast<const std::string*>(arg);
540 if (file_path->find(".blob") != std::string::npos) {
541 files_added++;
542 }
543 });
544
545 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
546 "SstFileManagerImpl::OnDeleteFile", [&](void* arg) {
547 const std::string* const file_path =
548 static_cast<const std::string*>(arg);
549 if (file_path->find(".blob") != std::string::npos) {
550 files_deleted++;
551 }
552 });
553
554 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
555 "SstFileManagerImpl::ScheduleFileDeletion", [&](void* arg) {
556 assert(arg);
557 const std::string* const file_path =
558 static_cast<const std::string*>(arg);
559 if (file_path->find(".blob") != std::string::npos) {
560 ++files_scheduled_to_delete;
561 }
562 });
563
564 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
565 "SstFileManagerImpl::OnMoveFile", [&](void* /*arg*/) { files_moved++; });
566 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
567
568 DestroyAndReopen(options);
569 Random rnd(301);
570
571 constexpr char first_key[] = "first_key";
572 constexpr char first_value[] = "first_value";
573 constexpr char second_key[] = "second_key";
574 constexpr char second_value[] = "second_value";
575
576 ASSERT_OK(Put(first_key, first_value));
577 ASSERT_OK(Put(second_key, second_value));
578 ASSERT_OK(Flush());
579
580 constexpr char third_key[] = "third_key";
581 constexpr char third_value[] = "third_value";
582 constexpr char fourth_key[] = "fourth_key";
583 constexpr char fourth_value[] = "fourth_value";
584 constexpr char fifth_key[] = "fifth_key";
585 constexpr char fifth_value[] = "fifth_value";
586
587 ASSERT_OK(Put(third_key, third_value));
588 ASSERT_OK(Put(fourth_key, fourth_value));
589 ASSERT_OK(Put(fifth_key, fifth_value));
590 ASSERT_OK(Flush());
591
592 const std::vector<uint64_t> original_blob_files = GetBlobFileNumbers();
593
594 ASSERT_EQ(original_blob_files.size(), 5);
595 ASSERT_EQ(files_added, 5);
596 ASSERT_EQ(files_deleted, 0);
597 ASSERT_EQ(files_scheduled_to_delete, 0);
598 ASSERT_EQ(files_moved, 0);
599 {
600 // Verify that we are tracking all sst and blob files in dbname_
601 std::unordered_map<std::string, uint64_t> files_in_db;
602 ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db));
603 ASSERT_OK(GetAllDataFiles(kBlobFile, &files_in_db));
604 ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
605 }
606
607 const size_t cutoff_index = static_cast<size_t>(
608 options.blob_garbage_collection_age_cutoff * original_blob_files.size());
609
610 size_t expected_number_of_files = original_blob_files.size();
611 // Note: turning off enable_blob_files before the compaction results in
612 // garbage collected values getting inlined.
613 ASSERT_OK(db_->SetOptions({{"enable_blob_files", "false"}}));
614 expected_number_of_files -= cutoff_index;
615 files_added = 0;
616
617 constexpr Slice* begin = nullptr;
618 constexpr Slice* end = nullptr;
619
620 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
621 ASSERT_OK(dbfull()->TEST_WaitForCompact());
622 sfm->WaitForEmptyTrash();
623
624 ASSERT_EQ(Get(first_key), first_value);
625 ASSERT_EQ(Get(second_key), second_value);
626 ASSERT_EQ(Get(third_key), third_value);
627 ASSERT_EQ(Get(fourth_key), fourth_value);
628 ASSERT_EQ(Get(fifth_key), fifth_value);
629
630 const std::vector<uint64_t> new_blob_files = GetBlobFileNumbers();
631
632 ASSERT_EQ(new_blob_files.size(), expected_number_of_files);
633 // No new file is added.
634 ASSERT_EQ(files_added, 0);
635 ASSERT_EQ(files_deleted, cutoff_index);
636 ASSERT_EQ(files_scheduled_to_delete, cutoff_index);
637 ASSERT_EQ(files_moved, 0);
638
639 // Original blob files below the cutoff should be gone, original blob files at
640 // or above the cutoff should be still there
641 for (size_t i = cutoff_index; i < original_blob_files.size(); ++i) {
642 ASSERT_EQ(new_blob_files[i - cutoff_index], original_blob_files[i]);
643 }
644
645 {
646 // Verify that we are tracking all sst and blob files in dbname_
647 std::unordered_map<std::string, uint64_t> files_in_db;
648 ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db));
649 ASSERT_OK(GetAllDataFiles(kBlobFile, &files_in_db));
650 ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
651 }
652
653 Close();
654 ASSERT_OK(DestroyDB(dbname_, options));
655 sfm->WaitForEmptyTrash();
656 ASSERT_EQ(files_deleted, 5);
657 ASSERT_EQ(files_scheduled_to_delete, 5);
658
659 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
660 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
661}
662
663class DBSSTTestRateLimit : public DBSSTTest,
664 public ::testing::WithParamInterface<bool> {
665 public:
666 DBSSTTestRateLimit() : DBSSTTest() {}
667 ~DBSSTTestRateLimit() override {}
668};
669
670TEST_P(DBSSTTestRateLimit, RateLimitedDelete) {
7c673cae 671 Destroy(last_options_);
f67539c2 672 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
7c673cae
FG
673 {"DBSSTTest::RateLimitedDelete:1",
674 "DeleteScheduler::BackgroundEmptyTrash"},
675 });
676
677 std::vector<uint64_t> penalties;
f67539c2 678 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
7c673cae
FG
679 "DeleteScheduler::BackgroundEmptyTrash:Wait",
680 [&](void* arg) { penalties.push_back(*(static_cast<uint64_t*>(arg))); });
f67539c2 681 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
7c673cae
FG
682 "InstrumentedCondVar::TimedWaitInternal", [&](void* arg) {
683 // Turn timed wait into a simulated sleep
684 uint64_t* abs_time_us = static_cast<uint64_t*>(arg);
20effc67
TL
685 uint64_t cur_time = env_->NowMicros();
686 if (*abs_time_us > cur_time) {
687 env_->MockSleepForMicroseconds(*abs_time_us - cur_time);
7c673cae
FG
688 }
689
20effc67
TL
690 // Plus an additional short, random amount
691 env_->MockSleepForMicroseconds(Random::GetTLSInstance()->Uniform(10));
7c673cae 692
20effc67
TL
693 // Set wait until time to before (actual) current time to force not
694 // to sleep
695 *abs_time_us = Env::Default()->NowMicros();
7c673cae
FG
696 });
697
1e59de90
TL
698 // Disable PeriodicTaskScheduler as it also has TimedWait, which could update
699 // the simulated sleep time
700 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
701 "DBImpl::StartPeriodicTaskScheduler:DisableScheduler", [&](void* arg) {
702 bool* disable_scheduler = static_cast<bool*>(arg);
703 *disable_scheduler = true;
704 });
705
f67539c2 706 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae 707
1e59de90 708 bool different_wal_dir = GetParam();
7c673cae 709 Options options = CurrentOptions();
20effc67 710 SetTimeElapseOnlySleepOnReopen(&options);
7c673cae
FG
711 options.disable_auto_compactions = true;
712 options.env = env_;
20effc67 713 options.statistics = CreateDBStatistics();
1e59de90
TL
714 if (different_wal_dir) {
715 options.wal_dir = alternative_wal_dir_;
716 }
7c673cae 717
7c673cae
FG
718 int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec
719 Status s;
720 options.sst_file_manager.reset(
11fdf7f2 721 NewSstFileManager(env_, nullptr, "", 0, false, &s, 0));
7c673cae
FG
722 ASSERT_OK(s);
723 options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec);
724 auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
11fdf7f2 725 sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1);
7c673cae 726
494da23a 727 WriteOptions wo;
1e59de90
TL
728 if (!different_wal_dir) {
729 wo.disableWAL = true;
730 }
731 Reopen(options);
7c673cae
FG
732 // Create 4 files in L0
733 for (char v = 'a'; v <= 'd'; v++) {
494da23a
TL
734 ASSERT_OK(Put("Key2", DummyString(1024, v), wo));
735 ASSERT_OK(Put("Key3", DummyString(1024, v), wo));
736 ASSERT_OK(Put("Key4", DummyString(1024, v), wo));
737 ASSERT_OK(Put("Key1", DummyString(1024, v), wo));
738 ASSERT_OK(Put("Key4", DummyString(1024, v), wo));
7c673cae
FG
739 ASSERT_OK(Flush());
740 }
741 // We created 4 sst files in L0
742 ASSERT_EQ("4", FilesPerLevel(0));
743
744 std::vector<LiveFileMetaData> metadata;
745 db_->GetLiveFilesMetaData(&metadata);
746
747 // Compaction will move the 4 files in L0 to trash and create 1 L1 file
748 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
494da23a 749 ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
7c673cae
FG
750 ASSERT_EQ("0,1", FilesPerLevel(0));
751
752 uint64_t delete_start_time = env_->NowMicros();
753 // Hold BackgroundEmptyTrash
754 TEST_SYNC_POINT("DBSSTTest::RateLimitedDelete:1");
755 sfm->WaitForEmptyTrash();
756 uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time;
757
758 uint64_t total_files_size = 0;
759 uint64_t expected_penlty = 0;
760 ASSERT_EQ(penalties.size(), metadata.size());
761 for (size_t i = 0; i < metadata.size(); i++) {
762 total_files_size += metadata[i].size;
763 expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec);
764 ASSERT_EQ(expected_penlty, penalties[i]);
765 }
766 ASSERT_GT(time_spent_deleting, expected_penlty * 0.9);
767 ASSERT_LT(time_spent_deleting, expected_penlty * 1.1);
20effc67
TL
768 ASSERT_EQ(4, options.statistics->getAndResetTickerCount(FILES_MARKED_TRASH));
769 ASSERT_EQ(
770 0, options.statistics->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY));
7c673cae 771
f67539c2 772 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7c673cae
FG
773}
774
1e59de90
TL
775INSTANTIATE_TEST_CASE_P(RateLimitedDelete, DBSSTTestRateLimit,
776 ::testing::Bool());
777
494da23a
TL
778TEST_F(DBSSTTest, RateLimitedWALDelete) {
779 Destroy(last_options_);
780
781 std::vector<uint64_t> penalties;
f67539c2 782 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
494da23a
TL
783 "DeleteScheduler::BackgroundEmptyTrash:Wait",
784 [&](void* arg) { penalties.push_back(*(static_cast<uint64_t*>(arg))); });
785
494da23a
TL
786 Options options = CurrentOptions();
787 options.disable_auto_compactions = true;
f67539c2 788 options.compression = kNoCompression;
494da23a
TL
789 options.env = env_;
790
791 int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec
792 Status s;
793 options.sst_file_manager.reset(
794 NewSstFileManager(env_, nullptr, "", 0, false, &s, 0));
795 ASSERT_OK(s);
796 options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec);
797 auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
f67539c2 798 sfm->delete_scheduler()->SetMaxTrashDBRatio(3.1);
20effc67 799 SetTimeElapseOnlySleepOnReopen(&options);
494da23a
TL
800
801 ASSERT_OK(TryReopen(options));
f67539c2 802 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
494da23a
TL
803
804 // Create 4 files in L0
805 for (char v = 'a'; v <= 'd'; v++) {
806 ASSERT_OK(Put("Key2", DummyString(1024, v)));
807 ASSERT_OK(Put("Key3", DummyString(1024, v)));
808 ASSERT_OK(Put("Key4", DummyString(1024, v)));
809 ASSERT_OK(Put("Key1", DummyString(1024, v)));
810 ASSERT_OK(Put("Key4", DummyString(1024, v)));
811 ASSERT_OK(Flush());
812 }
813 // We created 4 sst files in L0
814 ASSERT_EQ("4", FilesPerLevel(0));
815
816 // Compaction will move the 4 files in L0 to trash and create 1 L1 file
f67539c2
TL
817 CompactRangeOptions cro;
818 cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
819 ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
494da23a
TL
820 ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
821 ASSERT_EQ("0,1", FilesPerLevel(0));
822
823 sfm->WaitForEmptyTrash();
824 ASSERT_EQ(penalties.size(), 8);
825
f67539c2
TL
826 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
827}
828
829class DBWALTestWithParam
1e59de90 830 : public DBTestBase,
f67539c2
TL
831 public testing::WithParamInterface<std::tuple<std::string, bool>> {
832 public:
1e59de90
TL
833 explicit DBWALTestWithParam()
834 : DBTestBase("db_wal_test_with_params", /*env_do_fsync=*/true) {
f67539c2
TL
835 wal_dir_ = std::get<0>(GetParam());
836 wal_dir_same_as_dbname_ = std::get<1>(GetParam());
837 }
838
839 std::string wal_dir_;
840 bool wal_dir_same_as_dbname_;
841};
842
843TEST_P(DBWALTestWithParam, WALTrashCleanupOnOpen) {
844 class MyEnv : public EnvWrapper {
845 public:
846 MyEnv(Env* t) : EnvWrapper(t), fake_log_delete(false) {}
1e59de90
TL
847 const char* Name() const override { return "MyEnv"; }
848 Status DeleteFile(const std::string& fname) override {
f67539c2
TL
849 if (fname.find(".log.trash") != std::string::npos && fake_log_delete) {
850 return Status::OK();
851 }
852
853 return target()->DeleteFile(fname);
854 }
855
856 void set_fake_log_delete(bool fake) { fake_log_delete = fake; }
857
858 private:
859 bool fake_log_delete;
860 };
861
20effc67 862 std::unique_ptr<MyEnv> env(new MyEnv(env_));
f67539c2
TL
863 Destroy(last_options_);
864
865 env->set_fake_log_delete(true);
866
867 Options options = CurrentOptions();
868 options.disable_auto_compactions = true;
869 options.compression = kNoCompression;
870 options.env = env.get();
871 options.wal_dir = dbname_ + wal_dir_;
872
873 int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec
874 Status s;
875 options.sst_file_manager.reset(
876 NewSstFileManager(env_, nullptr, "", 0, false, &s, 0));
877 ASSERT_OK(s);
878 options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec);
879 auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
880 sfm->delete_scheduler()->SetMaxTrashDBRatio(3.1);
881
1e59de90 882 Reopen(options);
f67539c2
TL
883
884 // Create 4 files in L0
885 for (char v = 'a'; v <= 'd'; v++) {
1e59de90
TL
886 if (v == 'c') {
887 // Maximize the change that the last log file will be preserved in trash
888 // before restarting the DB.
889 // We have to set this on the 2nd to last file for it to delay deletion
890 // on the last file. (Quirk of DeleteScheduler::BackgroundEmptyTrash())
891 options.sst_file_manager->SetDeleteRateBytesPerSecond(1);
892 }
f67539c2
TL
893 ASSERT_OK(Put("Key2", DummyString(1024, v)));
894 ASSERT_OK(Put("Key3", DummyString(1024, v)));
895 ASSERT_OK(Put("Key4", DummyString(1024, v)));
896 ASSERT_OK(Put("Key1", DummyString(1024, v)));
897 ASSERT_OK(Put("Key4", DummyString(1024, v)));
898 ASSERT_OK(Flush());
899 }
900 // We created 4 sst files in L0
901 ASSERT_EQ("4", FilesPerLevel(0));
902
903 Close();
904
905 options.sst_file_manager.reset();
906 std::vector<std::string> filenames;
907 int trash_log_count = 0;
908 if (!wal_dir_same_as_dbname_) {
909 // Forcibly create some trash log files
910 std::unique_ptr<WritableFile> result;
1e59de90
TL
911 ASSERT_OK(env->NewWritableFile(options.wal_dir + "/1000.log.trash", &result,
912 EnvOptions()));
f67539c2
TL
913 result.reset();
914 }
1e59de90 915 ASSERT_OK(env->GetChildren(options.wal_dir, &filenames));
f67539c2
TL
916 for (const std::string& fname : filenames) {
917 if (fname.find(".log.trash") != std::string::npos) {
918 trash_log_count++;
919 }
920 }
921 ASSERT_GE(trash_log_count, 1);
922
923 env->set_fake_log_delete(false);
1e59de90 924 Reopen(options);
f67539c2
TL
925
926 filenames.clear();
927 trash_log_count = 0;
1e59de90 928 ASSERT_OK(env->GetChildren(options.wal_dir, &filenames));
f67539c2
TL
929 for (const std::string& fname : filenames) {
930 if (fname.find(".log.trash") != std::string::npos) {
931 trash_log_count++;
932 }
933 }
934 ASSERT_EQ(trash_log_count, 0);
935 Close();
494da23a
TL
936}
937
f67539c2
TL
// Instantiate the DBWALTestWithParam tests with two parameter tuples:
//   ("", true)          - empty wal_dir_ value, wal_dir_same_as_dbname_ true
//   ("_wal_dir", false) - "_wal_dir" wal_dir_ value, wal_dir_same_as_dbname_
//                         false
938INSTANTIATE_TEST_CASE_P(DBWALTestWithParam, DBWALTestWithParam,
939 ::testing::Values(std::make_tuple("", true),
940 std::make_tuple("_wal_dir", false)));
941
11fdf7f2
TL
942TEST_F(DBSSTTest, OpenDBWithExistingTrash) {
943 Options options = CurrentOptions();
944
945 options.sst_file_manager.reset(
946 NewSstFileManager(env_, nullptr, "", 1024 * 1024 /* 1 MB/sec */));
947 auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
948
949 Destroy(last_options_);
950
951 // Add some trash files to the db directory so the DB can clean them up
1e59de90 952 ASSERT_OK(env_->CreateDirIfMissing(dbname_));
11fdf7f2
TL
953 ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "001.sst.trash"));
954 ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "002.sst.trash"));
955 ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "003.sst.trash"));
956
957 // Reopen the DB and verify that it deletes existing trash files
1e59de90 958 Reopen(options);
11fdf7f2
TL
959 sfm->WaitForEmptyTrash();
960 ASSERT_NOK(env_->FileExists(dbname_ + "/" + "001.sst.trash"));
961 ASSERT_NOK(env_->FileExists(dbname_ + "/" + "002.sst.trash"));
962 ASSERT_NOK(env_->FileExists(dbname_ + "/" + "003.sst.trash"));
963}
964
7c673cae
FG
965// Create a DB with 2 db_paths, and generate multiple files in the 2
966// db_paths using CompactRangeOptions, make sure that files that were
967// deleted from first db_path were deleted using DeleteScheduler and
968// files in the second path were not.
969TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
11fdf7f2 970 std::atomic<int> bg_delete_file(0);
f67539c2 971 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
7c673cae 972 "DeleteScheduler::DeleteTrashFile:DeleteFile",
11fdf7f2
TL
973 [&](void* /*arg*/) { bg_delete_file++; });
974 // The deletion scheduler sometimes skips marking file as trash according to
975 // a heuristic. In that case the deletion will go through the below SyncPoint.
f67539c2
TL
976 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
977 "DeleteScheduler::DeleteFile", [&](void* /*arg*/) { bg_delete_file++; });
7c673cae
FG
978
979 Options options = CurrentOptions();
980 options.disable_auto_compactions = true;
981 options.db_paths.emplace_back(dbname_, 1024 * 100);
982 options.db_paths.emplace_back(dbname_ + "_2", 1024 * 100);
983 options.env = env_;
984
7c673cae
FG
985 int64_t rate_bytes_per_sec = 1024 * 1024; // 1 Mb / Sec
986 Status s;
11fdf7f2
TL
987 options.sst_file_manager.reset(
988 NewSstFileManager(env_, nullptr, "", rate_bytes_per_sec, false, &s,
989 /* max_trash_db_ratio= */ 1.1));
990
7c673cae
FG
991 ASSERT_OK(s);
992 auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
993
994 DestroyAndReopen(options);
f67539c2 995 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
494da23a
TL
996
997 WriteOptions wo;
998 wo.disableWAL = true;
7c673cae
FG
999
1000 // Create 4 files in L0
1001 for (int i = 0; i < 4; i++) {
1e59de90 1002 ASSERT_OK(Put("Key" + std::to_string(i), DummyString(1024, 'A'), wo));
7c673cae
FG
1003 ASSERT_OK(Flush());
1004 }
1005 // We created 4 sst files in L0
1006 ASSERT_EQ("4", FilesPerLevel(0));
1007 // Compaction will delete files from L0 in first db path and generate a new
1008 // file in L1 in second db path
1009 CompactRangeOptions compact_options;
1010 compact_options.target_path_id = 1;
1011 Slice begin("Key0");
1012 Slice end("Key3");
1013 ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
1014 ASSERT_EQ("0,1", FilesPerLevel(0));
1015
1016 // Create 4 files in L0
1017 for (int i = 4; i < 8; i++) {
1e59de90 1018 ASSERT_OK(Put("Key" + std::to_string(i), DummyString(1024, 'B'), wo));
7c673cae
FG
1019 ASSERT_OK(Flush());
1020 }
1021 ASSERT_EQ("4,1", FilesPerLevel(0));
1022
1023 // Compaction will delete files from L0 in first db path and generate a new
1024 // file in L1 in second db path
1025 begin = "Key4";
1026 end = "Key7";
1027 ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
1028 ASSERT_EQ("0,2", FilesPerLevel(0));
1029
1030 sfm->WaitForEmptyTrash();
1031 ASSERT_EQ(bg_delete_file, 8);
1032
11fdf7f2
TL
1033 // Compaction will delete both files and regenerate a file in L1 in second
1034 // db path. The deleted files should still be cleaned up via delete scheduler.
7c673cae 1035 compact_options.bottommost_level_compaction =
f67539c2 1036 BottommostLevelCompaction::kForceOptimized;
7c673cae
FG
1037 ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
1038 ASSERT_EQ("0,1", FilesPerLevel(0));
1039
1040 sfm->WaitForEmptyTrash();
11fdf7f2 1041 ASSERT_EQ(bg_delete_file, 10);
7c673cae 1042
f67539c2 1043 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7c673cae
FG
1044}
1045
1046TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) {
1047 int bg_delete_file = 0;
f67539c2 1048 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
7c673cae 1049 "DeleteScheduler::DeleteTrashFile:DeleteFile",
11fdf7f2 1050 [&](void* /*arg*/) { bg_delete_file++; });
f67539c2 1051 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae 1052
11fdf7f2 1053 Status s;
7c673cae
FG
1054 Options options = CurrentOptions();
1055 options.disable_auto_compactions = true;
1056 options.env = env_;
11fdf7f2
TL
1057 options.sst_file_manager.reset(
1058 NewSstFileManager(env_, nullptr, "", 0, false, &s, 0));
1059 ASSERT_OK(s);
7c673cae
FG
1060 DestroyAndReopen(options);
1061
1062 // Create 4 files in L0
1063 for (int i = 0; i < 4; i++) {
1e59de90 1064 ASSERT_OK(Put("Key" + std::to_string(i), DummyString(1024, 'A')));
7c673cae
FG
1065 ASSERT_OK(Flush());
1066 }
1067 // We created 4 sst files in L0
1068 ASSERT_EQ("4", FilesPerLevel(0));
1069
1070 // Close DB and destroy it using DeleteScheduler
1071 Close();
7c673cae 1072
494da23a
TL
1073 int num_sst_files = 0;
1074 int num_wal_files = 0;
1075 std::vector<std::string> db_files;
1e59de90 1076 ASSERT_OK(env_->GetChildren(dbname_, &db_files));
494da23a
TL
1077 for (std::string f : db_files) {
1078 if (f.substr(f.find_last_of(".") + 1) == "sst") {
1079 num_sst_files++;
1080 } else if (f.substr(f.find_last_of(".") + 1) == "log") {
1081 num_wal_files++;
1082 }
1083 }
1084 ASSERT_GT(num_sst_files, 0);
1085 ASSERT_GT(num_wal_files, 0);
1086
7c673cae 1087 auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
11fdf7f2
TL
1088
1089 sfm->SetDeleteRateBytesPerSecond(1024 * 1024);
1e59de90
TL
1090 // Set an extra high trash ratio to prevent immediate/non-rate limited
1091 // deletions
1092 sfm->delete_scheduler()->SetMaxTrashDBRatio(1000.0);
11fdf7f2 1093 ASSERT_OK(DestroyDB(dbname_, options));
7c673cae 1094 sfm->WaitForEmptyTrash();
494da23a 1095 ASSERT_EQ(bg_delete_file, num_sst_files + num_wal_files);
7c673cae
FG
1096}
1097
1098TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) {
1099 std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
1100 auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
1101
1102 Options options = CurrentOptions();
1103 options.sst_file_manager = sst_file_manager;
1104 options.disable_auto_compactions = true;
1105 DestroyAndReopen(options);
1106
1107 Random rnd(301);
1108
1109 // Generate a file containing 100 keys.
1110 for (int i = 0; i < 100; i++) {
20effc67 1111 ASSERT_OK(Put(Key(i), rnd.RandomString(50)));
7c673cae
FG
1112 }
1113 ASSERT_OK(Flush());
1114
1115 uint64_t first_file_size = 0;
20effc67 1116 std::unordered_map<std::string, uint64_t> files_in_db;
1e59de90 1117 ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db, &first_file_size));
7c673cae
FG
1118 ASSERT_EQ(sfm->GetTotalSize(), first_file_size);
1119
1120 // Set the maximum allowed space usage to the current total size
1121 sfm->SetMaxAllowedSpaceUsage(first_file_size + 1);
1122
1123 ASSERT_OK(Put("key1", "val1"));
1124 // This flush will cause bg_error_ and will fail
1125 ASSERT_NOK(Flush());
1126}
1127
1e59de90
TL
1128TEST_F(DBSSTTest, DBWithMaxSpaceAllowedWithBlobFiles) {
1129 std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
1130 auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
1131
1132 Options options = CurrentOptions();
1133 options.sst_file_manager = sst_file_manager;
1134 options.disable_auto_compactions = true;
1135 options.enable_blob_files = true;
1136 DestroyAndReopen(options);
1137
1138 Random rnd(301);
1139
1140 // Generate a file containing keys.
1141 for (int i = 0; i < 10; i++) {
1142 ASSERT_OK(Put(Key(i), rnd.RandomString(50)));
1143 }
1144 ASSERT_OK(Flush());
1145
1146 uint64_t files_size = 0;
1147 uint64_t total_files_size = 0;
1148 std::unordered_map<std::string, uint64_t> files_in_db;
1149
1150 ASSERT_OK(GetAllDataFiles(kBlobFile, &files_in_db, &files_size));
1151 // Make sure blob files are considered by SSTFileManage in size limits.
1152 ASSERT_GT(files_size, 0);
1153 total_files_size = files_size;
1154 ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db, &files_size));
1155 total_files_size += files_size;
1156 ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
1157
1158 // Set the maximum allowed space usage to the current total size.
1159 sfm->SetMaxAllowedSpaceUsage(total_files_size + 1);
1160
1161 bool max_allowed_space_reached = false;
1162 bool delete_blob_file = false;
1163 // Sync point called after blob file is closed and max allowed space is
1164 // checked.
1165 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1166 "BlobFileCompletionCallback::CallBack::MaxAllowedSpaceReached",
1167 [&](void* /*arg*/) { max_allowed_space_reached = true; });
1168 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1169 "BuildTable::AfterDeleteFile",
1170 [&](void* /*arg*/) { delete_blob_file = true; });
1171 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
1172 {
1173 "BuildTable::AfterDeleteFile",
1174 "DBSSTTest::DBWithMaxSpaceAllowedWithBlobFiles:1",
1175 },
1176 });
1177
1178 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1179
1180 ASSERT_OK(Put("key1", "val1"));
1181 // This flush will fail
1182 ASSERT_NOK(Flush());
1183 ASSERT_TRUE(max_allowed_space_reached);
1184
1185 TEST_SYNC_POINT("DBSSTTest::DBWithMaxSpaceAllowedWithBlobFiles:1");
1186 ASSERT_TRUE(delete_blob_file);
1187 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
1188}
1189
11fdf7f2
TL
1190TEST_F(DBSSTTest, CancellingCompactionsWorks) {
1191 std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
1192 auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
1193
1194 Options options = CurrentOptions();
1195 options.sst_file_manager = sst_file_manager;
1196 options.level0_file_num_compaction_trigger = 2;
1197 options.statistics = CreateDBStatistics();
1198 DestroyAndReopen(options);
1199
1200 int completed_compactions = 0;
f67539c2 1201 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
11fdf7f2
TL
1202 "DBImpl::BackgroundCompaction():CancelledCompaction", [&](void* /*arg*/) {
1203 sfm->SetMaxAllowedSpaceUsage(0);
1204 ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0);
1205 });
f67539c2 1206 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
11fdf7f2
TL
1207 "DBImpl::BackgroundCompaction:NonTrivial:AfterRun",
1208 [&](void* /*arg*/) { completed_compactions++; });
f67539c2 1209 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
11fdf7f2
TL
1210
1211 Random rnd(301);
1212
1213 // Generate a file containing 10 keys.
1214 for (int i = 0; i < 10; i++) {
20effc67 1215 ASSERT_OK(Put(Key(i), rnd.RandomString(50)));
11fdf7f2
TL
1216 }
1217 ASSERT_OK(Flush());
1218 uint64_t total_file_size = 0;
20effc67 1219 std::unordered_map<std::string, uint64_t> files_in_db;
1e59de90 1220 ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db, &total_file_size));
11fdf7f2
TL
1221 // Set the maximum allowed space usage to the current total size
1222 sfm->SetMaxAllowedSpaceUsage(2 * total_file_size + 1);
1223
1224 // Generate another file to trigger compaction.
1225 for (int i = 0; i < 10; i++) {
20effc67 1226 ASSERT_OK(Put(Key(i), rnd.RandomString(50)));
11fdf7f2
TL
1227 }
1228 ASSERT_OK(Flush());
1e59de90 1229 ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
11fdf7f2
TL
1230
1231 // Because we set a callback in CancelledCompaction, we actually
1232 // let the compaction run
1233 ASSERT_GT(completed_compactions, 0);
1234 ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0);
1235 // Make sure the stat is bumped
1e59de90
TL
1236 ASSERT_GT(dbfull()->immutable_db_options().statistics.get()->getTickerCount(
1237 COMPACTION_CANCELLED),
1238 0);
20effc67
TL
1239 ASSERT_EQ(0,
1240 dbfull()->immutable_db_options().statistics.get()->getTickerCount(
1241 FILES_MARKED_TRASH));
1242 ASSERT_EQ(4,
1243 dbfull()->immutable_db_options().statistics.get()->getTickerCount(
1244 FILES_DELETED_IMMEDIATELY));
f67539c2 1245 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
11fdf7f2
TL
1246}
1247
1248TEST_F(DBSSTTest, CancellingManualCompactionsWorks) {
1249 std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
1250 auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
1251
1252 Options options = CurrentOptions();
1253 options.sst_file_manager = sst_file_manager;
1254 options.statistics = CreateDBStatistics();
1255
1256 FlushedFileCollector* collector = new FlushedFileCollector();
1257 options.listeners.emplace_back(collector);
1258
1259 DestroyAndReopen(options);
1260
1261 Random rnd(301);
1262
1263 // Generate a file containing 10 keys.
1264 for (int i = 0; i < 10; i++) {
20effc67 1265 ASSERT_OK(Put(Key(i), rnd.RandomString(50)));
11fdf7f2
TL
1266 }
1267 ASSERT_OK(Flush());
1268 uint64_t total_file_size = 0;
20effc67 1269 std::unordered_map<std::string, uint64_t> files_in_db;
1e59de90 1270 ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db, &total_file_size));
11fdf7f2
TL
1271 // Set the maximum allowed space usage to the current total size
1272 sfm->SetMaxAllowedSpaceUsage(2 * total_file_size + 1);
1273
1274 // Generate another file to trigger compaction.
1275 for (int i = 0; i < 10; i++) {
20effc67 1276 ASSERT_OK(Put(Key(i), rnd.RandomString(50)));
11fdf7f2
TL
1277 }
1278 ASSERT_OK(Flush());
1279
1280 // OK, now trigger a manual compaction
1e59de90
TL
1281 ASSERT_TRUE(dbfull()
1282 ->CompactRange(CompactRangeOptions(), nullptr, nullptr)
1283 .IsCompactionTooLarge());
11fdf7f2
TL
1284
1285 // Wait for manual compaction to get scheduled and finish
1e59de90 1286 ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
11fdf7f2
TL
1287
1288 ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0);
1289 // Make sure the stat is bumped
1290 ASSERT_EQ(dbfull()->immutable_db_options().statistics.get()->getTickerCount(
1291 COMPACTION_CANCELLED),
1292 1);
1293
1294 // Now make sure CompactFiles also gets cancelled
1295 auto l0_files = collector->GetFlushedFiles();
1e59de90
TL
1296 ASSERT_TRUE(
1297 dbfull()
1298 ->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), l0_files, 0)
1299 .IsCompactionTooLarge());
11fdf7f2
TL
1300
1301 // Wait for manual compaction to get scheduled and finish
1e59de90 1302 ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
11fdf7f2
TL
1303
1304 ASSERT_EQ(dbfull()->immutable_db_options().statistics.get()->getTickerCount(
1305 COMPACTION_CANCELLED),
1306 2);
1307 ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0);
1308
1309 // Now let the flush through and make sure GetCompactionsReservedSize
1310 // returns to normal
1311 sfm->SetMaxAllowedSpaceUsage(0);
1312 int completed_compactions = 0;
f67539c2 1313 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
11fdf7f2
TL
1314 "CompactFilesImpl:End", [&](void* /*arg*/) { completed_compactions++; });
1315
f67539c2 1316 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1e59de90
TL
1317 ASSERT_OK(dbfull()->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(),
1318 l0_files, 0));
1319 ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
11fdf7f2
TL
1320
1321 ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0);
1322 ASSERT_GT(completed_compactions, 0);
1323
f67539c2 1324 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
11fdf7f2
TL
1325}
1326
7c673cae
FG
1327TEST_F(DBSSTTest, DBWithMaxSpaceAllowedRandomized) {
1328 // This test will set a maximum allowed space for the DB, then it will
1329 // keep filling the DB until the limit is reached and bg_error_ is set.
1330 // When bg_error_ is set we will verify that the DB size is greater
1331 // than the limit.
1332
11fdf7f2
TL
1333 std::vector<int> max_space_limits_mbs = {1, 10};
1334 std::atomic<bool> bg_error_set(false);
7c673cae 1335
11fdf7f2
TL
1336 std::atomic<int> reached_max_space_on_flush(0);
1337 std::atomic<int> reached_max_space_on_compaction(0);
f67539c2 1338 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
7c673cae
FG
1339 "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached",
1340 [&](void* arg) {
1341 Status* bg_error = static_cast<Status*>(arg);
1342 bg_error_set = true;
7c673cae 1343 reached_max_space_on_flush++;
7c673cae
FG
1344 // clear error to ensure compaction callback is called
1345 *bg_error = Status::OK();
11fdf7f2
TL
1346 });
1347
f67539c2 1348 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
11fdf7f2
TL
1349 "DBImpl::BackgroundCompaction():CancelledCompaction", [&](void* arg) {
1350 bool* enough_room = static_cast<bool*>(arg);
1351 *enough_room = true;
7c673cae
FG
1352 });
1353
f67539c2 1354 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
7c673cae 1355 "CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached",
11fdf7f2 1356 [&](void* /*arg*/) {
7c673cae 1357 bg_error_set = true;
7c673cae
FG
1358 reached_max_space_on_compaction++;
1359 });
1360
1361 for (auto limit_mb : max_space_limits_mbs) {
1362 bg_error_set = false;
f67539c2
TL
1363 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
1364 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae
FG
1365 std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
1366 auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
1367
1368 Options options = CurrentOptions();
1369 options.sst_file_manager = sst_file_manager;
1370 options.write_buffer_size = 1024 * 512; // 512 Kb
1371 DestroyAndReopen(options);
1372 Random rnd(301);
1373
1374 sfm->SetMaxAllowedSpaceUsage(limit_mb * 1024 * 1024);
1375
11fdf7f2
TL
1376 // It is easy to detect if the test is stuck in a loop. No need for
1377 // complex termination logic.
7c673cae 1378 while (true) {
20effc67 1379 auto s = Put(rnd.RandomString(10), rnd.RandomString(50));
7c673cae
FG
1380 if (!s.ok()) {
1381 break;
1382 }
7c673cae
FG
1383 }
1384 ASSERT_TRUE(bg_error_set);
11fdf7f2 1385 uint64_t total_sst_files_size = 0;
20effc67 1386 std::unordered_map<std::string, uint64_t> files_in_db;
1e59de90 1387 ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db, &total_sst_files_size));
7c673cae 1388 ASSERT_GE(total_sst_files_size, limit_mb * 1024 * 1024);
f67539c2 1389 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7c673cae
FG
1390 }
1391
1392 ASSERT_GT(reached_max_space_on_flush, 0);
1393 ASSERT_GT(reached_max_space_on_compaction, 0);
1394}
1395
1396TEST_F(DBSSTTest, OpenDBWithInfiniteMaxOpenFiles) {
1397 // Open DB with infinite max open files
1398 // - First iteration use 1 thread to open files
1399 // - Second iteration use 5 threads to open files
1400 for (int iter = 0; iter < 2; iter++) {
1401 Options options;
1402 options.create_if_missing = true;
1403 options.write_buffer_size = 100000;
1404 options.disable_auto_compactions = true;
1405 options.max_open_files = -1;
1406 if (iter == 0) {
1407 options.max_file_opening_threads = 1;
1408 } else {
1409 options.max_file_opening_threads = 5;
1410 }
1411 options = CurrentOptions(options);
1412 DestroyAndReopen(options);
1413
1414 // Create 12 Files in L0 (then move then to L2)
1415 for (int i = 0; i < 12; i++) {
1416 std::string k = "L2_" + Key(i);
1417 ASSERT_OK(Put(k, k + std::string(1000, 'a')));
1418 ASSERT_OK(Flush());
1419 }
1420 CompactRangeOptions compact_options;
1421 compact_options.change_level = true;
1422 compact_options.target_level = 2;
1e59de90 1423 ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
7c673cae
FG
1424
1425 // Create 12 Files in L0
1426 for (int i = 0; i < 12; i++) {
1427 std::string k = "L0_" + Key(i);
1428 ASSERT_OK(Put(k, k + std::string(1000, 'a')));
1429 ASSERT_OK(Flush());
1430 }
1431 Close();
1432
1433 // Reopening the DB will load all existing files
1434 Reopen(options);
1435 ASSERT_EQ("12,0,12", FilesPerLevel(0));
1436 std::vector<std::vector<FileMetaData>> files;
1437 dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
1438
1439 for (const auto& level : files) {
1440 for (const auto& file : level) {
1441 ASSERT_TRUE(file.table_reader_handle != nullptr);
1442 }
1443 }
1444
1445 for (int i = 0; i < 12; i++) {
1446 ASSERT_EQ(Get("L0_" + Key(i)), "L0_" + Key(i) + std::string(1000, 'a'));
1447 ASSERT_EQ(Get("L2_" + Key(i)), "L2_" + Key(i) + std::string(1000, 'a'));
1448 }
1449 }
1450}
1451
1e59de90
TL
1452TEST_F(DBSSTTest, OpenDBWithInfiniteMaxOpenFilesSubjectToMemoryLimit) {
1453 for (CacheEntryRoleOptions::Decision charge_table_reader :
1454 {CacheEntryRoleOptions::Decision::kEnabled,
1455 CacheEntryRoleOptions::Decision::kDisabled}) {
1456 // Open DB with infinite max open files
1457 // - First iteration use 1 thread to open files
1458 // - Second iteration use 5 threads to open files
1459 for (int iter = 0; iter < 2; iter++) {
1460 Options options;
1461 options.create_if_missing = true;
1462 options.write_buffer_size = 100000;
1463 options.disable_auto_compactions = true;
1464 options.max_open_files = -1;
1465
1466 BlockBasedTableOptions table_options;
1467 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1468 if (iter == 0) {
1469 options.max_file_opening_threads = 1;
1470 } else {
1471 options.max_file_opening_threads = 5;
1472 }
1473
1474 DestroyAndReopen(options);
1475
1476 // Create 5 Files in L0 (then move then to L2)
1477 for (int i = 0; i < 5; i++) {
1478 std::string k = "L2_" + Key(i);
1479 ASSERT_OK(Put(k, k + std::string(1000, 'a')));
1480 ASSERT_OK(Flush()) << i;
1481 }
1482 CompactRangeOptions compact_options;
1483 compact_options.change_level = true;
1484 compact_options.target_level = 2;
1485 ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
1486
1487 // Create 5 Files in L0
1488 for (int i = 0; i < 5; i++) {
1489 std::string k = "L0_" + Key(i);
1490 ASSERT_OK(Put(k, k + std::string(1000, 'a')));
1491 ASSERT_OK(Flush());
1492 }
1493 Close();
1494
1495 table_options.cache_usage_options.options_overrides.insert(
1496 {CacheEntryRole::kBlockBasedTableReader,
1497 {/*.charged = */ charge_table_reader}});
1498 table_options.block_cache =
1499 NewLRUCache(1024 /* capacity */, 0 /* num_shard_bits */,
1500 true /* strict_capacity_limit */);
1501 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1502
1503 // Reopening the DB will try to load all existing files, conditionally
1504 // subject to memory limit
1505 Status s = TryReopen(options);
1506
1507 if (charge_table_reader == CacheEntryRoleOptions::Decision::kEnabled) {
1508 EXPECT_TRUE(s.IsMemoryLimit());
1509 EXPECT_TRUE(s.ToString().find(
1510 kCacheEntryRoleToCamelString[static_cast<std::uint32_t>(
1511 CacheEntryRole::kBlockBasedTableReader)]) !=
1512 std::string::npos);
1513 EXPECT_TRUE(s.ToString().find("memory limit based on cache capacity") !=
1514 std::string::npos);
1515
1516 } else {
1517 EXPECT_TRUE(s.ok());
1518 ASSERT_EQ("5,0,5", FilesPerLevel(0));
1519 }
1520 }
1521 }
1522}
1523
7c673cae 1524TEST_F(DBSSTTest, GetTotalSstFilesSize) {
11fdf7f2
TL
1525 // We don't propagate oldest-key-time table property on compaction and
1526 // just write 0 as default value. This affect the exact table size, since
1527 // we encode table properties as varint64. Force time to be 0 to work around
1528 // it. Should remove the workaround after we propagate the property on
1529 // compaction.
20effc67
TL
1530 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1531 "FlushJob::WriteLevel0Table:oldest_ancester_time", [&](void* arg) {
1532 uint64_t* current_time = static_cast<uint64_t*>(arg);
1533 *current_time = 0;
1534 });
1535 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
11fdf7f2 1536
7c673cae
FG
1537 Options options = CurrentOptions();
1538 options.disable_auto_compactions = true;
1539 options.compression = kNoCompression;
1540 DestroyAndReopen(options);
1541 // Generate 5 files in L0
1542 for (int i = 0; i < 5; i++) {
1543 for (int j = 0; j < 10; j++) {
1e59de90 1544 std::string val = "val_file_" + std::to_string(i);
7c673cae
FG
1545 ASSERT_OK(Put(Key(j), val));
1546 }
1e59de90 1547 ASSERT_OK(Flush());
7c673cae
FG
1548 }
1549 ASSERT_EQ("5", FilesPerLevel(0));
1550
1551 std::vector<LiveFileMetaData> live_files_meta;
1552 dbfull()->GetLiveFilesMetaData(&live_files_meta);
1553 ASSERT_EQ(live_files_meta.size(), 5);
1554 uint64_t single_file_size = live_files_meta[0].size;
1555
1556 uint64_t live_sst_files_size = 0;
1557 uint64_t total_sst_files_size = 0;
1558 for (const auto& file_meta : live_files_meta) {
1559 live_sst_files_size += file_meta.size;
1560 }
1561
1562 ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
1563 &total_sst_files_size));
1564 // Live SST files = 5
1565 // Total SST files = 5
1566 ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
1567 ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
1568
1569 // hold current version
1570 std::unique_ptr<Iterator> iter1(dbfull()->NewIterator(ReadOptions()));
1e59de90 1571 ASSERT_OK(iter1->status());
7c673cae
FG
1572
1573 // Compact 5 files into 1 file in L0
1574 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1575 ASSERT_EQ("0,1", FilesPerLevel(0));
1576
1577 live_files_meta.clear();
1578 dbfull()->GetLiveFilesMetaData(&live_files_meta);
1579 ASSERT_EQ(live_files_meta.size(), 1);
1580
1581 live_sst_files_size = 0;
1582 total_sst_files_size = 0;
1583 for (const auto& file_meta : live_files_meta) {
1584 live_sst_files_size += file_meta.size;
1585 }
1586 ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
1587 &total_sst_files_size));
1588 // Live SST files = 1 (compacted file)
1589 // Total SST files = 6 (5 original files + compacted file)
1590 ASSERT_EQ(live_sst_files_size, 1 * single_file_size);
1591 ASSERT_EQ(total_sst_files_size, 6 * single_file_size);
1592
1593 // hold current version
1594 std::unique_ptr<Iterator> iter2(dbfull()->NewIterator(ReadOptions()));
1e59de90 1595 ASSERT_OK(iter2->status());
7c673cae
FG
1596
1597 // Delete all keys and compact, this will delete all live files
1598 for (int i = 0; i < 10; i++) {
1599 ASSERT_OK(Delete(Key(i)));
1600 }
1e59de90 1601 ASSERT_OK(Flush());
7c673cae
FG
1602 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1603 ASSERT_EQ("", FilesPerLevel(0));
1604
1605 live_files_meta.clear();
1606 dbfull()->GetLiveFilesMetaData(&live_files_meta);
1607 ASSERT_EQ(live_files_meta.size(), 0);
1608
1609 ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
1610 &total_sst_files_size));
1611 // Live SST files = 0
1612 // Total SST files = 6 (5 original files + compacted file)
1613 ASSERT_EQ(total_sst_files_size, 6 * single_file_size);
1614
1e59de90 1615 ASSERT_OK(iter1->status());
7c673cae
FG
1616 iter1.reset();
1617 ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
1618 &total_sst_files_size));
1619 // Live SST files = 0
1620 // Total SST files = 1 (compacted file)
1621 ASSERT_EQ(total_sst_files_size, 1 * single_file_size);
1622
1e59de90 1623 ASSERT_OK(iter2->status());
7c673cae
FG
1624 iter2.reset();
1625 ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
1626 &total_sst_files_size));
1627 // Live SST files = 0
1628 // Total SST files = 0
1629 ASSERT_EQ(total_sst_files_size, 0);
11fdf7f2 1630
20effc67 1631 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7c673cae
FG
1632}
1633
1e59de90
TL
1634TEST_F(DBSSTTest, OpenDBWithoutGetFileSizeInvocations) {
1635 Options options = CurrentOptions();
1636 std::unique_ptr<MockEnv> env{MockEnv::Create(Env::Default())};
1637 options.env = env.get();
1638 options.disable_auto_compactions = true;
1639 options.compression = kNoCompression;
1640 options.enable_blob_files = true;
1641 options.blob_file_size = 32; // create one blob per file
1642 options.skip_checking_sst_file_sizes_on_db_open = true;
1643
1644 DestroyAndReopen(options);
1645 // Generate 5 files in L0
1646 for (int i = 0; i < 5; i++) {
1647 for (int j = 0; j < 10; j++) {
1648 std::string val = "val_file_" + std::to_string(i);
1649 ASSERT_OK(Put(Key(j), val));
1650 }
1651 ASSERT_OK(Flush());
1652 }
1653 Close();
1654
1655 bool is_get_file_size_called = false;
1656 SyncPoint::GetInstance()->SetCallBack(
1657 "MockFileSystem::GetFileSize:CheckFileType", [&](void* arg) {
1658 std::string* filename = reinterpret_cast<std::string*>(arg);
1659 if (filename->find(".blob") != std::string::npos) {
1660 is_get_file_size_called = true;
1661 }
1662 });
1663
1664 SyncPoint::GetInstance()->EnableProcessing();
1665 Reopen(options);
1666 ASSERT_FALSE(is_get_file_size_called);
1667 SyncPoint::GetInstance()->DisableProcessing();
1668 SyncPoint::GetInstance()->ClearAllCallBacks();
1669
1670 Destroy(options);
1671}
1672
7c673cae
FG
1673TEST_F(DBSSTTest, GetTotalSstFilesSizeVersionsFilesShared) {
1674 Options options = CurrentOptions();
1675 options.disable_auto_compactions = true;
1676 options.compression = kNoCompression;
1677 DestroyAndReopen(options);
1678 // Generate 5 files in L0
1679 for (int i = 0; i < 5; i++) {
1680 ASSERT_OK(Put(Key(i), "val"));
1e59de90 1681 ASSERT_OK(Flush());
7c673cae
FG
1682 }
1683 ASSERT_EQ("5", FilesPerLevel(0));
1684
1685 std::vector<LiveFileMetaData> live_files_meta;
1686 dbfull()->GetLiveFilesMetaData(&live_files_meta);
1687 ASSERT_EQ(live_files_meta.size(), 5);
1688 uint64_t single_file_size = live_files_meta[0].size;
1689
1690 uint64_t live_sst_files_size = 0;
1691 uint64_t total_sst_files_size = 0;
1692 for (const auto& file_meta : live_files_meta) {
1693 live_sst_files_size += file_meta.size;
1694 }
1695
1696 ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
1697 &total_sst_files_size));
1698
1699 // Live SST files = 5
1700 // Total SST files = 5
1701 ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
1702 ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
1703
1704 // hold current version
1705 std::unique_ptr<Iterator> iter1(dbfull()->NewIterator(ReadOptions()));
1e59de90 1706 ASSERT_OK(iter1->status());
7c673cae
FG
1707
1708 // Compaction will do trivial move from L0 to L1
1709 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1710 ASSERT_EQ("0,5", FilesPerLevel(0));
1711
1712 live_files_meta.clear();
1713 dbfull()->GetLiveFilesMetaData(&live_files_meta);
1714 ASSERT_EQ(live_files_meta.size(), 5);
1715
1716 live_sst_files_size = 0;
1717 total_sst_files_size = 0;
1718 for (const auto& file_meta : live_files_meta) {
1719 live_sst_files_size += file_meta.size;
1720 }
1721 ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
1722 &total_sst_files_size));
1723 // Live SST files = 5
1724 // Total SST files = 5 (used in 2 version)
1725 ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
1726 ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
1727
1728 // hold current version
1729 std::unique_ptr<Iterator> iter2(dbfull()->NewIterator(ReadOptions()));
1e59de90 1730 ASSERT_OK(iter2->status());
7c673cae
FG
1731
1732 // Delete all keys and compact, this will delete all live files
1733 for (int i = 0; i < 5; i++) {
1734 ASSERT_OK(Delete(Key(i)));
1735 }
1e59de90 1736 ASSERT_OK(Flush());
7c673cae
FG
1737 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1738 ASSERT_EQ("", FilesPerLevel(0));
1739
1740 live_files_meta.clear();
1741 dbfull()->GetLiveFilesMetaData(&live_files_meta);
1742 ASSERT_EQ(live_files_meta.size(), 0);
1743
1744 ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
1745 &total_sst_files_size));
1746 // Live SST files = 0
1747 // Total SST files = 5 (used in 2 version)
1748 ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
1749
1e59de90 1750 ASSERT_OK(iter1->status());
7c673cae 1751 iter1.reset();
1e59de90 1752 ASSERT_OK(iter2->status());
7c673cae
FG
1753 iter2.reset();
1754
1755 ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
1756 &total_sst_files_size));
1757 // Live SST files = 0
1758 // Total SST files = 0
1759 ASSERT_EQ(total_sst_files_size, 0);
1760}
1761
1e59de90
TL
1762// This test if blob files are recorded by SST File Manager when Compaction job
1763// creates/delete them and in case of AtomicFlush.
1764TEST_F(DBSSTTest, DBWithSFMForBlobFilesAtomicFlush) {
1765 std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
1766 auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
1767 Options options = CurrentOptions();
1768 options.sst_file_manager = sst_file_manager;
1769 options.enable_blob_files = true;
1770 options.min_blob_size = 0;
1771 options.disable_auto_compactions = true;
1772 options.enable_blob_garbage_collection = true;
1773 options.blob_garbage_collection_age_cutoff = 0.5;
1774 options.atomic_flush = true;
1775
1776 int files_added = 0;
1777 int files_deleted = 0;
1778 int files_scheduled_to_delete = 0;
1779
1780 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1781 "SstFileManagerImpl::OnAddFile", [&](void* arg) {
1782 const std::string* const file_path =
1783 static_cast<const std::string*>(arg);
1784 if (EndsWith(*file_path, ".blob")) {
1785 files_added++;
1786 }
1787 });
1788
1789 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1790 "SstFileManagerImpl::OnDeleteFile", [&](void* arg) {
1791 const std::string* const file_path =
1792 static_cast<const std::string*>(arg);
1793 if (EndsWith(*file_path, ".blob")) {
1794 files_deleted++;
1795 }
1796 });
1797
1798 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1799 "SstFileManagerImpl::ScheduleFileDeletion", [&](void* arg) {
1800 assert(arg);
1801 const std::string* const file_path =
1802 static_cast<const std::string*>(arg);
1803 if (EndsWith(*file_path, ".blob")) {
1804 ++files_scheduled_to_delete;
1805 }
1806 });
1807
1808 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1809
1810 DestroyAndReopen(options);
1811 Random rnd(301);
1812
1813 ASSERT_OK(Put("key_1", "value_1"));
1814 ASSERT_OK(Put("key_2", "value_2"));
1815 ASSERT_OK(Put("key_3", "value_3"));
1816 ASSERT_OK(Put("key_4", "value_4"));
1817 ASSERT_OK(Flush());
1818
1819 // Overwrite will create the garbage data.
1820 ASSERT_OK(Put("key_3", "new_value_3"));
1821 ASSERT_OK(Put("key_4", "new_value_4"));
1822 ASSERT_OK(Flush());
1823
1824 ASSERT_OK(Put("Key5", "blob_value5"));
1825 ASSERT_OK(Put("Key6", "blob_value6"));
1826 ASSERT_OK(Flush());
1827
1828 ASSERT_EQ(files_added, 3);
1829 ASSERT_EQ(files_deleted, 0);
1830 ASSERT_EQ(files_scheduled_to_delete, 0);
1831 files_added = 0;
1832
1833 constexpr Slice* begin = nullptr;
1834 constexpr Slice* end = nullptr;
1835 // Compaction job will create a new file and delete the older files.
1836 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
1837 ASSERT_OK(dbfull()->TEST_WaitForCompact());
1838
1839 ASSERT_EQ(files_added, 1);
1840 ASSERT_EQ(files_scheduled_to_delete, 1);
1841
1842 sfm->WaitForEmptyTrash();
1843
1844 ASSERT_EQ(files_deleted, 1);
1845
1846 Close();
1847 ASSERT_OK(DestroyDB(dbname_, options));
1848
1849 ASSERT_EQ(files_scheduled_to_delete, 4);
1850
1851 sfm->WaitForEmptyTrash();
1852
1853 ASSERT_EQ(files_deleted, 4);
1854
1855 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
1856 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
1857}
1858
7c673cae
FG
1859#endif // ROCKSDB_LITE
1860
f67539c2 1861} // namespace ROCKSDB_NAMESPACE
7c673cae
FG
1862
1863int main(int argc, char** argv) {
f67539c2 1864 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
7c673cae 1865 ::testing::InitGoogleTest(&argc, argv);
1e59de90 1866 RegisterCustomObjects(argc, argv);
7c673cae
FG
1867 return RUN_ALL_TESTS();
1868}