// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

// Introduction of SyncPoint effectively disabled building and running this test
// in Release build.
// which is a pity, it is a good test
#include <unordered_set>

#include "cache/lru_cache.h"
#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/db_impl/db_impl.h"
#include "db/db_test_util.h"
#include "db/dbformat.h"
#include "db/job_context.h"
#include "db/version_set.h"
#include "db/write_batch_internal.h"
#include "env/mock_env.h"
#include "file/filename.h"
#include "monitoring/thread_status_util.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/experimental.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/snapshot.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "rocksdb/thread_status.h"
#include "rocksdb/types.h"
#include "rocksdb/utilities/checkpoint.h"
#include "rocksdb/utilities/optimistic_transaction_db.h"
#include "rocksdb/utilities/write_batch_with_index.h"
#include "table/mock_table.h"
#include "table/scoped_arena_iterator.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/compression.h"
#include "util/mutexlock.h"
#include "util/random.h"
#include "util/rate_limiter.h"
#include "util/string_util.h"
#include "utilities/merge_operators.h"
73 namespace ROCKSDB_NAMESPACE
{
75 // Note that whole DBTest and its child classes disable fsync on files
76 // and directories for speed.
77 // If fsync needs to be covered in a test, put it in other places.
78 class DBTest
: public DBTestBase
{
80 DBTest() : DBTestBase("db_test", /*env_do_fsync=*/false) {}
85 public testing::WithParamInterface
<std::tuple
<uint32_t, bool>> {
88 max_subcompactions_
= std::get
<0>(GetParam());
89 exclusive_manual_compaction_
= std::get
<1>(GetParam());
92 // Required if inheriting from testing::WithParamInterface<>
93 static void SetUpTestCase() {}
94 static void TearDownTestCase() {}
96 uint32_t max_subcompactions_
;
97 bool exclusive_manual_compaction_
;
100 TEST_F(DBTest
, MockEnvTest
) {
101 std::unique_ptr
<MockEnv
> env
{MockEnv::Create(Env::Default())};
103 options
.create_if_missing
= true;
104 options
.env
= env
.get();
107 const Slice keys
[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
108 const Slice vals
[] = {Slice("foo"), Slice("bar"), Slice("baz")};
110 ASSERT_OK(DB::Open(options
, "/dir/db", &db
));
111 for (size_t i
= 0; i
< 3; ++i
) {
112 ASSERT_OK(db
->Put(WriteOptions(), keys
[i
], vals
[i
]));
115 for (size_t i
= 0; i
< 3; ++i
) {
117 ASSERT_OK(db
->Get(ReadOptions(), keys
[i
], &res
));
118 ASSERT_TRUE(res
== vals
[i
]);
121 Iterator
* iterator
= db
->NewIterator(ReadOptions());
122 iterator
->SeekToFirst();
123 for (size_t i
= 0; i
< 3; ++i
) {
124 ASSERT_TRUE(iterator
->Valid());
125 ASSERT_TRUE(keys
[i
] == iterator
->key());
126 ASSERT_TRUE(vals
[i
] == iterator
->value());
129 ASSERT_TRUE(!iterator
->Valid());
132 // TEST_FlushMemTable() is not supported in ROCKSDB_LITE
134 DBImpl
* dbi
= static_cast_with_check
<DBImpl
>(db
);
135 ASSERT_OK(dbi
->TEST_FlushMemTable());
137 for (size_t i
= 0; i
< 3; ++i
) {
139 ASSERT_OK(db
->Get(ReadOptions(), keys
[i
], &res
));
140 ASSERT_TRUE(res
== vals
[i
]);
142 #endif // ROCKSDB_LITE
147 // NewMemEnv returns nullptr in ROCKSDB_LITE since class InMemoryEnv isn't
150 TEST_F(DBTest
, MemEnvTest
) {
151 std::unique_ptr
<Env
> env
{NewMemEnv(Env::Default())};
153 options
.create_if_missing
= true;
154 options
.env
= env
.get();
157 const Slice keys
[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
158 const Slice vals
[] = {Slice("foo"), Slice("bar"), Slice("baz")};
160 ASSERT_OK(DB::Open(options
, "/dir/db", &db
));
161 for (size_t i
= 0; i
< 3; ++i
) {
162 ASSERT_OK(db
->Put(WriteOptions(), keys
[i
], vals
[i
]));
165 for (size_t i
= 0; i
< 3; ++i
) {
167 ASSERT_OK(db
->Get(ReadOptions(), keys
[i
], &res
));
168 ASSERT_TRUE(res
== vals
[i
]);
171 Iterator
* iterator
= db
->NewIterator(ReadOptions());
172 iterator
->SeekToFirst();
173 for (size_t i
= 0; i
< 3; ++i
) {
174 ASSERT_TRUE(iterator
->Valid());
175 ASSERT_TRUE(keys
[i
] == iterator
->key());
176 ASSERT_TRUE(vals
[i
] == iterator
->value());
179 ASSERT_TRUE(!iterator
->Valid());
182 DBImpl
* dbi
= static_cast_with_check
<DBImpl
>(db
);
183 ASSERT_OK(dbi
->TEST_FlushMemTable());
185 for (size_t i
= 0; i
< 3; ++i
) {
187 ASSERT_OK(db
->Get(ReadOptions(), keys
[i
], &res
));
188 ASSERT_TRUE(res
== vals
[i
]);
193 options
.create_if_missing
= false;
194 ASSERT_OK(DB::Open(options
, "/dir/db", &db
));
195 for (size_t i
= 0; i
< 3; ++i
) {
197 ASSERT_OK(db
->Get(ReadOptions(), keys
[i
], &res
));
198 ASSERT_TRUE(res
== vals
[i
]);
202 #endif // ROCKSDB_LITE
204 TEST_F(DBTest
, WriteEmptyBatch
) {
205 Options options
= CurrentOptions();
207 options
.write_buffer_size
= 100000;
208 CreateAndReopenWithCF({"pikachu"}, options
);
210 ASSERT_OK(Put(1, "foo", "bar"));
213 wo
.disableWAL
= false;
214 WriteBatch empty_batch
;
215 ASSERT_OK(dbfull()->Write(wo
, &empty_batch
));
217 // make sure we can re-open it.
218 ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options
));
219 ASSERT_EQ("bar", Get(1, "foo"));
222 TEST_F(DBTest
, SkipDelay
) {
223 Options options
= CurrentOptions();
225 options
.write_buffer_size
= 100000;
226 CreateAndReopenWithCF({"pikachu"}, options
);
228 for (bool sync
: {true, false}) {
229 for (bool disableWAL
: {true, false}) {
230 if (sync
&& disableWAL
) {
231 // sync and disableWAL is incompatible.
234 // Use a small number to ensure a large delay that is still effective
236 // TODO(myabandeh): this is time dependent and could potentially make
238 auto token
= dbfull()->TEST_write_controler().GetDelayToken(1);
239 std::atomic
<int> sleep_count(0);
240 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
241 "DBImpl::DelayWrite:Sleep",
242 [&](void* /*arg*/) { sleep_count
.fetch_add(1); });
243 std::atomic
<int> wait_count(0);
244 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
245 "DBImpl::DelayWrite:Wait",
246 [&](void* /*arg*/) { wait_count
.fetch_add(1); });
247 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
251 wo
.disableWAL
= disableWAL
;
252 wo
.no_slowdown
= true;
253 // Large enough to exceed allowance for one time interval
254 std::string
large_value(1024, 'x');
255 // Perhaps ideally this first write would fail because of delay, but
256 // the current implementation does not guarantee that.
257 dbfull()->Put(wo
, "foo", large_value
).PermitUncheckedError();
258 // We need the 2nd write to trigger delay. This is because delay is
259 // estimated based on the last write size which is 0 for the first write.
260 ASSERT_NOK(dbfull()->Put(wo
, "foo2", large_value
));
261 ASSERT_GE(sleep_count
.load(), 0);
262 ASSERT_GE(wait_count
.load(), 0);
265 token
= dbfull()->TEST_write_controler().GetDelayToken(1000000);
266 wo
.no_slowdown
= false;
267 ASSERT_OK(dbfull()->Put(wo
, "foo3", large_value
));
268 ASSERT_GE(sleep_count
.load(), 1);
274 TEST_F(DBTest
, MixedSlowdownOptions
) {
275 Options options
= CurrentOptions();
277 options
.write_buffer_size
= 100000;
278 CreateAndReopenWithCF({"pikachu"}, options
);
279 std::vector
<port::Thread
> threads
;
280 std::atomic
<int> thread_num(0);
282 std::function
<void()> write_slowdown_func
= [&]() {
283 int a
= thread_num
.fetch_add(1);
284 std::string key
= "foo" + std::to_string(a
);
286 wo
.no_slowdown
= false;
287 ASSERT_OK(dbfull()->Put(wo
, key
, "bar"));
289 std::function
<void()> write_no_slowdown_func
= [&]() {
290 int a
= thread_num
.fetch_add(1);
291 std::string key
= "foo" + std::to_string(a
);
293 wo
.no_slowdown
= true;
294 ASSERT_NOK(dbfull()->Put(wo
, key
, "bar"));
296 // Use a small number to ensure a large delay that is still effective
298 // TODO(myabandeh): this is time dependent and could potentially make
300 auto token
= dbfull()->TEST_write_controler().GetDelayToken(1);
301 std::atomic
<int> sleep_count(0);
302 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
303 "DBImpl::DelayWrite:BeginWriteStallDone", [&](void* /*arg*/) {
304 sleep_count
.fetch_add(1);
305 if (threads
.empty()) {
306 for (int i
= 0; i
< 2; ++i
) {
307 threads
.emplace_back(write_slowdown_func
);
309 for (int i
= 0; i
< 2; ++i
) {
310 threads
.emplace_back(write_no_slowdown_func
);
314 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
318 wo
.disableWAL
= false;
319 wo
.no_slowdown
= false;
320 ASSERT_OK(dbfull()->Put(wo
, "foo", "bar"));
321 // We need the 2nd write to trigger delay. This is because delay is
322 // estimated based on the last write size which is 0 for the first write.
323 ASSERT_OK(dbfull()->Put(wo
, "foo2", "bar2"));
326 for (auto& t
: threads
) {
329 ASSERT_GE(sleep_count
.load(), 1);
331 wo
.no_slowdown
= true;
332 ASSERT_OK(dbfull()->Put(wo
, "foo3", "bar"));
335 TEST_F(DBTest
, MixedSlowdownOptionsInQueue
) {
336 Options options
= CurrentOptions();
338 options
.write_buffer_size
= 100000;
339 CreateAndReopenWithCF({"pikachu"}, options
);
340 std::vector
<port::Thread
> threads
;
341 std::atomic
<int> thread_num(0);
343 std::function
<void()> write_no_slowdown_func
= [&]() {
344 int a
= thread_num
.fetch_add(1);
345 std::string key
= "foo" + std::to_string(a
);
347 wo
.no_slowdown
= true;
348 ASSERT_NOK(dbfull()->Put(wo
, key
, "bar"));
350 // Use a small number to ensure a large delay that is still effective
352 // TODO(myabandeh): this is time dependent and could potentially make
354 auto token
= dbfull()->TEST_write_controler().GetDelayToken(1);
355 std::atomic
<int> sleep_count(0);
356 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
357 "DBImpl::DelayWrite:Sleep", [&](void* /*arg*/) {
358 sleep_count
.fetch_add(1);
359 if (threads
.empty()) {
360 for (int i
= 0; i
< 2; ++i
) {
361 threads
.emplace_back(write_no_slowdown_func
);
363 // Sleep for 2s to allow the threads to insert themselves into the
365 env_
->SleepForMicroseconds(3000000ULL);
368 std::atomic
<int> wait_count(0);
369 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
370 "DBImpl::DelayWrite:Wait",
371 [&](void* /*arg*/) { wait_count
.fetch_add(1); });
372 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
376 wo
.disableWAL
= false;
377 wo
.no_slowdown
= false;
378 ASSERT_OK(dbfull()->Put(wo
, "foo", "bar"));
379 // We need the 2nd write to trigger delay. This is because delay is
380 // estimated based on the last write size which is 0 for the first write.
381 ASSERT_OK(dbfull()->Put(wo
, "foo2", "bar2"));
384 for (auto& t
: threads
) {
387 ASSERT_EQ(sleep_count
.load(), 1);
388 ASSERT_GE(wait_count
.load(), 0);
391 TEST_F(DBTest
, MixedSlowdownOptionsStop
) {
392 Options options
= CurrentOptions();
394 options
.write_buffer_size
= 100000;
395 CreateAndReopenWithCF({"pikachu"}, options
);
396 std::vector
<port::Thread
> threads
;
397 std::atomic
<int> thread_num(0);
399 std::function
<void()> write_slowdown_func
= [&]() {
400 int a
= thread_num
.fetch_add(1);
401 std::string key
= "foo" + std::to_string(a
);
403 wo
.no_slowdown
= false;
404 ASSERT_OK(dbfull()->Put(wo
, key
, "bar"));
406 std::function
<void()> write_no_slowdown_func
= [&]() {
407 int a
= thread_num
.fetch_add(1);
408 std::string key
= "foo" + std::to_string(a
);
410 wo
.no_slowdown
= true;
411 ASSERT_NOK(dbfull()->Put(wo
, key
, "bar"));
413 std::function
<void()> wakeup_writer
= [&]() {
414 dbfull()->mutex_
.Lock();
415 dbfull()->bg_cv_
.SignalAll();
416 dbfull()->mutex_
.Unlock();
418 // Use a small number to ensure a large delay that is still effective
420 // TODO(myabandeh): this is time dependent and could potentially make
422 auto token
= dbfull()->TEST_write_controler().GetStopToken();
423 std::atomic
<int> wait_count(0);
424 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
425 "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) {
426 wait_count
.fetch_add(1);
427 if (threads
.empty()) {
428 for (int i
= 0; i
< 2; ++i
) {
429 threads
.emplace_back(write_slowdown_func
);
431 for (int i
= 0; i
< 2; ++i
) {
432 threads
.emplace_back(write_no_slowdown_func
);
434 // Sleep for 2s to allow the threads to insert themselves into the
436 env_
->SleepForMicroseconds(3000000ULL);
439 threads
.emplace_back(wakeup_writer
);
441 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
445 wo
.disableWAL
= false;
446 wo
.no_slowdown
= false;
447 ASSERT_OK(dbfull()->Put(wo
, "foo", "bar"));
448 // We need the 2nd write to trigger delay. This is because delay is
449 // estimated based on the last write size which is 0 for the first write.
450 ASSERT_OK(dbfull()->Put(wo
, "foo2", "bar2"));
453 for (auto& t
: threads
) {
456 ASSERT_GE(wait_count
.load(), 1);
458 wo
.no_slowdown
= true;
459 ASSERT_OK(dbfull()->Put(wo
, "foo3", "bar"));
463 TEST_F(DBTest
, LevelLimitReopen
) {
464 Options options
= CurrentOptions();
465 CreateAndReopenWithCF({"pikachu"}, options
);
467 const std::string
value(1024 * 1024, ' ');
469 while (NumTableFilesAtLevel(2, 1) == 0) {
470 ASSERT_OK(Put(1, Key(i
++), value
));
473 options
.num_levels
= 1;
474 options
.max_bytes_for_level_multiplier_additional
.resize(1, 1);
475 Status s
= TryReopenWithColumnFamilies({"default", "pikachu"}, options
);
476 ASSERT_EQ(s
.IsInvalidArgument(), true);
477 ASSERT_EQ(s
.ToString(),
478 "Invalid argument: db has more levels than options.num_levels");
480 options
.num_levels
= 10;
481 options
.max_bytes_for_level_multiplier_additional
.resize(10, 1);
482 ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options
));
484 #endif // ROCKSDB_LITE
487 TEST_F(DBTest
, LevelReopenWithFIFO
) {
488 const int kLevelCount
= 4;
489 const int kKeyCount
= 5;
490 const int kTotalSstFileCount
= kLevelCount
* kKeyCount
;
493 Options options
= CurrentOptions();
494 // Config level0_file_num_compaction_trigger to prevent L0 files being
495 // automatically compacted while we are constructing a LSM tree structure
496 // to test multi-level FIFO compaction.
497 options
.level0_file_num_compaction_trigger
= kKeyCount
+ 1;
498 CreateAndReopenWithCF({"pikachu"}, options
);
500 // The expected number of files per level after each file creation.
501 const std::string expected_files_per_level
[kLevelCount
][kKeyCount
] = {
502 {"0,0,0,1", "0,0,0,2", "0,0,0,3", "0,0,0,4", "0,0,0,5"},
503 {"0,0,1,5", "0,0,2,5", "0,0,3,5", "0,0,4,5", "0,0,5,5"},
504 {"0,1,5,5", "0,2,5,5", "0,3,5,5", "0,4,5,5", "0,5,5,5"},
505 {"1,5,5,5", "2,5,5,5", "3,5,5,5", "4,5,5,5", "5,5,5,5"},
508 const std::string expected_entries
[kKeyCount
][kLevelCount
+ 1] = {
509 {"[ ]", "[ a3 ]", "[ a2, a3 ]", "[ a1, a2, a3 ]", "[ a0, a1, a2, a3 ]"},
510 {"[ ]", "[ b3 ]", "[ b2, b3 ]", "[ b1, b2, b3 ]", "[ b0, b1, b2, b3 ]"},
511 {"[ ]", "[ c3 ]", "[ c2, c3 ]", "[ c1, c2, c3 ]", "[ c0, c1, c2, c3 ]"},
512 {"[ ]", "[ d3 ]", "[ d2, d3 ]", "[ d1, d2, d3 ]", "[ d0, d1, d2, d3 ]"},
513 {"[ ]", "[ e3 ]", "[ e2, e3 ]", "[ e1, e2, e3 ]", "[ e0, e1, e2, e3 ]"},
516 // The loop below creates the following LSM tree where each (k, v) pair
517 // represents a file that contains that entry. When a file is created,
518 // the db is reopend with FIFO compaction and verified the LSM tree
519 // structure is still the same.
521 // The resulting LSM tree will contain 5 different keys. Each key as
522 // 4 different versions, located in different level.
524 // L0: (e, e0) (d, d0) (c, c0) (b, b0) (a, a0)
525 // L1: (a, a1) (b, b1) (c, c1) (d, d1) (e, e1)
526 // L2: (a, a2) (b, b2) (c, c2) (d, d2) (e, e2)
527 // L3: (a, a3) (b, b3) (c, c3) (d, d3) (e, e3)
528 for (int l
= 0; l
< kLevelCount
; ++l
) {
529 int level
= kLevelCount
- 1 - l
;
530 for (int p
= 0; p
< kKeyCount
; ++p
) {
531 std::string put_key
= std::string(1, char('a' + p
));
532 ASSERT_OK(Put(kCF
, put_key
, put_key
+ std::to_string(level
)));
533 ASSERT_OK(Flush(kCF
));
534 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
535 for (int g
= 0; g
< kKeyCount
; ++g
) {
536 int entry_count
= (p
>= g
) ? l
+ 1 : l
;
537 std::string get_key
= std::string(1, char('a' + g
));
538 CheckAllEntriesWithFifoReopen(expected_entries
[g
][entry_count
], get_key
,
539 kCF
, {"pikachu"}, options
);
542 MoveFilesToLevel(level
, kCF
);
543 for (int g
= 0; g
< kKeyCount
; ++g
) {
544 int entry_count
= (p
>= g
) ? l
+ 1 : l
;
545 std::string get_key
= std::string(1, char('a' + g
));
546 CheckAllEntriesWithFifoReopen(expected_entries
[g
][entry_count
],
547 get_key
, kCF
, {"pikachu"}, options
);
550 ASSERT_EQ(expected_files_per_level
[l
][p
], FilesPerLevel(kCF
));
554 // The expected number of sst files in each level after each FIFO compaction
555 // that deletes the oldest sst file.
556 const std::string expected_files_per_level_after_fifo
[] = {
557 "5,5,5,4", "5,5,5,3", "5,5,5,2", "5,5,5,1", "5,5,5", "5,5,4", "5,5,3",
558 "5,5,2", "5,5,1", "5,5", "5,4", "5,3", "5,2", "5,1",
559 "5", "4", "3", "2", "1", "",
562 // The expected value entries of each key after each FIFO compaction.
563 // This verifies whether FIFO removes the file with the smallest key in non-L0
564 // files first then the oldest files in L0.
565 const std::string expected_entries_after_fifo
[kKeyCount
][kLevelCount
+ 1] = {
566 {"[ a0, a1, a2, a3 ]", "[ a0, a1, a2 ]", "[ a0, a1 ]", "[ a0 ]", "[ ]"},
567 {"[ b0, b1, b2, b3 ]", "[ b0, b1, b2 ]", "[ b0, b1 ]", "[ b0 ]", "[ ]"},
568 {"[ c0, c1, c2, c3 ]", "[ c0, c1, c2 ]", "[ c0, c1 ]", "[ c0 ]", "[ ]"},
569 {"[ d0, d1, d2, d3 ]", "[ d0, d1, d2 ]", "[ d0, d1 ]", "[ d0 ]", "[ ]"},
570 {"[ e0, e1, e2, e3 ]", "[ e0, e1, e2 ]", "[ e0, e1 ]", "[ e0 ]", "[ ]"},
573 // In the 2nd phase, we reopen the DB with FIFO compaction. In each reopen,
574 // we config max_table_files_size so that FIFO will remove exactly one file
575 // at a time upon compaction, and we will use it to verify whether the sst
576 // files are deleted in the correct order.
577 for (int i
= 0; i
< kTotalSstFileCount
; ++i
) {
578 uint64_t total_sst_files_size
= 0;
579 ASSERT_TRUE(dbfull()->GetIntProperty(
580 handles_
[1], "rocksdb.total-sst-files-size", &total_sst_files_size
));
581 ASSERT_TRUE(total_sst_files_size
> 0);
583 Options
fifo_options(options
);
584 fifo_options
.compaction_style
= kCompactionStyleFIFO
;
585 options
.create_if_missing
= false;
586 fifo_options
.max_open_files
= -1;
587 fifo_options
.disable_auto_compactions
= false;
588 // Config max_table_files_size to be total_sst_files_size - 1 so that
589 // FIFO will delete one file.
590 fifo_options
.compaction_options_fifo
.max_table_files_size
=
591 total_sst_files_size
- 1;
593 TryReopenWithColumnFamilies({"default", "pikachu"}, fifo_options
));
594 // For FIFO to pick a compaction
595 ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_
[1]));
596 ASSERT_OK(dbfull()->TEST_WaitForCompact(false));
597 for (int g
= 0; g
< kKeyCount
; ++g
) {
598 std::string get_key
= std::string(1, char('a' + g
));
599 int status_index
= i
/ kKeyCount
;
600 if ((i
% kKeyCount
) >= g
) {
601 // If true, then it means the sst file containing the get_key in the
602 // current level has already been deleted, so we need to move the
603 // status_index for checking the expected value.
606 CheckAllEntriesWithFifoReopen(
607 expected_entries_after_fifo
[g
][status_index
], get_key
, kCF
,
608 {"pikachu"}, options
);
610 ASSERT_EQ(expected_files_per_level_after_fifo
[i
], FilesPerLevel(kCF
));
613 #endif // !ROCKSDB_LITE
615 TEST_F(DBTest
, PutSingleDeleteGet
) {
617 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
618 ASSERT_OK(Put(1, "foo", "v1"));
619 ASSERT_EQ("v1", Get(1, "foo"));
620 ASSERT_OK(Put(1, "foo2", "v2"));
621 ASSERT_EQ("v2", Get(1, "foo2"));
622 ASSERT_OK(SingleDelete(1, "foo"));
623 ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
624 // Skip FIFO and universal compaction because they do not apply to the test
625 // case. Skip MergePut because single delete does not get removed when it
626 // encounters a merge.
627 } while (ChangeOptions(kSkipFIFOCompaction
| kSkipUniversalCompaction
|
631 TEST_F(DBTest
, ReadFromPersistedTier
) {
634 Options options
= CurrentOptions();
635 for (int disableWAL
= 0; disableWAL
<= 1; ++disableWAL
) {
636 CreateAndReopenWithCF({"pikachu"}, options
);
638 wopt
.disableWAL
= (disableWAL
== 1);
639 // 1st round: put but not flush
640 ASSERT_OK(db_
->Put(wopt
, handles_
[1], "foo", "first"));
641 ASSERT_OK(db_
->Put(wopt
, handles_
[1], "bar", "one"));
642 ASSERT_EQ("first", Get(1, "foo"));
643 ASSERT_EQ("one", Get(1, "bar"));
645 // Read directly from persited data.
647 ropt
.read_tier
= kPersistedTier
;
649 if (wopt
.disableWAL
) {
650 // as data has not yet being flushed, we expect not found.
651 ASSERT_TRUE(db_
->Get(ropt
, handles_
[1], "foo", &value
).IsNotFound());
652 ASSERT_TRUE(db_
->Get(ropt
, handles_
[1], "bar", &value
).IsNotFound());
654 ASSERT_OK(db_
->Get(ropt
, handles_
[1], "foo", &value
));
655 ASSERT_OK(db_
->Get(ropt
, handles_
[1], "bar", &value
));
659 std::vector
<ColumnFamilyHandle
*> multiget_cfs
;
660 multiget_cfs
.push_back(handles_
[1]);
661 multiget_cfs
.push_back(handles_
[1]);
662 std::vector
<Slice
> multiget_keys
;
663 multiget_keys
.push_back("foo");
664 multiget_keys
.push_back("bar");
665 std::vector
<std::string
> multiget_values
;
667 db_
->MultiGet(ropt
, multiget_cfs
, multiget_keys
, &multiget_values
);
668 if (wopt
.disableWAL
) {
669 ASSERT_TRUE(statuses
[0].IsNotFound());
670 ASSERT_TRUE(statuses
[1].IsNotFound());
672 ASSERT_OK(statuses
[0]);
673 ASSERT_OK(statuses
[1]);
676 // 2nd round: flush and put a new value in memtable.
678 ASSERT_OK(db_
->Put(wopt
, handles_
[1], "rocksdb", "hello"));
680 // once the data has been flushed, we are able to get the
681 // data when kPersistedTier is used.
682 ASSERT_TRUE(db_
->Get(ropt
, handles_
[1], "foo", &value
).ok());
683 ASSERT_EQ(value
, "first");
684 ASSERT_TRUE(db_
->Get(ropt
, handles_
[1], "bar", &value
).ok());
685 ASSERT_EQ(value
, "one");
686 if (wopt
.disableWAL
) {
688 db_
->Get(ropt
, handles_
[1], "rocksdb", &value
).IsNotFound());
690 ASSERT_OK(db_
->Get(ropt
, handles_
[1], "rocksdb", &value
));
691 ASSERT_EQ(value
, "hello");
694 // Expect same result in multiget
695 multiget_cfs
.push_back(handles_
[1]);
696 multiget_keys
.push_back("rocksdb");
698 db_
->MultiGet(ropt
, multiget_cfs
, multiget_keys
, &multiget_values
);
699 ASSERT_TRUE(statuses
[0].ok());
700 ASSERT_EQ("first", multiget_values
[0]);
701 ASSERT_TRUE(statuses
[1].ok());
702 ASSERT_EQ("one", multiget_values
[1]);
703 if (wopt
.disableWAL
) {
704 ASSERT_TRUE(statuses
[2].IsNotFound());
706 ASSERT_OK(statuses
[2]);
709 // 3rd round: delete and flush
710 ASSERT_OK(db_
->Delete(wopt
, handles_
[1], "foo"));
712 ASSERT_OK(db_
->Delete(wopt
, handles_
[1], "bar"));
714 ASSERT_TRUE(db_
->Get(ropt
, handles_
[1], "foo", &value
).IsNotFound());
715 if (wopt
.disableWAL
) {
716 // Still expect finding the value as its delete has not yet being
718 ASSERT_TRUE(db_
->Get(ropt
, handles_
[1], "bar", &value
).ok());
719 ASSERT_EQ(value
, "one");
721 ASSERT_TRUE(db_
->Get(ropt
, handles_
[1], "bar", &value
).IsNotFound());
723 ASSERT_TRUE(db_
->Get(ropt
, handles_
[1], "rocksdb", &value
).ok());
724 ASSERT_EQ(value
, "hello");
727 db_
->MultiGet(ropt
, multiget_cfs
, multiget_keys
, &multiget_values
);
728 ASSERT_TRUE(statuses
[0].IsNotFound());
729 if (wopt
.disableWAL
) {
730 ASSERT_TRUE(statuses
[1].ok());
731 ASSERT_EQ("one", multiget_values
[1]);
733 ASSERT_TRUE(statuses
[1].IsNotFound());
735 ASSERT_TRUE(statuses
[2].ok());
736 ASSERT_EQ("hello", multiget_values
[2]);
737 if (wopt
.disableWAL
== 0) {
738 DestroyAndReopen(options
);
741 } while (ChangeOptions());
744 TEST_F(DBTest
, SingleDeleteFlush
) {
745 // Test to check whether flushing preserves a single delete hidden
750 Options options
= CurrentOptions();
751 options
.disable_auto_compactions
= true;
752 CreateAndReopenWithCF({"pikachu"}, options
);
754 // Put values on second level (so that they will not be in the same
755 // compaction as the other operations.
756 ASSERT_OK(Put(1, "foo", "first"));
757 ASSERT_OK(Put(1, "bar", "one"));
759 MoveFilesToLevel(2, 1);
761 // (Single) delete hidden by a put
762 ASSERT_OK(SingleDelete(1, "foo"));
763 ASSERT_OK(Put(1, "foo", "second"));
764 ASSERT_OK(Delete(1, "bar"));
765 ASSERT_OK(Put(1, "bar", "two"));
768 ASSERT_OK(SingleDelete(1, "foo"));
769 ASSERT_OK(Delete(1, "bar"));
772 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_
[1],
775 ASSERT_EQ("NOT_FOUND", Get(1, "bar"));
776 ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
777 // Skip FIFO and universal compaction beccaus they do not apply to the test
778 // case. Skip MergePut because single delete does not get removed when it
779 // encounters a merge.
780 } while (ChangeOptions(kSkipFIFOCompaction
| kSkipUniversalCompaction
|
784 TEST_F(DBTest
, SingleDeletePutFlush
) {
785 // Single deletes that encounter the matching put in a flush should get
790 Options options
= CurrentOptions();
791 options
.disable_auto_compactions
= true;
792 CreateAndReopenWithCF({"pikachu"}, options
);
794 ASSERT_OK(Put(1, "foo", Slice()));
795 ASSERT_OK(Put(1, "a", Slice()));
796 ASSERT_OK(SingleDelete(1, "a"));
799 ASSERT_EQ("[ ]", AllEntriesFor("a", 1));
800 // Skip FIFO and universal compaction because they do not apply to the test
801 // case. Skip MergePut because single delete does not get removed when it
802 // encounters a merge.
803 } while (ChangeOptions(kSkipFIFOCompaction
| kSkipUniversalCompaction
|
807 // Disable because not all platform can run it.
808 // It requires more than 9GB memory to run it, With single allocation
810 TEST_F(DBTest
, DISABLED_SanitizeVeryVeryLargeValue
) {
811 const size_t kValueSize
= 4 * size_t{1024 * 1024 * 1024}; // 4GB value
812 std::string
raw(kValueSize
, 'v');
813 Options options
= CurrentOptions();
815 options
.merge_operator
= MergeOperators::CreatePutOperator();
816 options
.write_buffer_size
= 100000; // Small write buffer
817 options
.paranoid_checks
= true;
818 DestroyAndReopen(options
);
820 ASSERT_OK(Put("boo", "v1"));
821 ASSERT_TRUE(Put("foo", raw
).IsInvalidArgument());
822 ASSERT_TRUE(Merge("foo", raw
).IsInvalidArgument());
825 ASSERT_TRUE(wb
.Put("foo", raw
).IsInvalidArgument());
826 ASSERT_TRUE(wb
.Merge("foo", raw
).IsInvalidArgument());
828 Slice value_slice
= raw
;
829 Slice key_slice
= "foo";
830 SliceParts
sp_key(&key_slice
, 1);
831 SliceParts
sp_value(&value_slice
, 1);
833 ASSERT_TRUE(wb
.Put(sp_key
, sp_value
).IsInvalidArgument());
834 ASSERT_TRUE(wb
.Merge(sp_key
, sp_value
).IsInvalidArgument());
837 // Disable because not all platform can run it.
838 // It requires more than 9GB memory to run it, With single allocation
840 TEST_F(DBTest
, DISABLED_VeryLargeValue
) {
841 const size_t kValueSize
= 3221225472u; // 3GB value
842 const size_t kKeySize
= 8388608u; // 8MB key
843 std::string
raw(kValueSize
, 'v');
844 std::string
key1(kKeySize
, 'c');
845 std::string
key2(kKeySize
, 'd');
847 Options options
= CurrentOptions();
849 options
.write_buffer_size
= 100000; // Small write buffer
850 options
.paranoid_checks
= true;
851 DestroyAndReopen(options
);
853 ASSERT_OK(Put("boo", "v1"));
854 ASSERT_OK(Put("foo", "v1"));
855 ASSERT_OK(Put(key1
, raw
));
857 ASSERT_OK(Put(key2
, raw
));
858 dbfull()->TEST_WaitForFlushMemTable();
861 ASSERT_EQ(1, NumTableFilesAtLevel(0));
862 #endif // !ROCKSDB_LITE
865 Status s
= db_
->Get(ReadOptions(), key1
, &value
);
867 ASSERT_EQ(kValueSize
, value
.size());
868 ASSERT_EQ('v', value
[0]);
870 s
= db_
->Get(ReadOptions(), key2
, &value
);
872 ASSERT_EQ(kValueSize
, value
.size());
873 ASSERT_EQ('w', value
[0]);
875 // Compact all files.
877 db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr);
879 // Check DB is not in read-only state.
880 ASSERT_OK(Put("boo", "v1"));
882 s
= db_
->Get(ReadOptions(), key1
, &value
);
884 ASSERT_EQ(kValueSize
, value
.size());
885 ASSERT_EQ('v', value
[0]);
887 s
= db_
->Get(ReadOptions(), key2
, &value
);
889 ASSERT_EQ(kValueSize
, value
.size());
890 ASSERT_EQ('w', value
[0]);
893 TEST_F(DBTest
, GetFromImmutableLayer
) {
895 Options options
= CurrentOptions();
897 CreateAndReopenWithCF({"pikachu"}, options
);
899 ASSERT_OK(Put(1, "foo", "v1"));
900 ASSERT_EQ("v1", Get(1, "foo"));
903 env_
->delay_sstable_sync_
.store(true, std::memory_order_release
);
904 ASSERT_OK(Put(1, "k1", std::string(100000, 'x'))); // Fill memtable
905 ASSERT_OK(Put(1, "k2", std::string(100000, 'y'))); // Trigger flush
906 ASSERT_EQ("v1", Get(1, "foo"));
907 ASSERT_EQ("NOT_FOUND", Get(0, "foo"));
908 // Release sync calls
909 env_
->delay_sstable_sync_
.store(false, std::memory_order_release
);
910 } while (ChangeOptions());
913 TEST_F(DBTest
, GetLevel0Ordering
) {
915 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
916 // Check that we process level-0 files in correct order. The code
917 // below generates two level-0 files where the earlier one comes
918 // before the later one in the level-0 file list since the earlier
919 // one has a smaller "smallest" key.
920 ASSERT_OK(Put(1, "bar", "b"));
921 ASSERT_OK(Put(1, "foo", "v1"));
923 ASSERT_OK(Put(1, "foo", "v2"));
925 ASSERT_EQ("v2", Get(1, "foo"));
926 } while (ChangeOptions());
929 TEST_F(DBTest
, WrongLevel0Config
) {
930 Options options
= CurrentOptions();
932 ASSERT_OK(DestroyDB(dbname_
, options
));
933 options
.level0_stop_writes_trigger
= 1;
934 options
.level0_slowdown_writes_trigger
= 2;
935 options
.level0_file_num_compaction_trigger
= 3;
936 ASSERT_OK(DB::Open(options
, dbname_
, &db_
));
940 TEST_F(DBTest
, GetOrderedByLevels
) {
942 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
943 ASSERT_OK(Put(1, "foo", "v1"));
944 Compact(1, "a", "z");
945 ASSERT_EQ("v1", Get(1, "foo"));
946 ASSERT_OK(Put(1, "foo", "v2"));
947 ASSERT_EQ("v2", Get(1, "foo"));
949 ASSERT_EQ("v2", Get(1, "foo"));
950 } while (ChangeOptions());
953 TEST_F(DBTest
, GetPicksCorrectFile
) {
955 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
956 // Arrange to have multiple files in a non-level-0 level.
957 ASSERT_OK(Put(1, "a", "va"));
958 Compact(1, "a", "b");
959 ASSERT_OK(Put(1, "x", "vx"));
960 Compact(1, "x", "y");
961 ASSERT_OK(Put(1, "f", "vf"));
962 Compact(1, "f", "g");
963 ASSERT_EQ("va", Get(1, "a"));
964 ASSERT_EQ("vf", Get(1, "f"));
965 ASSERT_EQ("vx", Get(1, "x"));
966 } while (ChangeOptions());
969 TEST_F(DBTest
, GetEncountersEmptyLevel
) {
971 Options options
= CurrentOptions();
972 CreateAndReopenWithCF({"pikachu"}, options
);
973 // Arrange for the following to happen:
974 // * sstable A in level 0
975 // * nothing in level 1
976 // * sstable B in level 2
977 // Then do enough Get() calls to arrange for an automatic compaction
978 // of sstable A. A bug would cause the compaction to be marked as
979 // occurring at level 1 (instead of the correct level 0).
981 // Step 1: First place sstables in levels 0 and 2
982 ASSERT_OK(Put(1, "a", "begin"));
983 ASSERT_OK(Put(1, "z", "end"));
985 ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_
[1]));
986 ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_
[1]));
987 ASSERT_OK(Put(1, "a", "begin"));
988 ASSERT_OK(Put(1, "z", "end"));
990 ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
991 ASSERT_GT(NumTableFilesAtLevel(2, 1), 0);
993 // Step 2: clear level 1 if necessary.
994 ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_
[1]));
995 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1);
996 ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
997 ASSERT_EQ(NumTableFilesAtLevel(2, 1), 1);
999 // Step 3: read a bunch of times
1000 for (int i
= 0; i
< 1000; i
++) {
1001 ASSERT_EQ("NOT_FOUND", Get(1, "missing"));
1004 // Step 4: Wait for compaction to finish
1005 ASSERT_OK(dbfull()->TEST_WaitForCompact());
1007 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1); // XXX
1008 } while (ChangeOptions(kSkipUniversalCompaction
| kSkipFIFOCompaction
));
1010 #endif // ROCKSDB_LITE
1012 TEST_F(DBTest
, FlushMultipleMemtable
) {
1014 Options options
= CurrentOptions();
1015 WriteOptions writeOpt
= WriteOptions();
1016 writeOpt
.disableWAL
= true;
1017 options
.max_write_buffer_number
= 4;
1018 options
.min_write_buffer_number_to_merge
= 3;
1019 options
.max_write_buffer_size_to_maintain
= -1;
1020 CreateAndReopenWithCF({"pikachu"}, options
);
1021 ASSERT_OK(dbfull()->Put(writeOpt
, handles_
[1], "foo", "v1"));
1022 ASSERT_OK(Flush(1));
1023 ASSERT_OK(dbfull()->Put(writeOpt
, handles_
[1], "bar", "v1"));
1025 ASSERT_EQ("v1", Get(1, "foo"));
1026 ASSERT_EQ("v1", Get(1, "bar"));
1027 ASSERT_OK(Flush(1));
1028 } while (ChangeCompactOptions());
1030 #ifndef ROCKSDB_LITE
1031 TEST_F(DBTest
, FlushSchedule
) {
1032 Options options
= CurrentOptions();
1033 options
.disable_auto_compactions
= true;
1034 options
.level0_stop_writes_trigger
= 1 << 10;
1035 options
.level0_slowdown_writes_trigger
= 1 << 10;
1036 options
.min_write_buffer_number_to_merge
= 1;
1037 options
.max_write_buffer_size_to_maintain
=
1038 static_cast<int64_t>(options
.write_buffer_size
);
1039 options
.max_write_buffer_number
= 2;
1040 options
.write_buffer_size
= 120 * 1024;
1041 auto flush_listener
= std::make_shared
<FlushCounterListener
>();
1042 flush_listener
->expected_flush_reason
= FlushReason::kWriteBufferFull
;
1043 options
.listeners
.push_back(flush_listener
);
1044 CreateAndReopenWithCF({"pikachu"}, options
);
1045 std::vector
<port::Thread
> threads
;
1047 std::atomic
<int> thread_num(0);
1048 // each column family will have 5 thread, each thread generating 2 memtables.
1049 // each column family should end up with 10 table files
1050 std::function
<void()> fill_memtable_func
= [&]() {
1051 int a
= thread_num
.fetch_add(1);
1054 // this should fill up 2 memtables
1055 for (int k
= 0; k
< 5000; ++k
) {
1056 ASSERT_OK(db_
->Put(wo
, handles_
[a
& 1], rnd
.RandomString(13), ""));
1060 for (int i
= 0; i
< 10; ++i
) {
1061 threads
.emplace_back(fill_memtable_func
);
1064 for (auto& t
: threads
) {
1068 auto default_tables
= GetNumberOfSstFilesForColumnFamily(db_
, "default");
1069 auto pikachu_tables
= GetNumberOfSstFilesForColumnFamily(db_
, "pikachu");
1070 ASSERT_LE(default_tables
, static_cast<uint64_t>(10));
1071 ASSERT_GT(default_tables
, static_cast<uint64_t>(0));
1072 ASSERT_LE(pikachu_tables
, static_cast<uint64_t>(10));
1073 ASSERT_GT(pikachu_tables
, static_cast<uint64_t>(0));
1075 #endif // ROCKSDB_LITE
1078 class KeepFilter
: public CompactionFilter
{
1080 bool Filter(int /*level*/, const Slice
& /*key*/, const Slice
& /*value*/,
1081 std::string
* /*new_value*/,
1082 bool* /*value_changed*/) const override
{
1086 const char* Name() const override
{ return "KeepFilter"; }
1089 class KeepFilterFactory
: public CompactionFilterFactory
{
1091 explicit KeepFilterFactory(bool check_context
= false)
1092 : check_context_(check_context
) {}
1094 std::unique_ptr
<CompactionFilter
> CreateCompactionFilter(
1095 const CompactionFilter::Context
& context
) override
{
1096 if (check_context_
) {
1097 EXPECT_EQ(expect_full_compaction_
.load(), context
.is_full_compaction
);
1098 EXPECT_EQ(expect_manual_compaction_
.load(), context
.is_manual_compaction
);
1100 return std::unique_ptr
<CompactionFilter
>(new KeepFilter());
1103 const char* Name() const override
{ return "KeepFilterFactory"; }
1104 bool check_context_
;
1105 std::atomic_bool expect_full_compaction_
;
1106 std::atomic_bool expect_manual_compaction_
;
1109 class DelayFilter
: public CompactionFilter
{
1111 explicit DelayFilter(DBTestBase
* d
) : db_test(d
) {}
1112 bool Filter(int /*level*/, const Slice
& /*key*/, const Slice
& /*value*/,
1113 std::string
* /*new_value*/,
1114 bool* /*value_changed*/) const override
{
1115 db_test
->env_
->MockSleepForMicroseconds(1000);
1119 const char* Name() const override
{ return "DelayFilter"; }
1122 DBTestBase
* db_test
;
1125 class DelayFilterFactory
: public CompactionFilterFactory
{
1127 explicit DelayFilterFactory(DBTestBase
* d
) : db_test(d
) {}
1128 std::unique_ptr
<CompactionFilter
> CreateCompactionFilter(
1129 const CompactionFilter::Context
& /*context*/) override
{
1130 return std::unique_ptr
<CompactionFilter
>(new DelayFilter(db_test
));
1133 const char* Name() const override
{ return "DelayFilterFactory"; }
1136 DBTestBase
* db_test
;
1138 } // anonymous namespace
1140 #ifndef ROCKSDB_LITE
1142 static std::string
CompressibleString(Random
* rnd
, int len
) {
1144 test::CompressibleString(rnd
, 0.8, len
, &r
);
1147 #endif // ROCKSDB_LITE
1149 TEST_F(DBTest
, FailMoreDbPaths
) {
1150 Options options
= CurrentOptions();
1151 options
.db_paths
.emplace_back(dbname_
, 10000000);
1152 options
.db_paths
.emplace_back(dbname_
+ "_2", 1000000);
1153 options
.db_paths
.emplace_back(dbname_
+ "_3", 1000000);
1154 options
.db_paths
.emplace_back(dbname_
+ "_4", 1000000);
1155 options
.db_paths
.emplace_back(dbname_
+ "_5", 1000000);
1156 ASSERT_TRUE(TryReopen(options
).IsNotSupported());
1159 void CheckColumnFamilyMeta(
1160 const ColumnFamilyMetaData
& cf_meta
, const std::string
& cf_name
,
1161 const std::vector
<std::vector
<FileMetaData
>>& files_by_level
,
1162 uint64_t start_time
, uint64_t end_time
) {
1163 ASSERT_EQ(cf_meta
.name
, cf_name
);
1164 ASSERT_EQ(cf_meta
.levels
.size(), files_by_level
.size());
1166 uint64_t cf_size
= 0;
1167 size_t file_count
= 0;
1169 for (size_t i
= 0; i
< cf_meta
.levels
.size(); ++i
) {
1170 const auto& level_meta_from_cf
= cf_meta
.levels
[i
];
1171 const auto& level_meta_from_files
= files_by_level
[i
];
1173 ASSERT_EQ(level_meta_from_cf
.level
, i
);
1174 ASSERT_EQ(level_meta_from_cf
.files
.size(), level_meta_from_files
.size());
1176 file_count
+= level_meta_from_cf
.files
.size();
1178 uint64_t level_size
= 0;
1179 for (size_t j
= 0; j
< level_meta_from_cf
.files
.size(); ++j
) {
1180 const auto& file_meta_from_cf
= level_meta_from_cf
.files
[j
];
1181 const auto& file_meta_from_files
= level_meta_from_files
[j
];
1183 level_size
+= file_meta_from_cf
.size
;
1185 ASSERT_EQ(file_meta_from_cf
.file_number
,
1186 file_meta_from_files
.fd
.GetNumber());
1187 ASSERT_EQ(file_meta_from_cf
.file_number
,
1188 TableFileNameToNumber(file_meta_from_cf
.name
));
1189 ASSERT_EQ(file_meta_from_cf
.size
, file_meta_from_files
.fd
.file_size
);
1190 ASSERT_EQ(file_meta_from_cf
.smallest_seqno
,
1191 file_meta_from_files
.fd
.smallest_seqno
);
1192 ASSERT_EQ(file_meta_from_cf
.largest_seqno
,
1193 file_meta_from_files
.fd
.largest_seqno
);
1194 ASSERT_EQ(file_meta_from_cf
.smallestkey
,
1195 file_meta_from_files
.smallest
.user_key().ToString());
1196 ASSERT_EQ(file_meta_from_cf
.largestkey
,
1197 file_meta_from_files
.largest
.user_key().ToString());
1198 ASSERT_EQ(file_meta_from_cf
.oldest_blob_file_number
,
1199 file_meta_from_files
.oldest_blob_file_number
);
1200 ASSERT_EQ(file_meta_from_cf
.oldest_ancester_time
,
1201 file_meta_from_files
.oldest_ancester_time
);
1202 ASSERT_EQ(file_meta_from_cf
.file_creation_time
,
1203 file_meta_from_files
.file_creation_time
);
1204 ASSERT_GE(file_meta_from_cf
.file_creation_time
, start_time
);
1205 ASSERT_LE(file_meta_from_cf
.file_creation_time
, end_time
);
1206 ASSERT_GE(file_meta_from_cf
.oldest_ancester_time
, start_time
);
1207 ASSERT_LE(file_meta_from_cf
.oldest_ancester_time
, end_time
);
1208 // More from FileStorageInfo
1209 ASSERT_EQ(file_meta_from_cf
.file_type
, kTableFile
);
1210 ASSERT_EQ(file_meta_from_cf
.name
,
1211 "/" + file_meta_from_cf
.relative_filename
);
1212 ASSERT_EQ(file_meta_from_cf
.directory
, file_meta_from_cf
.db_path
);
1215 ASSERT_EQ(level_meta_from_cf
.size
, level_size
);
1216 cf_size
+= level_size
;
1219 ASSERT_EQ(cf_meta
.file_count
, file_count
);
1220 ASSERT_EQ(cf_meta
.size
, cf_size
);
1223 void CheckLiveFilesMeta(
1224 const std::vector
<LiveFileMetaData
>& live_file_meta
,
1225 const std::vector
<std::vector
<FileMetaData
>>& files_by_level
) {
1226 size_t total_file_count
= 0;
1227 for (const auto& f
: files_by_level
) {
1228 total_file_count
+= f
.size();
1231 ASSERT_EQ(live_file_meta
.size(), total_file_count
);
1236 for (const auto& meta
: live_file_meta
) {
1237 if (level
!= meta
.level
) {
1242 ASSERT_LT(i
, files_by_level
[level
].size());
1244 const auto& expected_meta
= files_by_level
[level
][i
];
1246 ASSERT_EQ(meta
.column_family_name
, kDefaultColumnFamilyName
);
1247 ASSERT_EQ(meta
.file_number
, expected_meta
.fd
.GetNumber());
1248 ASSERT_EQ(meta
.file_number
, TableFileNameToNumber(meta
.name
));
1249 ASSERT_EQ(meta
.size
, expected_meta
.fd
.file_size
);
1250 ASSERT_EQ(meta
.smallest_seqno
, expected_meta
.fd
.smallest_seqno
);
1251 ASSERT_EQ(meta
.largest_seqno
, expected_meta
.fd
.largest_seqno
);
1252 ASSERT_EQ(meta
.smallestkey
, expected_meta
.smallest
.user_key().ToString());
1253 ASSERT_EQ(meta
.largestkey
, expected_meta
.largest
.user_key().ToString());
1254 ASSERT_EQ(meta
.oldest_blob_file_number
,
1255 expected_meta
.oldest_blob_file_number
);
1257 // More from FileStorageInfo
1258 ASSERT_EQ(meta
.file_type
, kTableFile
);
1259 ASSERT_EQ(meta
.name
, "/" + meta
.relative_filename
);
1260 ASSERT_EQ(meta
.directory
, meta
.db_path
);
1266 #ifndef ROCKSDB_LITE
1267 void AddBlobFile(const ColumnFamilyHandle
* cfh
, uint64_t blob_file_number
,
1268 uint64_t total_blob_count
, uint64_t total_blob_bytes
,
1269 const std::string
& checksum_method
,
1270 const std::string
& checksum_value
,
1271 uint64_t garbage_blob_count
= 0,
1272 uint64_t garbage_blob_bytes
= 0) {
1273 ColumnFamilyData
* cfd
=
1274 (static_cast<const ColumnFamilyHandleImpl
*>(cfh
))->cfd();
1277 Version
* const version
= cfd
->current();
1280 VersionStorageInfo
* const storage_info
= version
->storage_info();
1281 assert(storage_info
);
1283 // Add a live blob file.
1285 auto shared_meta
= SharedBlobFileMetaData::Create(
1286 blob_file_number
, total_blob_count
, total_blob_bytes
, checksum_method
,
1289 auto meta
= BlobFileMetaData::Create(std::move(shared_meta
),
1290 BlobFileMetaData::LinkedSsts(),
1291 garbage_blob_count
, garbage_blob_bytes
);
1293 storage_info
->AddBlobFile(std::move(meta
));
1296 static void CheckBlobMetaData(
1297 const BlobMetaData
& bmd
, uint64_t blob_file_number
,
1298 uint64_t total_blob_count
, uint64_t total_blob_bytes
,
1299 const std::string
& checksum_method
, const std::string
& checksum_value
,
1300 uint64_t garbage_blob_count
= 0, uint64_t garbage_blob_bytes
= 0) {
1301 ASSERT_EQ(bmd
.blob_file_number
, blob_file_number
);
1302 ASSERT_EQ(bmd
.blob_file_name
, BlobFileName("", blob_file_number
));
1303 ASSERT_EQ(bmd
.blob_file_size
,
1304 total_blob_bytes
+ BlobLogHeader::kSize
+ BlobLogFooter::kSize
);
1306 ASSERT_EQ(bmd
.total_blob_count
, total_blob_count
);
1307 ASSERT_EQ(bmd
.total_blob_bytes
, total_blob_bytes
);
1308 ASSERT_EQ(bmd
.garbage_blob_count
, garbage_blob_count
);
1309 ASSERT_EQ(bmd
.garbage_blob_bytes
, garbage_blob_bytes
);
1310 ASSERT_EQ(bmd
.checksum_method
, checksum_method
);
1311 ASSERT_EQ(bmd
.checksum_value
, checksum_value
);
1314 TEST_F(DBTest
, MetaDataTest
) {
1315 Options options
= CurrentOptions();
1316 options
.create_if_missing
= true;
1317 options
.disable_auto_compactions
= true;
1319 int64_t temp_time
= 0;
1320 options
.env
->GetCurrentTime(&temp_time
);
1321 uint64_t start_time
= static_cast<uint64_t>(temp_time
);
1323 DestroyAndReopen(options
);
1327 for (int i
= 0; i
< 100; ++i
) {
1328 // Add a single blob reference to each file
1329 std::string blob_index
;
1330 BlobIndex::EncodeBlob(&blob_index
, /* blob_file_number */ i
+ 1000,
1331 /* offset */ 1234, /* size */ 5678, kNoCompression
);
1334 ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch
, 0, Key(key_index
),
1336 ASSERT_OK(dbfull()->Write(WriteOptions(), &batch
));
1340 // Fill up the rest of the file with random values.
1341 GenerateNewFile(&rnd
, &key_index
, /* nowait */ true);
1346 std::vector
<std::vector
<FileMetaData
>> files_by_level
;
1347 dbfull()->TEST_GetFilesMetaData(db_
->DefaultColumnFamily(), &files_by_level
);
1349 options
.env
->GetCurrentTime(&temp_time
);
1350 uint64_t end_time
= static_cast<uint64_t>(temp_time
);
1352 ColumnFamilyMetaData cf_meta
;
1353 db_
->GetColumnFamilyMetaData(&cf_meta
);
1354 CheckColumnFamilyMeta(cf_meta
, kDefaultColumnFamilyName
, files_by_level
,
1355 start_time
, end_time
);
1356 std::vector
<LiveFileMetaData
> live_file_meta
;
1357 db_
->GetLiveFilesMetaData(&live_file_meta
);
1358 CheckLiveFilesMeta(live_file_meta
, files_by_level
);
1361 TEST_F(DBTest
, AllMetaDataTest
) {
1362 Options options
= CurrentOptions();
1363 options
.create_if_missing
= true;
1364 options
.disable_auto_compactions
= true;
1365 DestroyAndReopen(options
);
1366 CreateAndReopenWithCF({"pikachu"}, options
);
1368 constexpr uint64_t blob_file_number
= 234;
1369 constexpr uint64_t total_blob_count
= 555;
1370 constexpr uint64_t total_blob_bytes
= 66666;
1371 constexpr char checksum_method
[] = "CRC32";
1372 constexpr char checksum_value
[] = "\x3d\x87\xff\x57";
1374 int64_t temp_time
= 0;
1375 options
.env
->GetCurrentTime(&temp_time
).PermitUncheckedError();
1376 uint64_t start_time
= static_cast<uint64_t>(temp_time
);
1379 dbfull()->TEST_LockMutex();
1380 for (int cf
= 0; cf
< 2; cf
++) {
1381 AddBlobFile(handles_
[cf
], blob_file_number
* (cf
+ 1),
1382 total_blob_count
* (cf
+ 1), total_blob_bytes
* (cf
+ 1),
1383 checksum_method
, checksum_value
);
1385 dbfull()->TEST_UnlockMutex();
1387 std::vector
<ColumnFamilyMetaData
> all_meta
;
1388 db_
->GetAllColumnFamilyMetaData(&all_meta
);
1390 std::vector
<std::vector
<FileMetaData
>> default_files_by_level
;
1391 std::vector
<std::vector
<FileMetaData
>> pikachu_files_by_level
;
1392 dbfull()->TEST_GetFilesMetaData(handles_
[0], &default_files_by_level
);
1393 dbfull()->TEST_GetFilesMetaData(handles_
[1], &pikachu_files_by_level
);
1395 options
.env
->GetCurrentTime(&temp_time
).PermitUncheckedError();
1396 uint64_t end_time
= static_cast<uint64_t>(temp_time
);
1398 ASSERT_EQ(all_meta
.size(), 2);
1399 for (int cf
= 0; cf
< 2; cf
++) {
1400 const auto& cfmd
= all_meta
[cf
];
1402 CheckColumnFamilyMeta(cfmd
, "default", default_files_by_level
, start_time
,
1405 CheckColumnFamilyMeta(cfmd
, "pikachu", pikachu_files_by_level
, start_time
,
1408 ASSERT_EQ(cfmd
.blob_files
.size(), 1U);
1409 const auto& bmd
= cfmd
.blob_files
[0];
1410 ASSERT_EQ(cfmd
.blob_file_count
, 1U);
1411 ASSERT_EQ(cfmd
.blob_file_size
, bmd
.blob_file_size
);
1412 ASSERT_EQ(NormalizePath(bmd
.blob_file_path
), NormalizePath(dbname_
));
1413 CheckBlobMetaData(bmd
, blob_file_number
* (cf
+ 1),
1414 total_blob_count
* (cf
+ 1), total_blob_bytes
* (cf
+ 1),
1415 checksum_method
, checksum_value
);
1420 void MinLevelHelper(DBTest
* self
, Options
& options
) {
1423 for (int num
= 0; num
< options
.level0_file_num_compaction_trigger
- 1;
1425 std::vector
<std::string
> values
;
1426 // Write 120KB (12 values, each 10K)
1427 for (int i
= 0; i
< 12; i
++) {
1428 values
.push_back(rnd
.RandomString(10000));
1429 ASSERT_OK(self
->Put(DBTestBase::Key(i
), values
[i
]));
1431 ASSERT_OK(self
->dbfull()->TEST_WaitForFlushMemTable());
1432 ASSERT_EQ(self
->NumTableFilesAtLevel(0), num
+ 1);
1435 // generate one more file in level-0, and should trigger level-0 compaction
1436 std::vector
<std::string
> values
;
1437 for (int i
= 0; i
< 12; i
++) {
1438 values
.push_back(rnd
.RandomString(10000));
1439 ASSERT_OK(self
->Put(DBTestBase::Key(i
), values
[i
]));
1441 ASSERT_OK(self
->dbfull()->TEST_WaitForCompact());
1443 ASSERT_EQ(self
->NumTableFilesAtLevel(0), 0);
1444 ASSERT_EQ(self
->NumTableFilesAtLevel(1), 1);
1447 // returns false if the calling-Test should be skipped
1448 bool MinLevelToCompress(CompressionType
& type
, Options
& options
, int wbits
,
1449 int lev
, int strategy
) {
1451 "Test with compression options : window_bits = %d, level = %d, "
1453 wbits
, lev
, strategy
);
1454 options
.write_buffer_size
= 100 << 10; // 100KB
1455 options
.arena_block_size
= 4096;
1456 options
.num_levels
= 3;
1457 options
.level0_file_num_compaction_trigger
= 3;
1458 options
.create_if_missing
= true;
1460 if (Snappy_Supported()) {
1461 type
= kSnappyCompression
;
1462 fprintf(stderr
, "using snappy\n");
1463 } else if (Zlib_Supported()) {
1464 type
= kZlibCompression
;
1465 fprintf(stderr
, "using zlib\n");
1466 } else if (BZip2_Supported()) {
1467 type
= kBZip2Compression
;
1468 fprintf(stderr
, "using bzip2\n");
1469 } else if (LZ4_Supported()) {
1470 type
= kLZ4Compression
;
1471 fprintf(stderr
, "using lz4\n");
1472 } else if (XPRESS_Supported()) {
1473 type
= kXpressCompression
;
1474 fprintf(stderr
, "using xpress\n");
1475 } else if (ZSTD_Supported()) {
1477 fprintf(stderr
, "using ZSTD\n");
1479 fprintf(stderr
, "skipping test, compression disabled\n");
1482 options
.compression_per_level
.resize(options
.num_levels
);
1484 // do not compress L0
1485 for (int i
= 0; i
< 1; i
++) {
1486 options
.compression_per_level
[i
] = kNoCompression
;
1488 for (int i
= 1; i
< options
.num_levels
; i
++) {
1489 options
.compression_per_level
[i
] = type
;
1493 } // anonymous namespace
1495 TEST_F(DBTest
, MinLevelToCompress1
) {
1496 Options options
= CurrentOptions();
1497 CompressionType type
= kSnappyCompression
;
1498 if (!MinLevelToCompress(type
, options
, -14, -1, 0)) {
1502 MinLevelHelper(this, options
);
1504 // do not compress L0 and L1
1505 for (int i
= 0; i
< 2; i
++) {
1506 options
.compression_per_level
[i
] = kNoCompression
;
1508 for (int i
= 2; i
< options
.num_levels
; i
++) {
1509 options
.compression_per_level
[i
] = type
;
1511 DestroyAndReopen(options
);
1512 MinLevelHelper(this, options
);
1515 TEST_F(DBTest
, MinLevelToCompress2
) {
1516 Options options
= CurrentOptions();
1517 CompressionType type
= kSnappyCompression
;
1518 if (!MinLevelToCompress(type
, options
, 15, -1, 0)) {
1522 MinLevelHelper(this, options
);
1524 // do not compress L0 and L1
1525 for (int i
= 0; i
< 2; i
++) {
1526 options
.compression_per_level
[i
] = kNoCompression
;
1528 for (int i
= 2; i
< options
.num_levels
; i
++) {
1529 options
.compression_per_level
[i
] = type
;
1531 DestroyAndReopen(options
);
1532 MinLevelHelper(this, options
);
1535 // This test may fail because of a legit case that multiple L0 files
1536 // are trivial moved to L1.
1537 TEST_F(DBTest
, DISABLED_RepeatedWritesToSameKey
) {
1539 Options options
= CurrentOptions();
1541 options
.write_buffer_size
= 100000; // Small write buffer
1542 CreateAndReopenWithCF({"pikachu"}, options
);
1544 // We must have at most one file per level except for level-0,
1545 // which may have up to kL0_StopWritesTrigger files.
1546 const int kMaxFiles
=
1547 options
.num_levels
+ options
.level0_stop_writes_trigger
;
1551 rnd
.RandomString(static_cast<int>(2 * options
.write_buffer_size
));
1552 for (int i
= 0; i
< 5 * kMaxFiles
; i
++) {
1553 ASSERT_OK(Put(1, "key", value
));
1554 ASSERT_LE(TotalTableFiles(1), kMaxFiles
);
1556 } while (ChangeCompactOptions());
1558 #endif // ROCKSDB_LITE
1560 #ifndef ROCKSDB_LITE
// Returns true iff val lies in the closed interval [low, high]. On a
// miss, prints the value and bounds to stderr so failing size checks are
// easy to diagnose in test logs.
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  const bool in_range = (val >= low) && (val <= high);
  if (!in_range) {
    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
            (unsigned long long)(val), (unsigned long long)(low),
            (unsigned long long)(high));
  }
  return in_range;
}
1571 TEST_F(DBTest
, ApproximateSizesMemTable
) {
1572 Options options
= CurrentOptions();
1573 options
.write_buffer_size
= 100000000; // Large write buffer
1574 options
.compression
= kNoCompression
;
1575 options
.create_if_missing
= true;
1576 DestroyAndReopen(options
);
1577 auto default_cf
= db_
->DefaultColumnFamily();
1581 for (int i
= 0; i
< N
; i
++) {
1582 ASSERT_OK(Put(Key(i
), rnd
.RandomString(1024)));
1586 std::string start
= Key(50);
1587 std::string end
= Key(60);
1588 Range
r(start
, end
);
1589 SizeApproximationOptions size_approx_options
;
1590 size_approx_options
.include_memtables
= true;
1591 size_approx_options
.include_files
= true;
1593 db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1, &size
));
1594 ASSERT_GT(size
, 6000);
1595 ASSERT_LT(size
, 204800);
1596 // Zero if not including mem table
1597 ASSERT_OK(db_
->GetApproximateSizes(&r
, 1, &size
));
1602 r
= Range(start
, end
);
1604 db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1, &size
));
1607 for (int i
= 0; i
< N
; i
++) {
1608 ASSERT_OK(Put(Key(1000 + i
), rnd
.RandomString(1024)));
1613 r
= Range(start
, end
);
1615 db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1, &size
));
1620 r
= Range(start
, end
);
1622 db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1, &size
));
1623 ASSERT_GT(size
, 6000);
1625 options
.max_write_buffer_number
= 8;
1626 options
.min_write_buffer_number_to_merge
= 5;
1627 options
.write_buffer_size
= 1024 * N
; // Not very large
1628 DestroyAndReopen(options
);
1629 default_cf
= db_
->DefaultColumnFamily();
1632 for (int i
= 0; i
< N
; i
++) {
1633 keys
[i
* 3] = i
* 5;
1634 keys
[i
* 3 + 1] = i
* 5 + 1;
1635 keys
[i
* 3 + 2] = i
* 5 + 2;
1637 // MemTable entry counting is estimated and can vary greatly depending on
1638 // layout. Thus, using deterministic seed for test stability.
1639 RandomShuffle(std::begin(keys
), std::end(keys
), rnd
.Next());
1641 for (int i
= 0; i
< N
* 3; i
++) {
1642 ASSERT_OK(Put(Key(keys
[i
] + 1000), rnd
.RandomString(1024)));
1647 r
= Range(start
, end
);
1649 db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1, &size
));
1654 r
= Range(start
, end
);
1656 db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1, &size
));
1657 ASSERT_GT(size
, 6000);
1661 r
= Range(start
, end
);
1663 db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1, &size
));
1668 r
= Range(start
, end
);
1669 uint64_t size_with_mt
, size_without_mt
;
1670 ASSERT_OK(db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1,
1672 ASSERT_GT(size_with_mt
, 6000);
1673 ASSERT_OK(db_
->GetApproximateSizes(&r
, 1, &size_without_mt
));
1674 ASSERT_EQ(size_without_mt
, 0);
1678 for (int i
= 0; i
< N
; i
++) {
1679 ASSERT_OK(Put(Key(i
+ 1000), rnd
.RandomString(1024)));
1684 r
= Range(start
, end
);
1685 ASSERT_OK(db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1,
1687 ASSERT_OK(db_
->GetApproximateSizes(&r
, 1, &size_without_mt
));
1688 ASSERT_GT(size_with_mt
, size_without_mt
);
1689 ASSERT_GT(size_without_mt
, 6000);
1691 // Check that include_memtables flag works as expected
1692 size_approx_options
.include_memtables
= false;
1694 db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1, &size
));
1695 ASSERT_EQ(size
, size_without_mt
);
1697 // Check that files_size_error_margin works as expected, when the heuristic
1698 // conditions are not met
1700 end
= Key(1000 + N
- 2);
1701 r
= Range(start
, end
);
1702 size_approx_options
.files_size_error_margin
= -1.0; // disabled
1704 db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1, &size
));
1706 size_approx_options
.files_size_error_margin
= 0.5; // enabled, but not used
1708 db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1, &size2
));
1709 ASSERT_EQ(size
, size2
);
1712 TEST_F(DBTest
, ApproximateSizesFilesWithErrorMargin
) {
1713 // Roughly 4 keys per data block, 1000 keys per file,
1714 // with filter substantially larger than a data block
1715 BlockBasedTableOptions table_options
;
1716 table_options
.filter_policy
.reset(NewBloomFilterPolicy(16));
1717 table_options
.block_size
= 100;
1718 Options options
= CurrentOptions();
1719 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1720 options
.write_buffer_size
= 24 * 1024;
1721 options
.compression
= kNoCompression
;
1722 options
.create_if_missing
= true;
1723 options
.target_file_size_base
= 24 * 1024;
1724 DestroyAndReopen(options
);
1725 const auto default_cf
= db_
->DefaultColumnFamily();
1727 const int N
= 64000;
1729 for (int i
= 0; i
< N
; i
++) {
1730 ASSERT_OK(Put(Key(i
), rnd
.RandomString(24)));
1732 // Flush everything to files
1734 // Compact the entire key space into the next level
1736 db_
->CompactRange(CompactRangeOptions(), default_cf
, nullptr, nullptr));
1739 for (int i
= N
; i
< (N
+ N
/ 4); i
++) {
1740 ASSERT_OK(Put(Key(i
), rnd
.RandomString(24)));
1742 // Flush everything to files again
1745 // Wait for compaction to finish
1746 ASSERT_OK(dbfull()->TEST_WaitForCompact());
1749 const std::string start
= Key(0);
1750 const std::string end
= Key(2 * N
);
1751 const Range
r(start
, end
);
1753 SizeApproximationOptions size_approx_options
;
1754 size_approx_options
.include_memtables
= false;
1755 size_approx_options
.include_files
= true;
1756 size_approx_options
.files_size_error_margin
= -1.0; // disabled
1758 // Get the precise size without any approximation heuristic
1760 ASSERT_OK(db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1,
1764 // Get the size with an approximation heuristic
1766 const double error_margin
= 0.2;
1767 size_approx_options
.files_size_error_margin
= error_margin
;
1768 ASSERT_OK(db_
->GetApproximateSizes(size_approx_options
, default_cf
, &r
, 1,
1770 ASSERT_LT(size2
, size
* (1 + error_margin
));
1771 ASSERT_GT(size2
, size
* (1 - error_margin
));
1775 // Ensure that metadata is not falsely attributed only to the last data in
1776 // the file. (In some applications, filters can be large portion of data
1778 // Perform many queries over small range, enough to ensure crossing file
1779 // boundary, and make sure we never see a spike for large filter.
1780 for (int i
= 0; i
< 3000; i
+= 10) {
1781 const std::string start
= Key(i
);
1782 const std::string end
= Key(i
+ 11); // overlap by 1 key
1783 const Range
r(start
, end
);
1785 ASSERT_OK(db_
->GetApproximateSizes(&r
, 1, &size
));
1786 ASSERT_LE(size
, 11 * 100);
1791 TEST_F(DBTest
, GetApproximateMemTableStats
) {
1792 Options options
= CurrentOptions();
1793 options
.write_buffer_size
= 100000000;
1794 options
.compression
= kNoCompression
;
1795 options
.create_if_missing
= true;
1796 DestroyAndReopen(options
);
1800 for (int i
= 0; i
< N
; i
++) {
1801 ASSERT_OK(Put(Key(i
), rnd
.RandomString(1024)));
1807 std::string start
= Key(50);
1808 std::string end
= Key(60);
1809 Range
r(start
, end
);
1810 db_
->GetApproximateMemTableStats(r
, &count
, &size
);
1811 ASSERT_GT(count
, 0);
1812 ASSERT_LE(count
, N
);
1813 ASSERT_GT(size
, 6000);
1814 ASSERT_LT(size
, 204800);
1818 r
= Range(start
, end
);
1819 db_
->GetApproximateMemTableStats(r
, &count
, &size
);
1820 ASSERT_EQ(count
, 0);
1827 r
= Range(start
, end
);
1828 db_
->GetApproximateMemTableStats(r
, &count
, &size
);
1829 ASSERT_EQ(count
, 0);
1832 for (int i
= 0; i
< N
; i
++) {
1833 ASSERT_OK(Put(Key(1000 + i
), rnd
.RandomString(1024)));
1838 r
= Range(start
, end
);
1839 db_
->GetApproximateMemTableStats(r
, &count
, &size
);
1840 ASSERT_GT(count
, 20);
1841 ASSERT_GT(size
, 6000);
1844 TEST_F(DBTest
, ApproximateSizes
) {
1846 Options options
= CurrentOptions();
1847 options
.write_buffer_size
= 100000000; // Large write buffer
1848 options
.compression
= kNoCompression
;
1849 options
.create_if_missing
= true;
1850 DestroyAndReopen(options
);
1851 CreateAndReopenWithCF({"pikachu"}, options
);
1854 ASSERT_OK(Size("", "xyz", 1, &size
));
1855 ASSERT_TRUE(Between(size
, 0, 0));
1856 ReopenWithColumnFamilies({"default", "pikachu"}, options
);
1857 ASSERT_OK(Size("", "xyz", 1, &size
));
1858 ASSERT_TRUE(Between(size
, 0, 0));
1860 // Write 8MB (80 values, each 100K)
1861 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
1863 static const int S1
= 100000;
1864 static const int S2
= 105000; // Allow some expansion from metadata
1866 for (int i
= 0; i
< N
; i
++) {
1867 ASSERT_OK(Put(1, Key(i
), rnd
.RandomString(S1
)));
1870 // 0 because GetApproximateSizes() does not account for memtable space
1871 ASSERT_OK(Size("", Key(50), 1, &size
));
1872 ASSERT_TRUE(Between(size
, 0, 0));
1874 // Check sizes across recovery by reopening a few times
1875 for (int run
= 0; run
< 3; run
++) {
1876 ReopenWithColumnFamilies({"default", "pikachu"}, options
);
1878 for (int compact_start
= 0; compact_start
< N
; compact_start
+= 10) {
1879 for (int i
= 0; i
< N
; i
+= 10) {
1880 ASSERT_OK(Size("", Key(i
), 1, &size
));
1881 ASSERT_TRUE(Between(size
, S1
* i
, S2
* i
));
1882 ASSERT_OK(Size("", Key(i
) + ".suffix", 1, &size
));
1883 ASSERT_TRUE(Between(size
, S1
* (i
+ 1), S2
* (i
+ 1)));
1884 ASSERT_OK(Size(Key(i
), Key(i
+ 10), 1, &size
));
1885 ASSERT_TRUE(Between(size
, S1
* 10, S2
* 10));
1887 ASSERT_OK(Size("", Key(50), 1, &size
));
1888 ASSERT_TRUE(Between(size
, S1
* 50, S2
* 50));
1889 ASSERT_OK(Size("", Key(50) + ".suffix", 1, &size
));
1890 ASSERT_TRUE(Between(size
, S1
* 50, S2
* 50));
1892 std::string cstart_str
= Key(compact_start
);
1893 std::string cend_str
= Key(compact_start
+ 9);
1894 Slice cstart
= cstart_str
;
1895 Slice cend
= cend_str
;
1896 ASSERT_OK(dbfull()->TEST_CompactRange(0, &cstart
, &cend
, handles_
[1]));
1899 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
1900 ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
1902 // ApproximateOffsetOf() is not yet implemented in plain table format.
1903 } while (ChangeOptions(kSkipUniversalCompaction
| kSkipFIFOCompaction
|
1904 kSkipPlainTable
| kSkipHashIndex
));
1907 TEST_F(DBTest
, ApproximateSizes_MixOfSmallAndLarge
) {
1909 Options options
= CurrentOptions();
1910 options
.compression
= kNoCompression
;
1911 CreateAndReopenWithCF({"pikachu"}, options
);
1914 std::string big1
= rnd
.RandomString(100000);
1915 ASSERT_OK(Put(1, Key(0), rnd
.RandomString(10000)));
1916 ASSERT_OK(Put(1, Key(1), rnd
.RandomString(10000)));
1917 ASSERT_OK(Put(1, Key(2), big1
));
1918 ASSERT_OK(Put(1, Key(3), rnd
.RandomString(10000)));
1919 ASSERT_OK(Put(1, Key(4), big1
));
1920 ASSERT_OK(Put(1, Key(5), rnd
.RandomString(10000)));
1921 ASSERT_OK(Put(1, Key(6), rnd
.RandomString(300000)));
1922 ASSERT_OK(Put(1, Key(7), rnd
.RandomString(10000)));
1924 // Check sizes across recovery by reopening a few times
1926 for (int run
= 0; run
< 3; run
++) {
1927 ReopenWithColumnFamilies({"default", "pikachu"}, options
);
1929 ASSERT_OK(Size("", Key(0), 1, &size
));
1930 ASSERT_TRUE(Between(size
, 0, 0));
1931 ASSERT_OK(Size("", Key(1), 1, &size
));
1932 ASSERT_TRUE(Between(size
, 10000, 11000));
1933 ASSERT_OK(Size("", Key(2), 1, &size
));
1934 ASSERT_TRUE(Between(size
, 20000, 21000));
1935 ASSERT_OK(Size("", Key(3), 1, &size
));
1936 ASSERT_TRUE(Between(size
, 120000, 121000));
1937 ASSERT_OK(Size("", Key(4), 1, &size
));
1938 ASSERT_TRUE(Between(size
, 130000, 131000));
1939 ASSERT_OK(Size("", Key(5), 1, &size
));
1940 ASSERT_TRUE(Between(size
, 230000, 232000));
1941 ASSERT_OK(Size("", Key(6), 1, &size
));
1942 ASSERT_TRUE(Between(size
, 240000, 242000));
1943 // Ensure some overhead is accounted for, even without including all
1944 ASSERT_OK(Size("", Key(7), 1, &size
));
1945 ASSERT_TRUE(Between(size
, 540500, 545000));
1946 ASSERT_OK(Size("", Key(8), 1, &size
));
1947 ASSERT_TRUE(Between(size
, 550500, 555000));
1949 ASSERT_OK(Size(Key(3), Key(5), 1, &size
));
1950 ASSERT_TRUE(Between(size
, 110100, 111000));
1952 ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_
[1]));
1954 // ApproximateOffsetOf() is not yet implemented in plain table format.
1955 } while (ChangeOptions(kSkipPlainTable
));
1957 #endif // ROCKSDB_LITE
1959 #ifndef ROCKSDB_LITE
1960 TEST_F(DBTest
, Snapshot
) {
1961 env_
->SetMockSleep();
1962 anon::OptionsOverride options_override
;
1963 options_override
.skip_policy
= kSkipNoSnapshot
;
1965 CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override
));
1966 ASSERT_OK(Put(0, "foo", "0v1"));
1967 ASSERT_OK(Put(1, "foo", "1v1"));
1969 const Snapshot
* s1
= db_
->GetSnapshot();
1970 ASSERT_EQ(1U, GetNumSnapshots());
1971 uint64_t time_snap1
= GetTimeOldestSnapshots();
1972 ASSERT_GT(time_snap1
, 0U);
1973 ASSERT_EQ(GetSequenceOldestSnapshots(), s1
->GetSequenceNumber());
1974 ASSERT_EQ(GetTimeOldestSnapshots(),
1975 static_cast<uint64_t>(s1
->GetUnixTime()));
1976 ASSERT_OK(Put(0, "foo", "0v2"));
1977 ASSERT_OK(Put(1, "foo", "1v2"));
1979 env_
->MockSleepForSeconds(1);
1981 const Snapshot
* s2
= db_
->GetSnapshot();
1982 ASSERT_EQ(2U, GetNumSnapshots());
1983 ASSERT_EQ(time_snap1
, GetTimeOldestSnapshots());
1984 ASSERT_EQ(GetSequenceOldestSnapshots(), s1
->GetSequenceNumber());
1985 ASSERT_EQ(GetTimeOldestSnapshots(),
1986 static_cast<uint64_t>(s1
->GetUnixTime()));
1987 ASSERT_OK(Put(0, "foo", "0v3"));
1988 ASSERT_OK(Put(1, "foo", "1v3"));
1991 ManagedSnapshot
s3(db_
);
1992 ASSERT_EQ(3U, GetNumSnapshots());
1993 ASSERT_EQ(time_snap1
, GetTimeOldestSnapshots());
1994 ASSERT_EQ(GetSequenceOldestSnapshots(), s1
->GetSequenceNumber());
1995 ASSERT_EQ(GetTimeOldestSnapshots(),
1996 static_cast<uint64_t>(s1
->GetUnixTime()));
1998 ASSERT_OK(Put(0, "foo", "0v4"));
1999 ASSERT_OK(Put(1, "foo", "1v4"));
2000 ASSERT_EQ("0v1", Get(0, "foo", s1
));
2001 ASSERT_EQ("1v1", Get(1, "foo", s1
));
2002 ASSERT_EQ("0v2", Get(0, "foo", s2
));
2003 ASSERT_EQ("1v2", Get(1, "foo", s2
));
2004 ASSERT_EQ("0v3", Get(0, "foo", s3
.snapshot()));
2005 ASSERT_EQ("1v3", Get(1, "foo", s3
.snapshot()));
2006 ASSERT_EQ("0v4", Get(0, "foo"));
2007 ASSERT_EQ("1v4", Get(1, "foo"));
2010 ASSERT_EQ(2U, GetNumSnapshots());
2011 ASSERT_EQ(time_snap1
, GetTimeOldestSnapshots());
2012 ASSERT_EQ(GetSequenceOldestSnapshots(), s1
->GetSequenceNumber());
2013 ASSERT_EQ(GetTimeOldestSnapshots(),
2014 static_cast<uint64_t>(s1
->GetUnixTime()));
2015 ASSERT_EQ("0v1", Get(0, "foo", s1
));
2016 ASSERT_EQ("1v1", Get(1, "foo", s1
));
2017 ASSERT_EQ("0v2", Get(0, "foo", s2
));
2018 ASSERT_EQ("1v2", Get(1, "foo", s2
));
2019 ASSERT_EQ("0v4", Get(0, "foo"));
2020 ASSERT_EQ("1v4", Get(1, "foo"));
2022 db_
->ReleaseSnapshot(s1
);
2023 ASSERT_EQ("0v2", Get(0, "foo", s2
));
2024 ASSERT_EQ("1v2", Get(1, "foo", s2
));
2025 ASSERT_EQ("0v4", Get(0, "foo"));
2026 ASSERT_EQ("1v4", Get(1, "foo"));
2027 ASSERT_EQ(1U, GetNumSnapshots());
2028 ASSERT_LT(time_snap1
, GetTimeOldestSnapshots());
2029 ASSERT_EQ(GetSequenceOldestSnapshots(), s2
->GetSequenceNumber());
2030 ASSERT_EQ(GetTimeOldestSnapshots(),
2031 static_cast<uint64_t>(s2
->GetUnixTime()));
2033 db_
->ReleaseSnapshot(s2
);
2034 ASSERT_EQ(0U, GetNumSnapshots());
2035 ASSERT_EQ(GetSequenceOldestSnapshots(), 0);
2036 ASSERT_EQ("0v4", Get(0, "foo"));
2037 ASSERT_EQ("1v4", Get(1, "foo"));
2038 } while (ChangeOptions());
2041 TEST_F(DBTest
, HiddenValuesAreRemoved
) {
2042 anon::OptionsOverride options_override
;
2043 options_override
.skip_policy
= kSkipNoSnapshot
;
2046 Options options
= CurrentOptions(options_override
);
2047 CreateAndReopenWithCF({"pikachu"}, options
);
2049 FillLevels("a", "z", 1);
2051 std::string big
= rnd
.RandomString(50000);
2052 ASSERT_OK(Put(1, "foo", big
));
2053 ASSERT_OK(Put(1, "pastfoo", "v"));
2054 const Snapshot
* snapshot
= db_
->GetSnapshot();
2055 ASSERT_OK(Put(1, "foo", "tiny"));
2056 ASSERT_OK(Put(1, "pastfoo2", "v2")); // Advance sequence number one more
2058 ASSERT_OK(Flush(1));
2059 ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
2061 ASSERT_EQ(big
, Get(1, "foo", snapshot
));
2062 ASSERT_OK(Size("", "pastfoo", 1, &size
));
2063 ASSERT_TRUE(Between(size
, 50000, 60000));
2064 db_
->ReleaseSnapshot(snapshot
);
2065 ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny, " + big
+ " ]");
2067 ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, &x
, handles_
[1]));
2068 ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
2069 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
2070 ASSERT_GE(NumTableFilesAtLevel(1, 1), 1);
2071 ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, &x
, handles_
[1]));
2072 ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
2074 ASSERT_OK(Size("", "pastfoo", 1, &size
));
2075 ASSERT_TRUE(Between(size
, 0, 1000));
2076 // ApproximateOffsetOf() is not yet implemented in plain table format,
2077 // which is used by Size().
2078 } while (ChangeOptions(kSkipUniversalCompaction
| kSkipFIFOCompaction
|
2081 #endif // ROCKSDB_LITE
2083 TEST_F(DBTest
, UnremovableSingleDelete
) {
2086 // Put(A, v1) Snapshot SingleDelete(A) Put(A, v2)
2088 // We do not want to end up with:
2090 // Put(A, v1) Snapshot Put(A, v2)
2092 // Because a subsequent SingleDelete(A) would delete the Put(A, v2)
2093 // but not Put(A, v1), so Get(A) would return v1.
2094 anon::OptionsOverride options_override
;
2095 options_override
.skip_policy
= kSkipNoSnapshot
;
2097 Options options
= CurrentOptions(options_override
);
2098 options
.disable_auto_compactions
= true;
2099 CreateAndReopenWithCF({"pikachu"}, options
);
2101 ASSERT_OK(Put(1, "foo", "first"));
2102 const Snapshot
* snapshot
= db_
->GetSnapshot();
2103 ASSERT_OK(SingleDelete(1, "foo"));
2104 ASSERT_OK(Put(1, "foo", "second"));
2105 ASSERT_OK(Flush(1));
2107 ASSERT_EQ("first", Get(1, "foo", snapshot
));
2108 ASSERT_EQ("second", Get(1, "foo"));
2110 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_
[1],
2112 ASSERT_EQ("[ second, SDEL, first ]", AllEntriesFor("foo", 1));
2114 ASSERT_OK(SingleDelete(1, "foo"));
2116 ASSERT_EQ("first", Get(1, "foo", snapshot
));
2117 ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
2119 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_
[1],
2122 ASSERT_EQ("first", Get(1, "foo", snapshot
));
2123 ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
2124 db_
->ReleaseSnapshot(snapshot
);
2125 // Skip FIFO and universal compaction because they do not apply to the test
2126 // case. Skip MergePut because single delete does not get removed when it
2127 // encounters a merge.
2128 } while (ChangeOptions(kSkipFIFOCompaction
| kSkipUniversalCompaction
|
2132 #ifndef ROCKSDB_LITE
2133 TEST_F(DBTest
, DeletionMarkers1
) {
2134 Options options
= CurrentOptions();
2135 CreateAndReopenWithCF({"pikachu"}, options
);
2136 ASSERT_OK(Put(1, "foo", "v1"));
2137 ASSERT_OK(Flush(1));
2139 MoveFilesToLevel(last
, 1);
2140 // foo => v1 is now in last level
2141 ASSERT_EQ(NumTableFilesAtLevel(last
, 1), 1);
2143 // Place a table at level last-1 to prevent merging with preceding mutation
2144 ASSERT_OK(Put(1, "a", "begin"));
2145 ASSERT_OK(Put(1, "z", "end"));
2146 ASSERT_OK(Flush(1));
2147 MoveFilesToLevel(last
- 1, 1);
2148 ASSERT_EQ(NumTableFilesAtLevel(last
, 1), 1);
2149 ASSERT_EQ(NumTableFilesAtLevel(last
- 1, 1), 1);
2151 ASSERT_OK(Delete(1, "foo"));
2152 ASSERT_OK(Put(1, "foo", "v2"));
2153 ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
2154 ASSERT_OK(Flush(1)); // Moves to level last-2
2155 ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
2157 ASSERT_OK(dbfull()->TEST_CompactRange(last
- 2, nullptr, &z
, handles_
[1]));
2158 // DEL eliminated, but v1 remains because we aren't compacting that level
2159 // (DEL can be eliminated because v2 hides v1).
2160 ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
2162 dbfull()->TEST_CompactRange(last
- 1, nullptr, nullptr, handles_
[1]));
2163 // Merging last-1 w/ last, so we are the base level for "foo", so
2164 // DEL is removed. (as is v1).
2165 ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
2168 TEST_F(DBTest
, DeletionMarkers2
) {
2169 Options options
= CurrentOptions();
2170 CreateAndReopenWithCF({"pikachu"}, options
);
2171 ASSERT_OK(Put(1, "foo", "v1"));
2172 ASSERT_OK(Flush(1));
2174 MoveFilesToLevel(last
, 1);
2175 // foo => v1 is now in last level
2176 ASSERT_EQ(NumTableFilesAtLevel(last
, 1), 1);
2178 // Place a table at level last-1 to prevent merging with preceding mutation
2179 ASSERT_OK(Put(1, "a", "begin"));
2180 ASSERT_OK(Put(1, "z", "end"));
2181 ASSERT_OK(Flush(1));
2182 MoveFilesToLevel(last
- 1, 1);
2183 ASSERT_EQ(NumTableFilesAtLevel(last
, 1), 1);
2184 ASSERT_EQ(NumTableFilesAtLevel(last
- 1, 1), 1);
2186 ASSERT_OK(Delete(1, "foo"));
2187 ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
2188 ASSERT_OK(Flush(1)); // Moves to level last-2
2189 ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
2191 dbfull()->TEST_CompactRange(last
- 2, nullptr, nullptr, handles_
[1]));
2192 // DEL kept: "last" file overlaps
2193 ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
2195 dbfull()->TEST_CompactRange(last
- 1, nullptr, nullptr, handles_
[1]));
2196 // Merging last-1 w/ last, so we are the base level for "foo", so
2197 // DEL is removed. (as is v1).
2198 ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
2201 TEST_F(DBTest
, OverlapInLevel0
) {
2203 Options options
= CurrentOptions();
2204 CreateAndReopenWithCF({"pikachu"}, options
);
2206 // Fill levels 1 and 2 to disable the pushing of new memtables to levels >
2208 ASSERT_OK(Put(1, "100", "v100"));
2209 ASSERT_OK(Put(1, "999", "v999"));
2210 ASSERT_OK(Flush(1));
2211 MoveFilesToLevel(2, 1);
2212 ASSERT_OK(Delete(1, "100"));
2213 ASSERT_OK(Delete(1, "999"));
2214 ASSERT_OK(Flush(1));
2215 MoveFilesToLevel(1, 1);
2216 ASSERT_EQ("0,1,1", FilesPerLevel(1));
2218 // Make files spanning the following ranges in level-0:
2219 // files[0] 200 .. 900
2220 // files[1] 300 .. 500
2221 // Note that files are sorted by smallest key.
2222 ASSERT_OK(Put(1, "300", "v300"));
2223 ASSERT_OK(Put(1, "500", "v500"));
2224 ASSERT_OK(Flush(1));
2225 ASSERT_OK(Put(1, "200", "v200"));
2226 ASSERT_OK(Put(1, "600", "v600"));
2227 ASSERT_OK(Put(1, "900", "v900"));
2228 ASSERT_OK(Flush(1));
2229 ASSERT_EQ("2,1,1", FilesPerLevel(1));
2231 // BEGIN addition to existing test
2232 // Take this opportunity to verify SST unique ids (including Plain table)
2233 TablePropertiesCollection tbc
;
2234 ASSERT_OK(db_
->GetPropertiesOfAllTables(handles_
[1], &tbc
));
2235 VerifySstUniqueIds(tbc
);
2236 // END addition to existing test
2238 // Compact away the placeholder files we created initially
2239 ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_
[1]));
2240 ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr, handles_
[1]));
2241 ASSERT_EQ("2", FilesPerLevel(1));
2243 // Do a memtable compaction. Before bug-fix, the compaction would
2244 // not detect the overlap with level-0 files and would incorrectly place
2245 // the deletion in a deeper level.
2246 ASSERT_OK(Delete(1, "600"));
2247 ASSERT_OK(Flush(1));
2248 ASSERT_EQ("3", FilesPerLevel(1));
2249 ASSERT_EQ("NOT_FOUND", Get(1, "600"));
2250 } while (ChangeOptions(kSkipUniversalCompaction
| kSkipFIFOCompaction
));
2252 #endif // ROCKSDB_LITE
2254 TEST_F(DBTest
, ComparatorCheck
) {
2255 class NewComparator
: public Comparator
{
2257 const char* Name() const override
{ return "rocksdb.NewComparator"; }
2258 int Compare(const Slice
& a
, const Slice
& b
) const override
{
2259 return BytewiseComparator()->Compare(a
, b
);
2261 void FindShortestSeparator(std::string
* s
, const Slice
& l
) const override
{
2262 BytewiseComparator()->FindShortestSeparator(s
, l
);
2264 void FindShortSuccessor(std::string
* key
) const override
{
2265 BytewiseComparator()->FindShortSuccessor(key
);
2268 Options new_options
, options
;
2271 options
= CurrentOptions();
2272 CreateAndReopenWithCF({"pikachu"}, options
);
2273 new_options
= CurrentOptions();
2274 new_options
.comparator
= &cmp
;
2275 // only the non-default column family has non-matching comparator
2276 Status s
= TryReopenWithColumnFamilies(
2277 {"default", "pikachu"}, std::vector
<Options
>({options
, new_options
}));
2278 ASSERT_TRUE(!s
.ok());
2279 ASSERT_TRUE(s
.ToString().find("comparator") != std::string::npos
)
2281 } while (ChangeCompactOptions());
2284 TEST_F(DBTest
, CustomComparator
) {
2285 class NumberComparator
: public Comparator
{
2287 const char* Name() const override
{ return "test.NumberComparator"; }
2288 int Compare(const Slice
& a
, const Slice
& b
) const override
{
2289 return ToNumber(a
) - ToNumber(b
);
2291 void FindShortestSeparator(std::string
* s
, const Slice
& l
) const override
{
2292 ToNumber(*s
); // Check format
2293 ToNumber(l
); // Check format
2295 void FindShortSuccessor(std::string
* key
) const override
{
2296 ToNumber(*key
); // Check format
2300 static int ToNumber(const Slice
& x
) {
2301 // Check that there are no extra characters.
2302 EXPECT_TRUE(x
.size() >= 2 && x
[0] == '[' && x
[x
.size() - 1] == ']')
2306 EXPECT_TRUE(sscanf(x
.ToString().c_str(), "[%i]%c", &val
, &ignored
) == 1)
2311 Options new_options
;
2312 NumberComparator cmp
;
2314 new_options
= CurrentOptions();
2315 new_options
.create_if_missing
= true;
2316 new_options
.comparator
= &cmp
;
2317 new_options
.write_buffer_size
= 4096; // Compact more often
2318 new_options
.arena_block_size
= 4096;
2319 new_options
= CurrentOptions(new_options
);
2320 DestroyAndReopen(new_options
);
2321 CreateAndReopenWithCF({"pikachu"}, new_options
);
2322 ASSERT_OK(Put(1, "[10]", "ten"));
2323 ASSERT_OK(Put(1, "[0x14]", "twenty"));
2324 for (int i
= 0; i
< 2; i
++) {
2325 ASSERT_EQ("ten", Get(1, "[10]"));
2326 ASSERT_EQ("ten", Get(1, "[0xa]"));
2327 ASSERT_EQ("twenty", Get(1, "[20]"));
2328 ASSERT_EQ("twenty", Get(1, "[0x14]"));
2329 ASSERT_EQ("NOT_FOUND", Get(1, "[15]"));
2330 ASSERT_EQ("NOT_FOUND", Get(1, "[0xf]"));
2331 Compact(1, "[0]", "[9999]");
2334 for (int run
= 0; run
< 2; run
++) {
2335 for (int i
= 0; i
< 1000; i
++) {
2337 snprintf(buf
, sizeof(buf
), "[%d]", i
* 10);
2338 ASSERT_OK(Put(1, buf
, buf
));
2340 Compact(1, "[0]", "[1000000]");
2342 } while (ChangeCompactOptions());
2345 TEST_F(DBTest
, DBOpen_Options
) {
2346 Options options
= CurrentOptions();
2347 std::string dbname
= test::PerThreadDBPath("db_options_test");
2348 ASSERT_OK(DestroyDB(dbname
, options
));
2350 // Does not exist, and create_if_missing == false: error
2352 options
.create_if_missing
= false;
2353 Status s
= DB::Open(options
, dbname
, &db
);
2354 ASSERT_TRUE(strstr(s
.ToString().c_str(), "does not exist") != nullptr);
2355 ASSERT_TRUE(db
== nullptr);
2357 // Does not exist, and create_if_missing == true: OK
2358 options
.create_if_missing
= true;
2359 s
= DB::Open(options
, dbname
, &db
);
2361 ASSERT_TRUE(db
!= nullptr);
2366 // Does exist, and error_if_exists == true: error
2367 options
.create_if_missing
= false;
2368 options
.error_if_exists
= true;
2369 s
= DB::Open(options
, dbname
, &db
);
2370 ASSERT_TRUE(strstr(s
.ToString().c_str(), "exists") != nullptr);
2371 ASSERT_TRUE(db
== nullptr);
2373 // Does exist, and error_if_exists == false: OK
2374 options
.create_if_missing
= true;
2375 options
.error_if_exists
= false;
2376 s
= DB::Open(options
, dbname
, &db
);
2378 ASSERT_TRUE(db
!= nullptr);
2384 TEST_F(DBTest
, DBOpen_Change_NumLevels
) {
2385 Options options
= CurrentOptions();
2386 options
.create_if_missing
= true;
2387 DestroyAndReopen(options
);
2388 ASSERT_TRUE(db_
!= nullptr);
2389 CreateAndReopenWithCF({"pikachu"}, options
);
2391 ASSERT_OK(Put(1, "a", "123"));
2392 ASSERT_OK(Put(1, "b", "234"));
2393 ASSERT_OK(Flush(1));
2394 MoveFilesToLevel(3, 1);
2397 options
.create_if_missing
= false;
2398 options
.num_levels
= 2;
2399 Status s
= TryReopenWithColumnFamilies({"default", "pikachu"}, options
);
2400 ASSERT_TRUE(strstr(s
.ToString().c_str(), "Invalid argument") != nullptr);
2401 ASSERT_TRUE(db_
== nullptr);
2404 TEST_F(DBTest
, DestroyDBMetaDatabase
) {
2405 std::string dbname
= test::PerThreadDBPath("db_meta");
2406 ASSERT_OK(env_
->CreateDirIfMissing(dbname
));
2407 std::string metadbname
= MetaDatabaseName(dbname
, 0);
2408 ASSERT_OK(env_
->CreateDirIfMissing(metadbname
));
2409 std::string metametadbname
= MetaDatabaseName(metadbname
, 0);
2410 ASSERT_OK(env_
->CreateDirIfMissing(metametadbname
));
2412 // Destroy previous versions if they exist. Using the long way.
2413 Options options
= CurrentOptions();
2414 ASSERT_OK(DestroyDB(metametadbname
, options
));
2415 ASSERT_OK(DestroyDB(metadbname
, options
));
2416 ASSERT_OK(DestroyDB(dbname
, options
));
2420 ASSERT_OK(DB::Open(options
, dbname
, &db
));
2423 ASSERT_OK(DB::Open(options
, metadbname
, &db
));
2426 ASSERT_OK(DB::Open(options
, metametadbname
, &db
));
2431 ASSERT_OK(DestroyDB(dbname
, options
));
2433 // Check if deletion worked.
2434 options
.create_if_missing
= false;
2435 ASSERT_TRUE(!(DB::Open(options
, dbname
, &db
)).ok());
2436 ASSERT_TRUE(!(DB::Open(options
, metadbname
, &db
)).ok());
2437 ASSERT_TRUE(!(DB::Open(options
, metametadbname
, &db
)).ok());
2440 #ifndef ROCKSDB_LITE
2441 TEST_F(DBTest
, SnapshotFiles
) {
2443 Options options
= CurrentOptions();
2444 options
.write_buffer_size
= 100000000; // Large write buffer
2445 CreateAndReopenWithCF({"pikachu"}, options
);
2449 // Write 8MB (80 values, each 100K)
2450 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
2451 std::vector
<std::string
> values
;
2452 for (int i
= 0; i
< 80; i
++) {
2453 values
.push_back(rnd
.RandomString(100000));
2454 ASSERT_OK(Put((i
< 40), Key(i
), values
[i
]));
2457 // assert that nothing makes it to disk yet.
2458 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
2460 // get a file snapshot
2461 uint64_t manifest_number
= 0;
2462 uint64_t manifest_size
= 0;
2463 std::vector
<std::string
> files
;
2464 ASSERT_OK(dbfull()->DisableFileDeletions());
2465 ASSERT_OK(dbfull()->GetLiveFiles(files
, &manifest_size
));
2467 // CURRENT, MANIFEST, OPTIONS, *.sst files (one for each CF)
2468 ASSERT_EQ(files
.size(), 5U);
2470 uint64_t number
= 0;
2473 // copy these files to a new snapshot directory
2474 std::string snapdir
= dbname_
+ ".snapdir/";
2475 if (env_
->FileExists(snapdir
).ok()) {
2476 ASSERT_OK(DestroyDir(env_
, snapdir
));
2478 ASSERT_OK(env_
->CreateDir(snapdir
));
2480 for (size_t i
= 0; i
< files
.size(); i
++) {
2481 // our clients require that GetLiveFiles returns
2482 // files with "/" as first character!
2483 ASSERT_EQ(files
[i
][0], '/');
2484 std::string src
= dbname_
+ files
[i
];
2485 std::string dest
= snapdir
+ files
[i
];
2488 ASSERT_OK(env_
->GetFileSize(src
, &size
));
2490 // record the number and the size of the
2491 // latest manifest file
2492 if (ParseFileName(files
[i
].substr(1), &number
, &type
)) {
2493 if (type
== kDescriptorFile
) {
2494 ASSERT_EQ(manifest_number
, 0);
2495 manifest_number
= number
;
2496 ASSERT_GE(size
, manifest_size
);
2497 size
= manifest_size
; // copy only valid MANIFEST data
2500 CopyFile(src
, dest
, size
);
2503 // release file snapshot
2504 ASSERT_OK(dbfull()->EnableFileDeletions(/*force*/ false));
2505 // overwrite one key, this key should not appear in the snapshot
2506 std::vector
<std::string
> extras
;
2507 for (unsigned int i
= 0; i
< 1; i
++) {
2508 extras
.push_back(rnd
.RandomString(100000));
2509 ASSERT_OK(Put(0, Key(i
), extras
[i
]));
2512 // verify that data in the snapshot are correct
2513 std::vector
<ColumnFamilyDescriptor
> column_families
;
2514 column_families
.emplace_back("default", ColumnFamilyOptions());
2515 column_families
.emplace_back("pikachu", ColumnFamilyOptions());
2516 std::vector
<ColumnFamilyHandle
*> cf_handles
;
2520 opts
.create_if_missing
= false;
2522 DB::Open(opts
, snapdir
, column_families
, &cf_handles
, &snapdb
);
2525 ReadOptions roptions
;
2527 for (unsigned int i
= 0; i
< 80; i
++) {
2528 ASSERT_OK(snapdb
->Get(roptions
, cf_handles
[i
< 40], Key(i
), &val
));
2529 ASSERT_EQ(values
[i
].compare(val
), 0);
2531 for (auto cfh
: cf_handles
) {
2536 // look at the new live files after we added an 'extra' key
2537 // and after we took the first snapshot.
2538 uint64_t new_manifest_number
= 0;
2539 uint64_t new_manifest_size
= 0;
2540 std::vector
<std::string
> newfiles
;
2541 ASSERT_OK(dbfull()->DisableFileDeletions());
2542 ASSERT_OK(dbfull()->GetLiveFiles(newfiles
, &new_manifest_size
));
2544 // find the new manifest file. assert that this manifest file is
2545 // the same one as in the previous snapshot. But its size should be
2546 // larger because we added an extra key after taking the
2547 // previous shapshot.
2548 for (size_t i
= 0; i
< newfiles
.size(); i
++) {
2549 std::string src
= dbname_
+ "/" + newfiles
[i
];
2550 // record the lognumber and the size of the
2551 // latest manifest file
2552 if (ParseFileName(newfiles
[i
].substr(1), &number
, &type
)) {
2553 if (type
== kDescriptorFile
) {
2554 ASSERT_EQ(new_manifest_number
, 0);
2556 new_manifest_number
= number
;
2557 ASSERT_OK(env_
->GetFileSize(src
, &size
));
2558 ASSERT_GE(size
, new_manifest_size
);
2562 ASSERT_EQ(manifest_number
, new_manifest_number
);
2563 ASSERT_GT(new_manifest_size
, manifest_size
);
2565 // Also test GetLiveFilesStorageInfo
2566 std::vector
<LiveFileStorageInfo
> new_infos
;
2567 ASSERT_OK(db_
->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(),
2570 // Close DB (while deletions disabled)
2574 for (auto& info
: new_infos
) {
2575 std::string path
= info
.directory
+ "/" + info
.relative_filename
;
2577 ASSERT_OK(env_
->GetFileSize(path
, &size
));
2578 if (info
.trim_to_size
) {
2579 ASSERT_LE(info
.size
, size
);
2580 } else if (!info
.replacement_contents
.empty()) {
2581 ASSERT_EQ(info
.size
, info
.replacement_contents
.size());
2583 ASSERT_EQ(info
.size
, size
);
2585 if (info
.file_type
== kDescriptorFile
) {
2586 ASSERT_EQ(info
.file_number
, manifest_number
);
2589 } while (ChangeCompactOptions());
2592 TEST_F(DBTest
, ReadonlyDBGetLiveManifestSize
) {
2594 Options options
= CurrentOptions();
2595 options
.level0_file_num_compaction_trigger
= 2;
2596 DestroyAndReopen(options
);
2598 ASSERT_OK(Put("foo", "bar"));
2600 ASSERT_OK(Put("foo", "bar"));
2602 ASSERT_OK(dbfull()->TEST_WaitForCompact());
2605 ASSERT_OK(ReadOnlyReopen(options
));
2607 uint64_t manifest_size
= 0;
2608 std::vector
<std::string
> files
;
2609 ASSERT_OK(dbfull()->GetLiveFiles(files
, &manifest_size
));
2611 for (const std::string
& f
: files
) {
2612 uint64_t number
= 0;
2614 if (ParseFileName(f
.substr(1), &number
, &type
)) {
2615 if (type
== kDescriptorFile
) {
2616 uint64_t size_on_disk
;
2617 ASSERT_OK(env_
->GetFileSize(dbname_
+ "/" + f
, &size_on_disk
));
2618 ASSERT_EQ(manifest_size
, size_on_disk
);
2624 } while (ChangeCompactOptions());
2627 TEST_F(DBTest
, GetLiveBlobFiles
) {
2628 // Note: the following prevents an otherwise harmless data race between the
2629 // test setup code (AddBlobFile) below and the periodic stat dumping thread.
2630 Options options
= CurrentOptions();
2631 options
.stats_dump_period_sec
= 0;
2633 constexpr uint64_t blob_file_number
= 234;
2634 constexpr uint64_t total_blob_count
= 555;
2635 constexpr uint64_t total_blob_bytes
= 66666;
2636 constexpr char checksum_method
[] = "CRC32";
2637 constexpr char checksum_value
[] = "\x3d\x87\xff\x57";
2638 constexpr uint64_t garbage_blob_count
= 0;
2639 constexpr uint64_t garbage_blob_bytes
= 0;
2643 AddBlobFile(db_
->DefaultColumnFamily(), blob_file_number
, total_blob_count
,
2644 total_blob_bytes
, checksum_method
, checksum_value
,
2645 garbage_blob_count
, garbage_blob_bytes
);
2646 // Make sure it appears in the results returned by GetLiveFiles.
2647 uint64_t manifest_size
= 0;
2648 std::vector
<std::string
> files
;
2649 ASSERT_OK(dbfull()->GetLiveFiles(files
, &manifest_size
));
2651 ASSERT_FALSE(files
.empty());
2652 ASSERT_EQ(files
[0], BlobFileName("", blob_file_number
));
2654 ColumnFamilyMetaData cfmd
;
2656 db_
->GetColumnFamilyMetaData(&cfmd
);
2657 ASSERT_EQ(cfmd
.blob_files
.size(), 1);
2658 const BlobMetaData
& bmd
= cfmd
.blob_files
[0];
2660 CheckBlobMetaData(bmd
, blob_file_number
, total_blob_count
, total_blob_bytes
,
2661 checksum_method
, checksum_value
, garbage_blob_count
,
2662 garbage_blob_bytes
);
2663 ASSERT_EQ(NormalizePath(bmd
.blob_file_path
), NormalizePath(dbname_
));
2664 ASSERT_EQ(cfmd
.blob_file_count
, 1U);
2665 ASSERT_EQ(cfmd
.blob_file_size
, bmd
.blob_file_size
);
2669 TEST_F(DBTest
, PurgeInfoLogs
) {
2670 Options options
= CurrentOptions();
2671 options
.keep_log_file_num
= 5;
2672 options
.create_if_missing
= true;
2674 for (int mode
= 0; mode
<= 1; mode
++) {
2676 options
.db_log_dir
= dbname_
+ "_logs";
2677 ASSERT_OK(env_
->CreateDirIfMissing(options
.db_log_dir
));
2679 options
.db_log_dir
= "";
2681 for (int i
= 0; i
< 8; i
++) {
2685 std::vector
<std::string
> files
;
2686 ASSERT_OK(env_
->GetChildren(
2687 options
.db_log_dir
.empty() ? dbname_
: options
.db_log_dir
, &files
));
2688 int info_log_count
= 0;
2689 for (std::string file
: files
) {
2690 if (file
.find("LOG") != std::string::npos
) {
2694 ASSERT_EQ(5, info_log_count
);
2697 // For mode (1), test DestroyDB() to delete all the logs under DB dir.
2698 // For mode (2), no info log file should have been put under DB dir.
2699 // Since dbname_ has no children, there is no need to loop db_files
2700 std::vector
<std::string
> db_files
;
2701 ASSERT_TRUE(env_
->GetChildren(dbname_
, &db_files
).IsNotFound());
2702 ASSERT_TRUE(db_files
.empty());
2706 ASSERT_OK(env_
->GetChildren(options
.db_log_dir
, &files
));
2707 for (std::string file
: files
) {
2708 ASSERT_OK(env_
->DeleteFile(options
.db_log_dir
+ "/" + file
));
2710 ASSERT_OK(env_
->DeleteDir(options
.db_log_dir
));
2715 #ifndef ROCKSDB_LITE
2716 // Multi-threaded test:
2719 static const int kColumnFamilies
= 10;
2720 static const int kNumThreads
= 10;
2721 static const int kTestSeconds
= 10;
2722 static const int kNumKeys
= 1000;
2726 std::atomic
<int> counter
[kNumThreads
];
2732 bool multiget_batched
;
2735 static void MTThreadBody(void* arg
) {
2736 MTThread
* t
= reinterpret_cast<MTThread
*>(arg
);
2738 DB
* db
= t
->state
->test
->db_
;
2740 std::shared_ptr
<SystemClock
> clock
= SystemClock::Default();
2741 auto end_micros
= clock
->NowMicros() + kTestSeconds
* 1000000U;
2743 fprintf(stderr
, "... starting thread %d\n", id
);
2744 Random
rnd(1000 + id
);
2746 while (clock
->NowMicros() < end_micros
) {
2747 t
->state
->counter
[id
].store(counter
, std::memory_order_release
);
2749 int key
= rnd
.Uniform(kNumKeys
);
2751 snprintf(keybuf
, sizeof(keybuf
), "%016d", key
);
2754 // Write values of the form <key, my id, counter, cf, unique_id>.
2755 // into each of the CFs
2756 // We add some padding for force compactions.
2757 int unique_id
= rnd
.Uniform(1000000);
2759 // Half of the time directly use WriteBatch. Half of the time use
2760 // WriteBatchWithIndex.
2763 for (int cf
= 0; cf
< kColumnFamilies
; ++cf
) {
2764 snprintf(valbuf
, sizeof(valbuf
), "%d.%d.%d.%d.%-1000d", key
, id
,
2765 static_cast<int>(counter
), cf
, unique_id
);
2766 ASSERT_OK(batch
.Put(t
->state
->test
->handles_
[cf
], Slice(keybuf
),
2769 ASSERT_OK(db
->Write(WriteOptions(), &batch
));
2771 WriteBatchWithIndex
batch(db
->GetOptions().comparator
);
2772 for (int cf
= 0; cf
< kColumnFamilies
; ++cf
) {
2773 snprintf(valbuf
, sizeof(valbuf
), "%d.%d.%d.%d.%-1000d", key
, id
,
2774 static_cast<int>(counter
), cf
, unique_id
);
2775 ASSERT_OK(batch
.Put(t
->state
->test
->handles_
[cf
], Slice(keybuf
),
2778 ASSERT_OK(db
->Write(WriteOptions(), batch
.GetWriteBatch()));
2781 // Read a value and verify that it matches the pattern written above
2782 // and that writes to all column families were atomic (unique_id is the
2784 std::vector
<Slice
> keys(kColumnFamilies
, Slice(keybuf
));
2785 std::vector
<std::string
> values
;
2786 std::vector
<Status
> statuses
;
2787 if (!t
->multiget_batched
) {
2788 statuses
= db
->MultiGet(ReadOptions(), t
->state
->test
->handles_
, keys
,
2791 std::vector
<PinnableSlice
> pin_values(keys
.size());
2792 statuses
.resize(keys
.size());
2793 const Snapshot
* snapshot
= db
->GetSnapshot();
2795 ro
.snapshot
= snapshot
;
2796 for (int cf
= 0; cf
< kColumnFamilies
; ++cf
) {
2797 db
->MultiGet(ro
, t
->state
->test
->handles_
[cf
], 1, &keys
[cf
],
2798 &pin_values
[cf
], &statuses
[cf
]);
2800 db
->ReleaseSnapshot(snapshot
);
2801 values
.resize(keys
.size());
2802 for (int cf
= 0; cf
< kColumnFamilies
; ++cf
) {
2803 if (statuses
[cf
].ok()) {
2804 values
[cf
].assign(pin_values
[cf
].data(), pin_values
[cf
].size());
2808 Status s
= statuses
[0];
2809 // all statuses have to be the same
2810 for (size_t i
= 1; i
< statuses
.size(); ++i
) {
2811 // they are either both ok or both not-found
2812 ASSERT_TRUE((s
.ok() && statuses
[i
].ok()) ||
2813 (s
.IsNotFound() && statuses
[i
].IsNotFound()));
2815 if (s
.IsNotFound()) {
2816 // Key has not yet been written
2818 // Check that the writer thread counter is >= the counter in the value
2821 for (int i
= 0; i
< kColumnFamilies
; ++i
) {
2823 ASSERT_EQ(5, sscanf(values
[i
].c_str(), "%d.%d.%d.%d.%d", &k
, &w
, &c
,
2828 ASSERT_LT(w
, kNumThreads
);
2829 ASSERT_LE(c
, t
->state
->counter
[w
].load(std::memory_order_acquire
));
2834 // this checks that updates across column families happened
2835 // atomically -- all unique ids are the same
2836 ASSERT_EQ(u
, unique_id
);
2843 fprintf(stderr
, "... stopping thread %d after %d ops\n", id
, int(counter
));
2846 } // anonymous namespace
2848 class MultiThreadedDBTest
2850 public ::testing::WithParamInterface
<std::tuple
<int, bool>> {
2852 void SetUp() override
{
2853 std::tie(option_config_
, multiget_batched_
) = GetParam();
2856 static std::vector
<int> GenerateOptionConfigs() {
2857 std::vector
<int> optionConfigs
;
2858 for (int optionConfig
= kDefault
; optionConfig
< kEnd
; ++optionConfig
) {
2859 optionConfigs
.push_back(optionConfig
);
2861 return optionConfigs
;
2864 bool multiget_batched_
;
2867 TEST_P(MultiThreadedDBTest
, MultiThreaded
) {
2868 if (option_config_
== kPipelinedWrite
) return;
2869 anon::OptionsOverride options_override
;
2870 options_override
.skip_policy
= kSkipNoSnapshot
;
2871 Options options
= CurrentOptions(options_override
);
2872 std::vector
<std::string
> cfs
;
2873 for (int i
= 1; i
< kColumnFamilies
; ++i
) {
2874 cfs
.push_back(std::to_string(i
));
2877 CreateAndReopenWithCF(cfs
, options
);
2881 for (int id
= 0; id
< kNumThreads
; id
++) {
2882 mt
.counter
[id
].store(0, std::memory_order_release
);
2886 MTThread thread
[kNumThreads
];
2887 for (int id
= 0; id
< kNumThreads
; id
++) {
2888 thread
[id
].state
= &mt
;
2890 thread
[id
].multiget_batched
= multiget_batched_
;
2891 env_
->StartThread(MTThreadBody
, &thread
[id
]);
2894 env_
->WaitForJoin();
2897 INSTANTIATE_TEST_CASE_P(
2898 MultiThreaded
, MultiThreadedDBTest
,
2900 ::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs()),
2901 ::testing::Bool()));
2902 #endif // ROCKSDB_LITE
2904 // Group commit test:
2905 #if !defined(OS_WIN)
2906 // Disable this test temporarily on Travis and appveyor as it fails
2907 // intermittently. Github issue: #4151
2910 static const int kGCNumThreads
= 4;
2911 static const int kGCNumKeys
= 1000;
2916 std::atomic
<bool> done
;
2919 static void GCThreadBody(void* arg
) {
2920 GCThread
* t
= reinterpret_cast<GCThread
*>(arg
);
2925 for (int i
= 0; i
< kGCNumKeys
; ++i
) {
2926 std::string
kv(std::to_string(i
+ id
* kGCNumKeys
));
2927 ASSERT_OK(db
->Put(wo
, kv
, kv
));
2932 } // anonymous namespace
2934 TEST_F(DBTest
, GroupCommitTest
) {
2936 Options options
= CurrentOptions();
2938 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2941 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
2942 {{"WriteThread::JoinBatchGroup:BeganWaiting",
2943 "DBImpl::WriteImpl:BeforeLeaderEnters"},
2944 {"WriteThread::AwaitState:BlockingWaiting",
2945 "WriteThread::EnterAsBatchGroupLeader:End"}});
2946 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
2949 GCThread thread
[kGCNumThreads
];
2950 for (int id
= 0; id
< kGCNumThreads
; id
++) {
2952 thread
[id
].db
= db_
;
2953 thread
[id
].done
= false;
2954 env_
->StartThread(GCThreadBody
, &thread
[id
]);
2956 env_
->WaitForJoin();
2958 ASSERT_GT(TestGetTickerCount(options
, WRITE_DONE_BY_OTHER
), 0);
2960 std::vector
<std::string
> expected_db
;
2961 for (int i
= 0; i
< kGCNumThreads
* kGCNumKeys
; ++i
) {
2962 expected_db
.push_back(std::to_string(i
));
2964 std::sort(expected_db
.begin(), expected_db
.end());
2966 Iterator
* itr
= db_
->NewIterator(ReadOptions());
2968 for (auto x
: expected_db
) {
2969 ASSERT_TRUE(itr
->Valid());
2970 ASSERT_EQ(itr
->key().ToString(), x
);
2971 ASSERT_EQ(itr
->value().ToString(), x
);
2974 ASSERT_TRUE(!itr
->Valid());
2977 HistogramData hist_data
;
2978 options
.statistics
->histogramData(DB_WRITE
, &hist_data
);
2979 ASSERT_GT(hist_data
.average
, 0.0);
2980 } while (ChangeOptions(kSkipNoSeekToLast
));
// Ordered key-value map used by the in-memory model DB below to hold the
// expected database contents (ordered so iteration matches DB iterators).
using KVMap = std::map<std::string, std::string>;
2988 class ModelDB
: public DB
{
2990 class ModelSnapshot
: public Snapshot
{
2994 SequenceNumber
GetSequenceNumber() const override
{
2995 // no need to call this
3000 int64_t GetUnixTime() const override
{
3001 // no need to call this
3006 uint64_t GetTimestamp() const override
{
3007 // no need to call this
3013 explicit ModelDB(const Options
& options
) : options_(options
) {}
3015 Status
Put(const WriteOptions
& o
, ColumnFamilyHandle
* cf
, const Slice
& k
,
3016 const Slice
& v
) override
{
3018 Status s
= batch
.Put(cf
, k
, v
);
3022 return Write(o
, &batch
);
3024 Status
Put(const WriteOptions
& /*o*/, ColumnFamilyHandle
* /*cf*/,
3025 const Slice
& /*k*/, const Slice
& /*ts*/,
3026 const Slice
& /*v*/) override
{
3027 return Status::NotSupported();
3030 using DB::PutEntity
;
3031 Status
PutEntity(const WriteOptions
& /* options */,
3032 ColumnFamilyHandle
* /* column_family */,
3033 const Slice
& /* key */,
3034 const WideColumns
& /* columns */) override
{
3035 return Status::NotSupported();
3039 Status
Close() override
{ return Status::OK(); }
3041 Status
Delete(const WriteOptions
& o
, ColumnFamilyHandle
* cf
,
3042 const Slice
& key
) override
{
3044 Status s
= batch
.Delete(cf
, key
);
3048 return Write(o
, &batch
);
3050 Status
Delete(const WriteOptions
& /*o*/, ColumnFamilyHandle
* /*cf*/,
3051 const Slice
& /*key*/, const Slice
& /*ts*/) override
{
3052 return Status::NotSupported();
3054 using DB::SingleDelete
;
3055 Status
SingleDelete(const WriteOptions
& o
, ColumnFamilyHandle
* cf
,
3056 const Slice
& key
) override
{
3058 Status s
= batch
.SingleDelete(cf
, key
);
3062 return Write(o
, &batch
);
3064 Status
SingleDelete(const WriteOptions
& /*o*/, ColumnFamilyHandle
* /*cf*/,
3065 const Slice
& /*key*/, const Slice
& /*ts*/) override
{
3066 return Status::NotSupported();
3069 Status
Merge(const WriteOptions
& o
, ColumnFamilyHandle
* cf
, const Slice
& k
,
3070 const Slice
& v
) override
{
3072 Status s
= batch
.Merge(cf
, k
, v
);
3076 return Write(o
, &batch
);
3078 Status
Merge(const WriteOptions
& /*o*/, ColumnFamilyHandle
* /*cf*/,
3079 const Slice
& /*k*/, const Slice
& /*ts*/,
3080 const Slice
& /*value*/) override
{
3081 return Status::NotSupported();
3084 Status
Get(const ReadOptions
& /*options*/, ColumnFamilyHandle
* /*cf*/,
3085 const Slice
& key
, PinnableSlice
* /*value*/) override
{
3086 return Status::NotSupported(key
);
3089 using DB::GetMergeOperands
;
3090 virtual Status
GetMergeOperands(
3091 const ReadOptions
& /*options*/, ColumnFamilyHandle
* /*column_family*/,
3092 const Slice
& key
, PinnableSlice
* /*slice*/,
3093 GetMergeOperandsOptions
* /*merge_operands_options*/,
3094 int* /*number_of_operands*/) override
{
3095 return Status::NotSupported(key
);
3099 std::vector
<Status
> MultiGet(
3100 const ReadOptions
& /*options*/,
3101 const std::vector
<ColumnFamilyHandle
*>& /*column_family*/,
3102 const std::vector
<Slice
>& keys
,
3103 std::vector
<std::string
>* /*values*/) override
{
3104 std::vector
<Status
> s(keys
.size(),
3105 Status::NotSupported("Not implemented."));
3109 #ifndef ROCKSDB_LITE
3110 using DB::IngestExternalFile
;
3111 Status
IngestExternalFile(
3112 ColumnFamilyHandle
* /*column_family*/,
3113 const std::vector
<std::string
>& /*external_files*/,
3114 const IngestExternalFileOptions
& /*options*/) override
{
3115 return Status::NotSupported("Not implemented.");
3118 using DB::IngestExternalFiles
;
3119 Status
IngestExternalFiles(
3120 const std::vector
<IngestExternalFileArg
>& /*args*/) override
{
3121 return Status::NotSupported("Not implemented");
3124 using DB::CreateColumnFamilyWithImport
;
3125 virtual Status
CreateColumnFamilyWithImport(
3126 const ColumnFamilyOptions
& /*options*/,
3127 const std::string
& /*column_family_name*/,
3128 const ImportColumnFamilyOptions
& /*import_options*/,
3129 const ExportImportFilesMetaData
& /*metadata*/,
3130 ColumnFamilyHandle
** /*handle*/) override
{
3131 return Status::NotSupported("Not implemented.");
3134 using DB::VerifyChecksum
;
3135 Status
VerifyChecksum(const ReadOptions
&) override
{
3136 return Status::NotSupported("Not implemented.");
3139 using DB::GetPropertiesOfAllTables
;
3140 Status
GetPropertiesOfAllTables(
3141 ColumnFamilyHandle
* /*column_family*/,
3142 TablePropertiesCollection
* /*props*/) override
{
3146 Status
GetPropertiesOfTablesInRange(
3147 ColumnFamilyHandle
* /*column_family*/, const Range
* /*range*/,
3148 std::size_t /*n*/, TablePropertiesCollection
* /*props*/) override
{
3151 #endif // ROCKSDB_LITE
3153 using DB::KeyMayExist
;
3154 bool KeyMayExist(const ReadOptions
& /*options*/,
3155 ColumnFamilyHandle
* /*column_family*/, const Slice
& /*key*/,
3156 std::string
* /*value*/,
3157 bool* value_found
= nullptr) override
{
3158 if (value_found
!= nullptr) {
3159 *value_found
= false;
3161 return true; // Not Supported directly
3163 using DB::NewIterator
;
3164 Iterator
* NewIterator(const ReadOptions
& options
,
3165 ColumnFamilyHandle
* /*column_family*/) override
{
3166 if (options
.snapshot
== nullptr) {
3167 KVMap
* saved
= new KVMap
;
3169 return new ModelIter(saved
, true);
3171 const KVMap
* snapshot_state
=
3172 &(reinterpret_cast<const ModelSnapshot
*>(options
.snapshot
)->map_
);
3173 return new ModelIter(snapshot_state
, false);
3176 Status
NewIterators(const ReadOptions
& /*options*/,
3177 const std::vector
<ColumnFamilyHandle
*>& /*column_family*/,
3178 std::vector
<Iterator
*>* /*iterators*/) override
{
3179 return Status::NotSupported("Not supported yet");
3181 const Snapshot
* GetSnapshot() override
{
3182 ModelSnapshot
* snapshot
= new ModelSnapshot
;
3183 snapshot
->map_
= map_
;
3187 void ReleaseSnapshot(const Snapshot
* snapshot
) override
{
3188 delete reinterpret_cast<const ModelSnapshot
*>(snapshot
);
3191 Status
Write(const WriteOptions
& /*options*/, WriteBatch
* batch
) override
{
3192 class Handler
: public WriteBatch::Handler
{
3195 void Put(const Slice
& key
, const Slice
& value
) override
{
3196 (*map_
)[key
.ToString()] = value
.ToString();
3198 void Merge(const Slice
& /*key*/, const Slice
& /*value*/) override
{
3199 // ignore merge for now
3200 // (*map_)[key.ToString()] = value.ToString();
3202 void Delete(const Slice
& key
) override
{ map_
->erase(key
.ToString()); }
3205 handler
.map_
= &map_
;
3206 return batch
->Iterate(&handler
);
3209 using DB::GetProperty
;
3210 bool GetProperty(ColumnFamilyHandle
* /*column_family*/,
3211 const Slice
& /*property*/, std::string
* /*value*/) override
{
3214 using DB::GetIntProperty
;
3215 bool GetIntProperty(ColumnFamilyHandle
* /*column_family*/,
3216 const Slice
& /*property*/, uint64_t* /*value*/) override
{
3219 using DB::GetMapProperty
;
3220 bool GetMapProperty(ColumnFamilyHandle
* /*column_family*/,
3221 const Slice
& /*property*/,
3222 std::map
<std::string
, std::string
>* /*value*/) override
{
3225 using DB::GetAggregatedIntProperty
;
3226 bool GetAggregatedIntProperty(const Slice
& /*property*/,
3227 uint64_t* /*value*/) override
{
3230 using DB::GetApproximateSizes
;
3231 Status
GetApproximateSizes(const SizeApproximationOptions
& /*options*/,
3232 ColumnFamilyHandle
* /*column_family*/,
3233 const Range
* /*range*/, int n
,
3234 uint64_t* sizes
) override
{
3235 for (int i
= 0; i
< n
; i
++) {
3238 return Status::OK();
3240 using DB::GetApproximateMemTableStats
;
3241 void GetApproximateMemTableStats(ColumnFamilyHandle
* /*column_family*/,
3242 const Range
& /*range*/,
3243 uint64_t* const count
,
3244 uint64_t* const size
) override
{
3248 using DB::CompactRange
;
3249 Status
CompactRange(const CompactRangeOptions
& /*options*/,
3250 ColumnFamilyHandle
* /*column_family*/,
3251 const Slice
* /*start*/, const Slice
* /*end*/) override
{
3252 return Status::NotSupported("Not supported operation.");
3255 Status
SetDBOptions(
3256 const std::unordered_map
<std::string
, std::string
>& /*new_options*/)
3258 return Status::NotSupported("Not supported operation.");
3261 using DB::CompactFiles
;
3262 Status
CompactFiles(
3263 const CompactionOptions
& /*compact_options*/,
3264 ColumnFamilyHandle
* /*column_family*/,
3265 const std::vector
<std::string
>& /*input_file_names*/,
3266 const int /*output_level*/, const int /*output_path_id*/ = -1,
3267 std::vector
<std::string
>* const /*output_file_names*/ = nullptr,
3268 CompactionJobInfo
* /*compaction_job_info*/ = nullptr) override
{
3269 return Status::NotSupported("Not supported operation.");
3272 Status
PauseBackgroundWork() override
{
3273 return Status::NotSupported("Not supported operation.");
3276 Status
ContinueBackgroundWork() override
{
3277 return Status::NotSupported("Not supported operation.");
3280 Status
EnableAutoCompaction(
3281 const std::vector
<ColumnFamilyHandle
*>& /*column_family_handles*/)
3283 return Status::NotSupported("Not supported operation.");
3286 void EnableManualCompaction() override
{ return; }
3288 void DisableManualCompaction() override
{ return; }
3290 using DB::NumberLevels
;
3291 int NumberLevels(ColumnFamilyHandle
* /*column_family*/) override
{ return 1; }
3293 using DB::MaxMemCompactionLevel
;
3294 int MaxMemCompactionLevel(ColumnFamilyHandle
* /*column_family*/) override
{
3298 using DB::Level0StopWriteTrigger
;
3299 int Level0StopWriteTrigger(ColumnFamilyHandle
* /*column_family*/) override
{
3303 const std::string
& GetName() const override
{ return name_
; }
3305 Env
* GetEnv() const override
{ return nullptr; }
3307 using DB::GetOptions
;
3308 Options
GetOptions(ColumnFamilyHandle
* /*column_family*/) const override
{
3312 using DB::GetDBOptions
;
3313 DBOptions
GetDBOptions() const override
{ return options_
; }
3316 Status
Flush(const ROCKSDB_NAMESPACE::FlushOptions
& /*options*/,
3317 ColumnFamilyHandle
* /*column_family*/) override
{
3322 const ROCKSDB_NAMESPACE::FlushOptions
& /*options*/,
3323 const std::vector
<ColumnFamilyHandle
*>& /*column_families*/) override
{
3324 return Status::OK();
3327 Status
SyncWAL() override
{ return Status::OK(); }
3329 Status
DisableFileDeletions() override
{ return Status::OK(); }
3331 Status
EnableFileDeletions(bool /*force*/) override
{ return Status::OK(); }
3332 #ifndef ROCKSDB_LITE
3334 Status
GetLiveFiles(std::vector
<std::string
>&, uint64_t* /*size*/,
3335 bool /*flush_memtable*/ = true) override
{
3336 return Status::OK();
3339 Status
GetLiveFilesChecksumInfo(
3340 FileChecksumList
* /*checksum_list*/) override
{
3341 return Status::OK();
3344 Status
GetLiveFilesStorageInfo(
3345 const LiveFilesStorageInfoOptions
& /*opts*/,
3346 std::vector
<LiveFileStorageInfo
>* /*files*/) override
{
3347 return Status::OK();
3350 Status
GetSortedWalFiles(VectorLogPtr
& /*files*/) override
{
3351 return Status::OK();
3354 Status
GetCurrentWalFile(
3355 std::unique_ptr
<LogFile
>* /*current_log_file*/) override
{
3356 return Status::OK();
3359 virtual Status
GetCreationTimeOfOldestFile(
3360 uint64_t* /*creation_time*/) override
{
3361 return Status::NotSupported();
3364 Status
DeleteFile(std::string
/*name*/) override
{ return Status::OK(); }
3366 Status
GetUpdatesSince(
3367 ROCKSDB_NAMESPACE::SequenceNumber
,
3368 std::unique_ptr
<ROCKSDB_NAMESPACE::TransactionLogIterator
>*,
3369 const TransactionLogIterator::ReadOptions
& /*read_options*/ =
3370 TransactionLogIterator::ReadOptions()) override
{
3371 return Status::NotSupported("Not supported in Model DB");
3374 void GetColumnFamilyMetaData(ColumnFamilyHandle
* /*column_family*/,
3375 ColumnFamilyMetaData
* /*metadata*/) override
{}
3376 #endif // ROCKSDB_LITE
3378 Status
GetDbIdentity(std::string
& /*identity*/) const override
{
3379 return Status::OK();
3382 Status
GetDbSessionId(std::string
& /*session_id*/) const override
{
3383 return Status::OK();
3386 SequenceNumber
GetLatestSequenceNumber() const override
{ return 0; }
3388 Status
IncreaseFullHistoryTsLow(ColumnFamilyHandle
* /*cf*/,
3389 std::string
/*ts_low*/) override
{
3390 return Status::OK();
3393 Status
GetFullHistoryTsLow(ColumnFamilyHandle
* /*cf*/,
3394 std::string
* /*ts_low*/) override
{
3395 return Status::OK();
3398 ColumnFamilyHandle
* DefaultColumnFamily() const override
{ return nullptr; }
3401 class ModelIter
: public Iterator
{
3403 ModelIter(const KVMap
* map
, bool owned
)
3404 : map_(map
), owned_(owned
), iter_(map_
->end()) {}
3405 ~ModelIter() override
{
3406 if (owned_
) delete map_
;
3408 bool Valid() const override
{ return iter_
!= map_
->end(); }
3409 void SeekToFirst() override
{ iter_
= map_
->begin(); }
3410 void SeekToLast() override
{
3411 if (map_
->empty()) {
3412 iter_
= map_
->end();
3414 iter_
= map_
->find(map_
->rbegin()->first
);
3417 void Seek(const Slice
& k
) override
{
3418 iter_
= map_
->lower_bound(k
.ToString());
3420 void SeekForPrev(const Slice
& k
) override
{
3421 iter_
= map_
->upper_bound(k
.ToString());
3424 void Next() override
{ ++iter_
; }
3425 void Prev() override
{
3426 if (iter_
== map_
->begin()) {
3427 iter_
= map_
->end();
3433 Slice
key() const override
{ return iter_
->first
; }
3434 Slice
value() const override
{ return iter_
->second
; }
3435 Status
status() const override
{ return Status::OK(); }
3438 const KVMap
* const map_
;
3439 const bool owned_
; // Do we own map_
3440 KVMap::const_iterator iter_
;
3442 const Options options_
;
3444 std::string name_
= "";
3447 #if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
3448 static std::string
RandomKey(Random
* rnd
, int minimum
= 0) {
3451 len
= (rnd
->OneIn(3)
3452 ? 1 // Short sometimes to encourage collisions
3453 : (rnd
->OneIn(100) ? rnd
->Skewed(10) : rnd
->Uniform(10)));
3454 } while (len
< minimum
);
3455 return test::RandomKey(rnd
, len
);
3458 static bool CompareIterators(int step
, DB
* model
, DB
* db
,
3459 const Snapshot
* model_snap
,
3460 const Snapshot
* db_snap
) {
3461 ReadOptions options
;
3462 options
.snapshot
= model_snap
;
3463 Iterator
* miter
= model
->NewIterator(options
);
3464 options
.snapshot
= db_snap
;
3465 Iterator
* dbiter
= db
->NewIterator(options
);
3468 for (miter
->SeekToFirst(), dbiter
->SeekToFirst();
3469 ok
&& miter
->Valid() && dbiter
->Valid(); miter
->Next(), dbiter
->Next()) {
3471 if (miter
->key().compare(dbiter
->key()) != 0) {
3472 fprintf(stderr
, "step %d: Key mismatch: '%s' vs. '%s'\n", step
,
3473 EscapeString(miter
->key()).c_str(),
3474 EscapeString(dbiter
->key()).c_str());
3479 if (miter
->value().compare(dbiter
->value()) != 0) {
3480 fprintf(stderr
, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
3481 step
, EscapeString(miter
->key()).c_str(),
3482 EscapeString(miter
->value()).c_str(),
3483 EscapeString(dbiter
->value()).c_str());
3489 if (miter
->Valid() != dbiter
->Valid()) {
3490 fprintf(stderr
, "step %d: Mismatch at end of iterators: %d vs. %d\n",
3491 step
, miter
->Valid(), dbiter
->Valid());
3500 class DBTestRandomized
: public DBTest
,
3501 public ::testing::WithParamInterface
<int> {
3503 void SetUp() override
{ option_config_
= GetParam(); }
3505 static std::vector
<int> GenerateOptionConfigs() {
3506 std::vector
<int> option_configs
;
3507 // skip cuckoo hash as it does not support snapshot.
3508 for (int option_config
= kDefault
; option_config
< kEnd
; ++option_config
) {
3509 if (!ShouldSkipOptions(option_config
,
3510 kSkipDeletesFilterFirst
| kSkipNoSeekToLast
)) {
3511 option_configs
.push_back(option_config
);
3514 option_configs
.push_back(kBlockBasedTableWithIndexRestartInterval
);
3515 return option_configs
;
3519 INSTANTIATE_TEST_CASE_P(
3520 DBTestRandomized
, DBTestRandomized
,
3521 ::testing::ValuesIn(DBTestRandomized::GenerateOptionConfigs()));
3523 TEST_P(DBTestRandomized
, Randomized
) {
3524 anon::OptionsOverride options_override
;
3525 options_override
.skip_policy
= kSkipNoSnapshot
;
3526 Options options
= CurrentOptions(options_override
);
3527 DestroyAndReopen(options
);
3529 Random
rnd(test::RandomSeed() + GetParam());
3530 ModelDB
model(options
);
3531 const int N
= 10000;
3532 const Snapshot
* model_snap
= nullptr;
3533 const Snapshot
* db_snap
= nullptr;
3535 for (int step
= 0; step
< N
; step
++) {
3536 // TODO(sanjay): Test Get() works
3537 int p
= rnd
.Uniform(100);
3539 if (option_config_
== kHashSkipList
|| option_config_
== kHashLinkList
||
3540 option_config_
== kPlainTableFirstBytePrefix
||
3541 option_config_
== kBlockBasedTableWithWholeKeyHashIndex
||
3542 option_config_
== kBlockBasedTableWithPrefixHashIndex
) {
3545 if (p
< 45) { // Put
3546 k
= RandomKey(&rnd
, minimum
);
3547 v
= rnd
.RandomString(rnd
.OneIn(20) ? 100 + rnd
.Uniform(100)
3549 ASSERT_OK(model
.Put(WriteOptions(), k
, v
));
3550 ASSERT_OK(db_
->Put(WriteOptions(), k
, v
));
3551 } else if (p
< 90) { // Delete
3552 k
= RandomKey(&rnd
, minimum
);
3553 ASSERT_OK(model
.Delete(WriteOptions(), k
));
3554 ASSERT_OK(db_
->Delete(WriteOptions(), k
));
3555 } else { // Multi-element batch
3557 const int num
= rnd
.Uniform(8);
3558 for (int i
= 0; i
< num
; i
++) {
3559 if (i
== 0 || !rnd
.OneIn(10)) {
3560 k
= RandomKey(&rnd
, minimum
);
3562 // Periodically re-use the same key from the previous iter, so
3563 // we have multiple entries in the write batch for the same key
3566 v
= rnd
.RandomString(rnd
.Uniform(10));
3567 ASSERT_OK(b
.Put(k
, v
));
3569 ASSERT_OK(b
.Delete(k
));
3572 ASSERT_OK(model
.Write(WriteOptions(), &b
));
3573 ASSERT_OK(db_
->Write(WriteOptions(), &b
));
3576 if ((step
% 100) == 0) {
3577 // For DB instances that use the hash index + block-based table, the
3578 // iterator will be invalid right when seeking a non-existent key, right
3579 // than return a key that is close to it.
3580 if (option_config_
!= kBlockBasedTableWithWholeKeyHashIndex
&&
3581 option_config_
!= kBlockBasedTableWithPrefixHashIndex
) {
3582 ASSERT_TRUE(CompareIterators(step
, &model
, db_
, nullptr, nullptr));
3583 ASSERT_TRUE(CompareIterators(step
, &model
, db_
, model_snap
, db_snap
));
3586 // Save a snapshot from each DB this time that we'll use next
3587 // time we compare things, to make sure the current state is
3588 // preserved with the snapshot
3589 if (model_snap
!= nullptr) model
.ReleaseSnapshot(model_snap
);
3590 if (db_snap
!= nullptr) db_
->ReleaseSnapshot(db_snap
);
3593 ASSERT_TRUE(CompareIterators(step
, &model
, db_
, nullptr, nullptr));
3595 model_snap
= model
.GetSnapshot();
3596 db_snap
= db_
->GetSnapshot();
3599 if (model_snap
!= nullptr) model
.ReleaseSnapshot(model_snap
);
3600 if (db_snap
!= nullptr) db_
->ReleaseSnapshot(db_snap
);
3602 #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
3604 TEST_F(DBTest
, BlockBasedTablePrefixIndexTest
) {
3605 // create a DB with block prefix index
3606 BlockBasedTableOptions table_options
;
3607 Options options
= CurrentOptions();
3608 table_options
.index_type
= BlockBasedTableOptions::kHashSearch
;
3609 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
3610 options
.prefix_extractor
.reset(NewFixedPrefixTransform(1));
3613 ASSERT_OK(Put("k1", "v1"));
3615 ASSERT_OK(Put("k2", "v2"));
3617 // Reopen with different prefix extractor, make sure everything still works.
3618 // RocksDB should just fall back to the binary index.
3619 options
.prefix_extractor
.reset(NewFixedPrefixTransform(2));
3622 ASSERT_EQ("v1", Get("k1"));
3623 ASSERT_EQ("v2", Get("k2"));
3625 #ifndef ROCKSDB_LITE
3627 ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
3628 ASSERT_EQ("v1", Get("k1"));
3629 ASSERT_EQ("v2", Get("k2"));
3630 #endif // !ROCKSDB_LITE
3632 // Same if there's a problem initally loading prefix transform
3633 options
.prefix_extractor
.reset(NewFixedPrefixTransform(1));
3634 SyncPoint::GetInstance()->SetCallBack(
3635 "BlockBasedTable::Open::ForceNullTablePrefixExtractor",
3636 [&](void* arg
) { *static_cast<bool*>(arg
) = true; });
3637 SyncPoint::GetInstance()->EnableProcessing();
3639 ASSERT_EQ("v1", Get("k1"));
3640 ASSERT_EQ("v2", Get("k2"));
3642 #ifndef ROCKSDB_LITE
3644 ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}}));
3645 ASSERT_EQ("v1", Get("k1"));
3646 ASSERT_EQ("v2", Get("k2"));
3647 #endif // !ROCKSDB_LITE
3648 SyncPoint::GetInstance()->DisableProcessing();
3650 // Reopen with no prefix extractor, make sure everything still works.
3651 // RocksDB should just fall back to the binary index.
3652 table_options
.index_type
= BlockBasedTableOptions::kBinarySearch
;
3653 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
3654 options
.prefix_extractor
.reset();
3657 ASSERT_EQ("v1", Get("k1"));
3658 ASSERT_EQ("v2", Get("k2"));
3661 TEST_F(DBTest
, BlockBasedTablePrefixHashIndexTest
) {
3662 // create a DB with block prefix index
3663 BlockBasedTableOptions table_options
;
3664 Options options
= CurrentOptions();
3665 table_options
.index_type
= BlockBasedTableOptions::kHashSearch
;
3666 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
3667 options
.prefix_extractor
.reset(NewCappedPrefixTransform(2));
3670 ASSERT_OK(Put("kk1", "v1"));
3671 ASSERT_OK(Put("kk2", "v2"));
3672 ASSERT_OK(Put("kk", "v3"));
3673 ASSERT_OK(Put("k", "v4"));
3676 ASSERT_EQ("v1", Get("kk1"));
3677 ASSERT_EQ("v2", Get("kk2"));
3679 ASSERT_EQ("v3", Get("kk"));
3680 ASSERT_EQ("v4", Get("k"));
3683 TEST_F(DBTest
, BlockBasedTablePrefixIndexTotalOrderSeek
) {
3684 // create a DB with block prefix index
3685 BlockBasedTableOptions table_options
;
3686 Options options
= CurrentOptions();
3687 options
.max_open_files
= 10;
3688 table_options
.index_type
= BlockBasedTableOptions::kHashSearch
;
3689 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
3690 options
.prefix_extractor
.reset(NewFixedPrefixTransform(1));
3692 // RocksDB sanitize max open files to at least 20. Modify it back.
3693 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
3694 "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg
) {
3695 int* max_open_files
= static_cast<int*>(arg
);
3696 *max_open_files
= 11;
3698 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
3701 ASSERT_OK(Put("k1", "v1"));
3704 CompactRangeOptions cro
;
3705 cro
.change_level
= true;
3706 cro
.target_level
= 1;
3707 ASSERT_OK(db_
->CompactRange(cro
, nullptr, nullptr));
3709 // Force evict tables
3710 dbfull()->TEST_table_cache()->SetCapacity(0);
3711 // Make table cache to keep one entry.
3712 dbfull()->TEST_table_cache()->SetCapacity(1);
3714 ReadOptions read_options
;
3715 read_options
.total_order_seek
= true;
3717 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_options
));
3719 ASSERT_TRUE(iter
->Valid());
3720 ASSERT_EQ("k1", iter
->key().ToString());
3723 // After total order seek, prefix index should still be used.
3724 read_options
.total_order_seek
= false;
3726 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_options
));
3728 ASSERT_TRUE(iter
->Valid());
3729 ASSERT_EQ("k1", iter
->key().ToString());
3731 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
3734 TEST_F(DBTest
, ChecksumTest
) {
3735 BlockBasedTableOptions table_options
;
3736 Options options
= CurrentOptions();
3738 table_options
.checksum
= kCRC32c
;
3739 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
3741 ASSERT_OK(Put("a", "b"));
3742 ASSERT_OK(Put("c", "d"));
3743 ASSERT_OK(Flush()); // table with crc checksum
3745 table_options
.checksum
= kxxHash
;
3746 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
3748 ASSERT_OK(Put("e", "f"));
3749 ASSERT_OK(Put("g", "h"));
3750 ASSERT_OK(Flush()); // table with xxhash checksum
3752 table_options
.checksum
= kCRC32c
;
3753 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
3755 ASSERT_EQ("b", Get("a"));
3756 ASSERT_EQ("d", Get("c"));
3757 ASSERT_EQ("f", Get("e"));
3758 ASSERT_EQ("h", Get("g"));
3760 table_options
.checksum
= kCRC32c
;
3761 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
3763 ASSERT_EQ("b", Get("a"));
3764 ASSERT_EQ("d", Get("c"));
3765 ASSERT_EQ("f", Get("e"));
3766 ASSERT_EQ("h", Get("g"));
3769 #ifndef ROCKSDB_LITE
3770 TEST_P(DBTestWithParam
, FIFOCompactionTest
) {
3771 for (int iter
= 0; iter
< 2; ++iter
) {
3772 // first iteration -- auto compaction
3773 // second iteration -- manual compaction
3775 options
.compaction_style
= kCompactionStyleFIFO
;
3776 options
.write_buffer_size
= 100 << 10; // 100KB
3777 options
.arena_block_size
= 4096;
3778 options
.compaction_options_fifo
.max_table_files_size
= 500 << 10; // 500KB
3779 options
.compression
= kNoCompression
;
3780 options
.create_if_missing
= true;
3781 options
.max_subcompactions
= max_subcompactions_
;
3783 options
.disable_auto_compactions
= true;
3785 options
= CurrentOptions(options
);
3786 DestroyAndReopen(options
);
3789 for (int i
= 0; i
< 6; ++i
) {
3790 for (int j
= 0; j
< 110; ++j
) {
3791 ASSERT_OK(Put(std::to_string(i
* 100 + j
), rnd
.RandomString(980)));
3793 // flush should happen here
3794 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
3797 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3799 CompactRangeOptions cro
;
3800 cro
.exclusive_manual_compaction
= exclusive_manual_compaction_
;
3801 ASSERT_OK(db_
->CompactRange(cro
, nullptr, nullptr));
3803 // only 5 files should survive
3804 ASSERT_EQ(NumTableFilesAtLevel(0), 5);
3805 for (int i
= 0; i
< 50; ++i
) {
3806 // these keys should be deleted in previous compaction
3807 ASSERT_EQ("NOT_FOUND", Get(std::to_string(i
)));
3812 TEST_F(DBTest
, FIFOCompactionTestWithCompaction
) {
3814 options
.compaction_style
= kCompactionStyleFIFO
;
3815 options
.write_buffer_size
= 20 << 10; // 20K
3816 options
.arena_block_size
= 4096;
3817 options
.compaction_options_fifo
.max_table_files_size
= 1500 << 10; // 1MB
3818 options
.compaction_options_fifo
.allow_compaction
= true;
3819 options
.level0_file_num_compaction_trigger
= 6;
3820 options
.compression
= kNoCompression
;
3821 options
.create_if_missing
= true;
3822 options
= CurrentOptions(options
);
3823 DestroyAndReopen(options
);
3826 for (int i
= 0; i
< 60; i
++) {
3827 // Generate and flush a file about 20KB.
3828 for (int j
= 0; j
< 20; j
++) {
3829 ASSERT_OK(Put(std::to_string(i
* 20 + j
), rnd
.RandomString(980)));
3832 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3834 // It should be compacted to 10 files.
3835 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3837 for (int i
= 0; i
< 60; i
++) {
3838 // Generate and flush a file about 20KB.
3839 for (int j
= 0; j
< 20; j
++) {
3840 ASSERT_OK(Put(std::to_string(i
* 20 + j
+ 2000), rnd
.RandomString(980)));
3843 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3846 // It should be compacted to no more than 20 files.
3847 ASSERT_GT(NumTableFilesAtLevel(0), 10);
3848 ASSERT_LT(NumTableFilesAtLevel(0), 18);
3849 // Size limit is still guaranteed.
3850 ASSERT_LE(SizeAtLevel(0),
3851 options
.compaction_options_fifo
.max_table_files_size
);
3854 TEST_F(DBTest
, FIFOCompactionStyleWithCompactionAndDelete
) {
3856 options
.compaction_style
= kCompactionStyleFIFO
;
3857 options
.write_buffer_size
= 20 << 10; // 20K
3858 options
.arena_block_size
= 4096;
3859 options
.compaction_options_fifo
.max_table_files_size
= 1500 << 10; // 1MB
3860 options
.compaction_options_fifo
.allow_compaction
= true;
3861 options
.level0_file_num_compaction_trigger
= 3;
3862 options
.compression
= kNoCompression
;
3863 options
.create_if_missing
= true;
3864 options
= CurrentOptions(options
);
3865 DestroyAndReopen(options
);
3868 for (int i
= 0; i
< 3; i
++) {
3869 // Each file contains a different key which will be dropped later.
3870 ASSERT_OK(Put("a" + std::to_string(i
), rnd
.RandomString(500)));
3871 ASSERT_OK(Put("key" + std::to_string(i
), ""));
3872 ASSERT_OK(Put("z" + std::to_string(i
), rnd
.RandomString(500)));
3874 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3876 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
3877 for (int i
= 0; i
< 3; i
++) {
3878 ASSERT_EQ("", Get("key" + std::to_string(i
)));
3880 for (int i
= 0; i
< 3; i
++) {
3881 // Each file contains a different key which will be dropped later.
3882 ASSERT_OK(Put("a" + std::to_string(i
), rnd
.RandomString(500)));
3883 ASSERT_OK(Delete("key" + std::to_string(i
)));
3884 ASSERT_OK(Put("z" + std::to_string(i
), rnd
.RandomString(500)));
3886 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3888 ASSERT_EQ(NumTableFilesAtLevel(0), 2);
3889 for (int i
= 0; i
< 3; i
++) {
3890 ASSERT_EQ("NOT_FOUND", Get("key" + std::to_string(i
)));
3894 // Check that FIFO-with-TTL is not supported with max_open_files != -1.
3895 // Github issue #8014
3896 TEST_F(DBTest
, FIFOCompactionWithTTLAndMaxOpenFilesTest
) {
3897 Options options
= CurrentOptions();
3898 options
.compaction_style
= kCompactionStyleFIFO
;
3899 options
.create_if_missing
= true;
3900 options
.ttl
= 600; // seconds
3902 // TTL is not supported with max_open_files != -1.
3903 options
.max_open_files
= 0;
3904 ASSERT_TRUE(TryReopen(options
).IsNotSupported());
3906 options
.max_open_files
= 100;
3907 ASSERT_TRUE(TryReopen(options
).IsNotSupported());
3909 // TTL is supported with unlimited max_open_files
3910 options
.max_open_files
= -1;
3911 ASSERT_OK(TryReopen(options
));
3914 // Check that FIFO-with-TTL is supported only with BlockBasedTableFactory.
3915 TEST_F(DBTest
, FIFOCompactionWithTTLAndVariousTableFormatsTest
) {
3917 options
.compaction_style
= kCompactionStyleFIFO
;
3918 options
.create_if_missing
= true;
3919 options
.ttl
= 600; // seconds
3921 options
= CurrentOptions(options
);
3922 options
.table_factory
.reset(NewBlockBasedTableFactory());
3923 ASSERT_OK(TryReopen(options
));
3926 options
.table_factory
.reset(NewPlainTableFactory());
3927 ASSERT_TRUE(TryReopen(options
).IsNotSupported());
3930 options
.table_factory
.reset(NewAdaptiveTableFactory());
3931 ASSERT_TRUE(TryReopen(options
).IsNotSupported());
// Exercises FIFO compaction with a TTL: files older than options.ttl must be
// dropped by manual compaction, by automatic compaction, by the size-based
// fallback, and with intra-L0 compaction enabled (allow_compaction = true).
// NOTE(review): this chunk is a mangled extraction — statements are split
// across lines, original file line numbers are fused into the text, and some
// lines appear to be missing (e.g. the `Options options;` / `Random rnd(...)`
// declarations, Flush() calls, and several closing braces). Code is kept
// byte-identical pending recovery of the original source.
3934 TEST_F(DBTest
, FIFOCompactionWithTTLTest
) {
3936 options
.compaction_style
= kCompactionStyleFIFO
;
3937 options
.write_buffer_size
= 10 << 10; // 10KB
3938 options
.arena_block_size
= 4096;
3939 options
.compression
= kNoCompression
;
3940 options
.create_if_missing
= true;
// Mock clock so TTL expiry can be simulated without real waiting.
3941 env_
->SetMockSleep();
3944 // Test to make sure that all files with expired ttl are deleted on next
3945 // manual compaction.
3947 // NOTE: Presumed unnecessary and removed: resetting mock time in env
3949 options
.compaction_options_fifo
.max_table_files_size
= 150 << 10; // 150KB
3950 options
.compaction_options_fifo
.allow_compaction
= false;
3951 options
.ttl
= 1 * 60 * 60; // 1 hour
3952 options
= CurrentOptions(options
);
3953 DestroyAndReopen(options
);
// Write 10 files of ~10KB each; presumably each inner loop is followed by a
// Flush() that the extraction dropped — TODO confirm against upstream.
3956 for (int i
= 0; i
< 10; i
++) {
3957 // Generate and flush a file about 10KB.
3958 for (int j
= 0; j
< 10; j
++) {
3959 ASSERT_OK(Put(std::to_string(i
* 20 + j
), rnd
.RandomString(980)));
3962 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3964 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3966 // Sleep for 2 hours -- which is much greater than TTL.
3967 env_
->MockSleepForSeconds(2 * 60 * 60);
3969 // Since no flushes and compactions have run, the db should still be in
3970 // the same state even after considerable time has passed.
3971 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3972 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
// Manual compaction drops every TTL-expired file.
3974 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
3975 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
3978 // Test to make sure that all files with expired ttl are deleted on next
3979 // automatic compaction.
3981 options
.compaction_options_fifo
.max_table_files_size
= 150 << 10; // 150KB
3982 options
.compaction_options_fifo
.allow_compaction
= false;
3983 options
.ttl
= 1 * 60 * 60; // 1 hour
3984 options
= CurrentOptions(options
);
3985 DestroyAndReopen(options
);
3988 for (int i
= 0; i
< 10; i
++) {
3989 // Generate and flush a file about 10KB.
3990 for (int j
= 0; j
< 10; j
++) {
3991 ASSERT_OK(Put(std::to_string(i
* 20 + j
), rnd
.RandomString(980)));
3994 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3996 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3998 // Sleep for 2 hours -- which is much greater than TTL.
3999 env_
->MockSleepForSeconds(2 * 60 * 60);
4000 // Just to make sure that we are in the same state even after sleeping.
4001 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4002 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
4004 // Create 1 more file to trigger TTL compaction. The old files are dropped.
4005 for (int i
= 0; i
< 1; i
++) {
4006 for (int j
= 0; j
< 10; j
++) {
4007 ASSERT_OK(Put(std::to_string(i
* 20 + j
), rnd
.RandomString(980)));
4012 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4013 // Only the new 10 files remain.
4014 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
4015 ASSERT_LE(SizeAtLevel(0),
4016 options
.compaction_options_fifo
.max_table_files_size
);
4019 // Test that shows the fall back to size-based FIFO compaction if TTL-based
4020 // deletion doesn't move the total size to be less than max_table_files_size.
4022 options
.write_buffer_size
= 10 << 10; // 10KB
4023 options
.compaction_options_fifo
.max_table_files_size
= 150 << 10; // 150KB
4024 options
.compaction_options_fifo
.allow_compaction
= false;
4025 options
.ttl
= 1 * 60 * 60; // 1 hour
4026 options
= CurrentOptions(options
);
4027 DestroyAndReopen(options
);
4030 for (int i
= 0; i
< 3; i
++) {
4031 // Generate and flush a file about 10KB.
4032 for (int j
= 0; j
< 10; j
++) {
4033 ASSERT_OK(Put(std::to_string(i
* 20 + j
), rnd
.RandomString(980)));
4036 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4038 ASSERT_EQ(NumTableFilesAtLevel(0), 3);
4040 // Sleep for 2 hours -- which is much greater than TTL.
4041 env_
->MockSleepForSeconds(2 * 60 * 60);
4042 // Just to make sure that we are in the same state even after sleeping.
4043 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4044 ASSERT_EQ(NumTableFilesAtLevel(0), 3);
// Now write enough data that dropping expired files alone cannot satisfy the
// size limit, forcing the size-based FIFO path.
4046 for (int i
= 0; i
< 5; i
++) {
4047 for (int j
= 0; j
< 140; j
++) {
4048 ASSERT_OK(Put(std::to_string(i
* 20 + j
), rnd
.RandomString(980)));
4051 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4053 // Size limit is still guaranteed.
4054 ASSERT_LE(SizeAtLevel(0),
4055 options
.compaction_options_fifo
.max_table_files_size
);
4058 // Test with TTL + Intra-L0 compactions.
4060 options
.compaction_options_fifo
.max_table_files_size
= 150 << 10; // 150KB
4061 options
.compaction_options_fifo
.allow_compaction
= true;
4062 options
.ttl
= 1 * 60 * 60; // 1 hour
4063 options
.level0_file_num_compaction_trigger
= 6;
4064 options
= CurrentOptions(options
);
4065 DestroyAndReopen(options
);
4068 for (int i
= 0; i
< 10; i
++) {
4069 // Generate and flush a file about 10KB.
4070 for (int j
= 0; j
< 10; j
++) {
4071 ASSERT_OK(Put(std::to_string(i
* 20 + j
), rnd
.RandomString(980)));
4074 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4076 // With Intra-L0 compaction, out of 10 files, 6 files will be compacted to 1
4077 // (due to level0_file_num_compaction_trigger = 6).
4078 // So total files = 1 + remaining 4 = 5.
4079 ASSERT_EQ(NumTableFilesAtLevel(0), 5);
4081 // Sleep for 2 hours -- which is much greater than TTL.
4082 env_
->MockSleepForSeconds(2 * 60 * 60);
4083 // Just to make sure that we are in the same state even after sleeping.
4084 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4085 ASSERT_EQ(NumTableFilesAtLevel(0), 5);
4087 // Create 10 more files. The old 5 files are dropped as their ttl expired.
4088 for (int i
= 0; i
< 10; i
++) {
4089 for (int j
= 0; j
< 10; j
++) {
4090 ASSERT_OK(Put(std::to_string(i
* 20 + j
), rnd
.RandomString(980)));
4093 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4095 ASSERT_EQ(NumTableFilesAtLevel(0), 5);
4096 ASSERT_LE(SizeAtLevel(0),
4097 options
.compaction_options_fifo
.max_table_files_size
);
4100 // Test with large TTL + Intra-L0 compactions.
4101 // Files dropped based on size, as ttl doesn't kick in.
4103 options
.write_buffer_size
= 20 << 10; // 20K
4104 options
.compaction_options_fifo
.max_table_files_size
= 1500 << 10; // 1.5MB
4105 options
.compaction_options_fifo
.allow_compaction
= true;
4106 options
.ttl
= 1 * 60 * 60; // 1 hour
4107 options
.level0_file_num_compaction_trigger
= 6;
4108 options
= CurrentOptions(options
);
4109 DestroyAndReopen(options
);
4112 for (int i
= 0; i
< 60; i
++) {
4113 // Generate and flush a file about 20KB.
4114 for (int j
= 0; j
< 20; j
++) {
4115 ASSERT_OK(Put(std::to_string(i
* 20 + j
), rnd
.RandomString(980)));
4118 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4120 // It should be compacted to 10 files.
4121 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
4123 for (int i
= 0; i
< 60; i
++) {
4124 // Generate and flush a file about 20KB.
4125 for (int j
= 0; j
< 20; j
++) {
// NOTE(review): the line below appears truncated by the extraction — the
// `ASSERT_OK(` wrapper visible on every sibling Put is missing its opening
// here, though the closing parentheses remain. Verify against upstream.
4127 Put(std::to_string(i
* 20 + j
+ 2000), rnd
.RandomString(980)));
4130 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4133 // It should be compacted to no more than 20 files.
4134 ASSERT_GT(NumTableFilesAtLevel(0), 10);
4135 ASSERT_LT(NumTableFilesAtLevel(0), 18);
4136 // Size limit is still guaranteed.
4137 ASSERT_LE(SizeAtLevel(0),
4138 options
.compaction_options_fifo
.max_table_files_size
);
4141 #endif // ROCKSDB_LITE
4143 #ifndef ROCKSDB_LITE
4145 * This test is not reliable enough as it heavily depends on disk behavior.
4146 * Disable as it is flaky.
// Disabled (flaky, disk-dependent — see the comment fragment above): measures
// an unthrottled write rate, then re-runs the same workload with a
// GenericRateLimiter at 0.7x and 0.5x of that rate, asserting the achieved
// throughput ratio and the rate-limiter drain count stay within bounds.
// NOTE(review): mangled extraction — declarations of `wo` (WriteOptions) and
// `rnd` (Random), plus some closing braces, appear to have been dropped.
4148 TEST_F(DBTest
, DISABLED_RateLimitingTest
) {
4149 Options options
= CurrentOptions();
4150 options
.write_buffer_size
= 1 << 20; // 1MB
4151 options
.level0_file_num_compaction_trigger
= 2;
4152 options
.target_file_size_base
= 1 << 20; // 1MB
4153 options
.max_bytes_for_level_base
= 4 << 20; // 4MB
4154 options
.max_bytes_for_level_multiplier
= 4;
4155 options
.compression
= kNoCompression
;
4156 options
.create_if_missing
= true;
4158 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
4159 options
.IncreaseParallelism(4);
4160 DestroyAndReopen(options
);
// WAL disabled so only SST/flush bytes are counted by the mock env.
4163 wo
.disableWAL
= true;
4165 // # no rate limiting
4167 uint64_t start
= env_
->NowMicros();
4169 for (int64_t i
= 0; i
< (96 << 10); ++i
) {
4170 ASSERT_OK(Put(rnd
.RandomString(32), rnd
.RandomString((1 << 10) + 1), wo
));
4172 uint64_t elapsed
= env_
->NowMicros() - start
;
// Baseline bytes/sec with no limiter; the later phases are scaled off this.
4173 double raw_rate
= env_
->bytes_written_
* 1000000.0 / elapsed
;
4174 uint64_t rate_limiter_drains
=
4175 TestGetTickerCount(options
, NUMBER_RATE_LIMITER_DRAINS
);
4176 ASSERT_EQ(0, rate_limiter_drains
);
4179 // # rate limiting with 0.7 x threshold
4180 options
.rate_limiter
.reset(
4181 NewGenericRateLimiter(static_cast<int64_t>(0.7 * raw_rate
)));
4182 env_
->bytes_written_
= 0;
4183 DestroyAndReopen(options
);
4185 start
= env_
->NowMicros();
4187 for (int64_t i
= 0; i
< (96 << 10); ++i
) {
4188 ASSERT_OK(Put(rnd
.RandomString(32), rnd
.RandomString((1 << 10) + 1), wo
));
4190 rate_limiter_drains
=
4191 TestGetTickerCount(options
, NUMBER_RATE_LIMITER_DRAINS
) -
4192 rate_limiter_drains
;
4193 elapsed
= env_
->NowMicros() - start
;
// All written bytes must have passed through the limiter.
4195 ASSERT_EQ(options
.rate_limiter
->GetTotalBytesThrough(), env_
->bytes_written_
);
4196 // Most intervals should've been drained (interval time is 100ms, elapsed is
4198 ASSERT_GT(rate_limiter_drains
, 0);
4199 ASSERT_LE(rate_limiter_drains
, elapsed
/ 100000 + 1);
4200 double ratio
= env_
->bytes_written_
* 1000000 / elapsed
/ raw_rate
;
4201 fprintf(stderr
, "write rate ratio = %.2lf, expected 0.7\n", ratio
);
4202 ASSERT_TRUE(ratio
< 0.8);
4204 // # rate limiting with half of the raw_rate
4205 options
.rate_limiter
.reset(
4206 NewGenericRateLimiter(static_cast<int64_t>(raw_rate
/ 2)));
4207 env_
->bytes_written_
= 0;
4208 DestroyAndReopen(options
);
4210 start
= env_
->NowMicros();
4212 for (int64_t i
= 0; i
< (96 << 10); ++i
) {
4213 ASSERT_OK(Put(rnd
.RandomString(32), rnd
.RandomString((1 << 10) + 1), wo
));
4215 elapsed
= env_
->NowMicros() - start
;
4216 rate_limiter_drains
=
4217 TestGetTickerCount(options
, NUMBER_RATE_LIMITER_DRAINS
) -
4218 rate_limiter_drains
;
4220 ASSERT_EQ(options
.rate_limiter
->GetTotalBytesThrough(), env_
->bytes_written_
);
4221 // Most intervals should've been drained (interval time is 100ms, elapsed is
4223 ASSERT_GT(rate_limiter_drains
, elapsed
/ 100000 / 2);
4224 ASSERT_LE(rate_limiter_drains
, elapsed
/ 100000 + 1);
4225 ratio
= env_
->bytes_written_
* 1000000 / elapsed
/ raw_rate
;
4226 fprintf(stderr
, "write rate ratio = %.2lf, expected 0.5\n", ratio
);
4227 ASSERT_LT(ratio
, 0.6);
4230 // This is a mocked customed rate limiter without implementing optional APIs
4231 // (e.g, RateLimiter::GetTotalPendingRequests())
// Minimal RateLimiter stub that overrides only the pure-virtual/required
// entry points; it deliberately omits the optional APIs (e.g.
// GetTotalPendingRequests) so the test below can verify RocksDB tolerates
// such partial implementations.
// NOTE(review): mangled extraction — the access specifier (`public:`), some
// method bodies/returns, and the closing `};` of the class appear to have
// been dropped. Code kept byte-identical pending recovery.
4232 class MockedRateLimiterWithNoOptionalAPIImpl
: public RateLimiter
{
4234 MockedRateLimiterWithNoOptionalAPIImpl() {}
4236 ~MockedRateLimiterWithNoOptionalAPIImpl() override
{}
// No-op: this mock does not actually throttle anything.
4238 void SetBytesPerSecond(int64_t bytes_per_second
) override
{
4239 (void)bytes_per_second
;
4242 using RateLimiter::Request
;
4243 void Request(const int64_t bytes
, const Env::IOPriority pri
,
4244 Statistics
* stats
) override
{
// Small fixed burst so callers exercise the chunked-request path.
4250 int64_t GetSingleBurstBytes() const override
{ return 200; }
4252 int64_t GetTotalBytesThrough(
4253 const Env::IOPriority pri
= Env::IO_TOTAL
) const override
{
4258 int64_t GetTotalRequests(
4259 const Env::IOPriority pri
= Env::IO_TOTAL
) const override
{
4264 int64_t GetBytesPerSecond() const override
{ return 0; }
4267 // To test that customed rate limiter not implementing optional APIs (e.g,
4268 // RateLimiter::GetTotalPendingRequests()) works fine with RocksDB basic
4269 // operations (e.g, Put, Get, Flush)
// Smoke test: a custom RateLimiter that skips the optional APIs must not
// break basic Put/Get (and, per the comment above, Flush) operations.
// NOTE(review): the original line between the two identical Get assertions
// (presumably an ASSERT_OK(Flush())) appears to have been dropped by the
// extraction — confirm against upstream before relying on this block.
4270 TEST_F(DBTest
, CustomedRateLimiterWithNoOptionalAPIImplTest
) {
4271 Options options
= CurrentOptions();
4272 options
.rate_limiter
.reset(new MockedRateLimiterWithNoOptionalAPIImpl());
4273 DestroyAndReopen(options
);
4274 ASSERT_OK(Put("abc", "def"));
4275 ASSERT_EQ(Get("abc"), "def");
4277 ASSERT_EQ(Get("abc"), "def");
// Verifies option sanitization at DB open: plain table with a (noop) prefix
// extractor is accepted, while a block-based table with kHashSearch index but
// no usable prefix extractor is rejected with InvalidArgument until a real
// prefix extractor is supplied.
4280 TEST_F(DBTest
, TableOptionsSanitizeTest
) {
4281 Options options
= CurrentOptions();
4282 options
.create_if_missing
= true;
4283 DestroyAndReopen(options
);
// mmap reads default to off.
4284 ASSERT_EQ(db_
->GetOptions().allow_mmap_reads
, false);
4286 options
.table_factory
.reset(NewPlainTableFactory());
4287 options
.prefix_extractor
.reset(NewNoopTransform());
// Plain table + prefix extractor is a supported combination.
4289 ASSERT_TRUE(!TryReopen(options
).IsNotSupported());
4291 // Test for check of prefix_extractor when hash index is used for
4292 // block-based table
4293 BlockBasedTableOptions to
;
4294 to
.index_type
= BlockBasedTableOptions::kHashSearch
;
4295 options
= CurrentOptions();
4296 options
.create_if_missing
= true;
4297 options
.table_factory
.reset(NewBlockBasedTableFactory(to
));
// Hash index without a prefix extractor must be rejected...
4298 ASSERT_TRUE(TryReopen(options
).IsInvalidArgument());
// ...and accepted once one is configured.
4299 options
.prefix_extractor
.reset(NewFixedPrefixTransform(1));
4300 ASSERT_OK(TryReopen(options
));
// Verifies that enabling allow_concurrent_memtable_write with a memtable
// factory that does not support it (HashLinkList) fails DB open and
// CreateColumnFamily, while SkipList succeeds.
4303 TEST_F(DBTest
, ConcurrentMemtableNotSupported
) {
4304 Options options
= CurrentOptions();
4305 options
.allow_concurrent_memtable_write
= true;
4306 options
.soft_pending_compaction_bytes_limit
= 0;
4307 options
.hard_pending_compaction_bytes_limit
= 100;
4308 options
.create_if_missing
= true;
// NOTE(review): DestroyDB's Status is ignored here; newer RocksDB style
// wraps this in ASSERT_OK — confirm against upstream.
4310 DestroyDB(dbname_
, options
);
// HashLinkList memtable does not support concurrent writes -> open fails.
4311 options
.memtable_factory
.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4));
4312 ASSERT_NOK(TryReopen(options
));
// SkipList supports concurrent writes -> open succeeds.
4314 options
.memtable_factory
.reset(new SkipListFactory
);
4315 ASSERT_OK(TryReopen(options
));
// Same rule applies per-column-family.
4317 ColumnFamilyOptions
cf_options(options
);
4318 cf_options
.memtable_factory
.reset(
4319 NewHashLinkListRepFactory(4, 0, 3, true, 4));
4320 ColumnFamilyHandle
* handle
;
4321 ASSERT_NOK(db_
->CreateColumnFamily(cf_options
, "name", &handle
));
4324 #endif // ROCKSDB_LITE
// Verifies background-thread-pool sanitization: with 3 compaction and 2
// flush threads configured, 8 sleeping tasks distribute across LOW/HIGH
// pools leaving expected queue lengths, and the DB stays usable afterwards.
// NOTE(review): mangled extraction — Schedule()'s task-pointer argument,
// several closing braces, and (per the comment fragments) a Flush between
// the final two Get assertions appear to have been dropped.
4326 TEST_F(DBTest
, SanitizeNumThreads
) {
4327 for (int attempt
= 0; attempt
< 2; attempt
++) {
4328 const size_t kTotalTasks
= 8;
4329 test::SleepingBackgroundTask sleeping_tasks
[kTotalTasks
];
4331 Options options
= CurrentOptions();
4333 options
.max_background_compactions
= 3;
4334 options
.max_background_flushes
= 2;
4336 options
.create_if_missing
= true;
4337 DestroyAndReopen(options
);
4339 for (size_t i
= 0; i
< kTotalTasks
; i
++) {
4340 // Insert 5 tasks to low priority queue and 5 tasks to high priority queue
4341 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
,
4343 (i
< 4) ? Env::Priority::LOW
: Env::Priority::HIGH
);
4346 // Wait until 10s for they are scheduled.
4347 for (int i
= 0; i
< 10000; i
++) {
4348 if (options
.env
->GetThreadPoolQueueLen(Env::Priority::LOW
) <= 1 &&
4349 options
.env
->GetThreadPoolQueueLen(Env::Priority::HIGH
) <= 2) {
4352 env_
->SleepForMicroseconds(1000);
4355 // pool size 3, total task 4. Queue size should be 1.
4356 ASSERT_EQ(1U, options
.env
->GetThreadPoolQueueLen(Env::Priority::LOW
));
4357 // pool size 2, total task 4. Queue size should be 2.
4358 ASSERT_EQ(2U, options
.env
->GetThreadPoolQueueLen(Env::Priority::HIGH
));
// Release and drain all sleeping tasks before asserting DB liveness.
4360 for (size_t i
= 0; i
< kTotalTasks
; i
++) {
4361 sleeping_tasks
[i
].WakeUp();
4362 sleeping_tasks
[i
].WaitUntilDone();
4365 ASSERT_OK(Put("abc", "def"));
4366 ASSERT_EQ("def", Get("abc"));
4368 ASSERT_EQ("def", Get("abc"));
// Verifies that concurrent Put and Flush threads block correctly behind an
// in-progress write batch group: TEST_BeginWrite holds the write path while
// the spawned threads wait, then TEST_EndWrite releases them.
// NOTE(review): mangled extraction — the join-loop body (`t.join();`) and
// closing braces appear to have been dropped.
4372 TEST_F(DBTest
, WriteSingleThreadEntry
) {
4373 std::vector
<port::Thread
> threads
;
// Hold the DB mutex and start a write so later Put/Flush must queue.
4374 dbfull()->TEST_LockMutex();
4375 auto w
= dbfull()->TEST_BeginWrite();
4376 threads
.emplace_back([&] { ASSERT_OK(Put("a", "b")); });
4377 env_
->SleepForMicroseconds(10000);
4378 threads
.emplace_back([&] { ASSERT_OK(Flush()); });
4379 env_
->SleepForMicroseconds(10000);
4380 dbfull()->TEST_UnlockMutex();
// Re-acquire to end the write under the mutex, unblocking the waiters.
4381 dbfull()->TEST_LockMutex();
4382 dbfull()->TEST_EndWrite(w
);
4383 dbfull()->TEST_UnlockMutex();
4385 for (auto& t
: threads
) {
// Stress-tests FlushWAL(false) racing with concurrent writers, across all
// four combinations of two_write_queues x manual_wal_flush, then reopens and
// verifies the recovered data is intact.
// NOTE(review): mangled extraction — `Options options`, `WriteOptions wopt`,
// `ReadOptions ropt`, `std::string pval`, the Reopen call, the thread joins,
// and various closing braces appear to have been dropped.
4390 TEST_F(DBTest
, ConcurrentFlushWAL
) {
4391 const size_t cnt
= 100;
4396 for (bool two_write_queues
: {false, true}) {
4397 for (bool manual_wal_flush
: {false, true}) {
4398 options
.two_write_queues
= two_write_queues
;
4399 options
.manual_wal_flush
= manual_wal_flush
;
4400 options
.create_if_missing
= true;
4401 DestroyAndReopen(options
);
4402 std::vector
<port::Thread
> threads
;
// Writer thread: keys a0..a99 through the normal Put path.
4403 threads
.emplace_back([&] {
4404 for (size_t i
= 0; i
< cnt
; i
++) {
4405 auto istr
= std::to_string(i
);
4406 ASSERT_OK(db_
->Put(wopt
, db_
->DefaultColumnFamily(), "a" + istr
,
// Second writer only when two_write_queues: goes through WriteImpl's
// second queue (last arg true) with a manually built WriteBatch.
4410 if (two_write_queues
) {
4411 threads
.emplace_back([&] {
4412 for (size_t i
= cnt
; i
< 2 * cnt
; i
++) {
4413 auto istr
= std::to_string(i
);
4414 WriteBatch
batch(0 /* reserved_bytes */, 0 /* max_bytes */,
4415 wopt
.protection_bytes_per_key
,
4416 0 /* default_cf_ts_sz */);
4417 ASSERT_OK(batch
.Put("a" + istr
, "b" + istr
));
4419 dbfull()->WriteImpl(wopt
, &batch
, nullptr, nullptr, 0, true));
// Flusher thread hammers FlushWAL concurrently with the writers.
4423 threads
.emplace_back([&] {
4424 for (size_t i
= 0; i
< cnt
* 100; i
++) { // FlushWAL is faster than Put
4425 ASSERT_OK(db_
->FlushWAL(false));
4428 for (auto& t
: threads
) {
4431 options
.create_if_missing
= false;
4432 // Recover from the wal and make sure that it is not corrupted
4434 for (size_t i
= 0; i
< cnt
; i
++) {
4436 auto istr
= std::to_string(i
);
4438 db_
->Get(ropt
, db_
->DefaultColumnFamily(), "a" + istr
, &pval
));
4439 ASSERT_TRUE(pval
== ("b" + istr
));
4445 // This test failure will be caught with a probability
// Races 100 Puts against 100 manual FlushWAL(false) calls with
// manual_wal_flush enabled, then verifies fresh writes still land correctly.
// NOTE(review): mangled extraction — `Options options`, `WriteOptions wopts`,
// the writeThread/flushThread join calls, and closing braces appear to have
// been dropped.
4446 TEST_F(DBTest
, ManualFlushWalAndWriteRace
) {
4449 options
.manual_wal_flush
= true;
4450 options
.create_if_missing
= true;
4452 DestroyAndReopen(options
);
4457 port::Thread
writeThread([&]() {
4458 for (int i
= 0; i
< 100; i
++) {
4459 auto istr
= std::to_string(i
);
4460 ASSERT_OK(dbfull()->Put(wopts
, "key_" + istr
, "value_" + istr
));
4463 port::Thread
flushThread([&]() {
4464 for (int i
= 0; i
< 100; i
++) {
4465 ASSERT_OK(dbfull()->FlushWAL(false));
// Post-race sanity writes and reads.
4471 ASSERT_OK(dbfull()->Put(wopts
, "foo1", "value1"));
4472 ASSERT_OK(dbfull()->Put(wopts
, "foo2", "value2"));
4474 ASSERT_EQ("value1", Get("foo1"));
4475 ASSERT_EQ("value2", Get("foo2"));
4478 #ifndef ROCKSDB_LITE
// Verifies runtime-mutable memtable options via SetOptions():
// write_buffer_size grow/shrink changes L0 file sizes as expected, and
// max_write_buffer_number bounds how many puts succeed before writes stop
// while background work is blocked by a sleeping task.
// NOTE(review): mangled extraction — `Options options`, `Random rnd`,
// `int count`, count resets, gen_l0_kb(...) call sites between the assertion
// groups, and closing braces appear to have been dropped.
4479 TEST_F(DBTest
, DynamicMemtableOptions
) {
4480 const uint64_t k64KB
= 1 << 16;
4481 const uint64_t k128KB
= 1 << 17;
4482 const uint64_t k5KB
= 5 * 1024;
4485 options
.create_if_missing
= true;
4486 options
.compression
= kNoCompression
;
4487 options
.max_background_compactions
= 1;
4488 options
.write_buffer_size
= k64KB
;
4489 options
.arena_block_size
= 16 * 1024;
4490 options
.max_write_buffer_number
= 2;
4491 // Don't trigger compact/slowdown/stop
4492 options
.level0_file_num_compaction_trigger
= 1024;
4493 options
.level0_slowdown_writes_trigger
= 1024;
4494 options
.level0_stop_writes_trigger
= 1024;
4495 DestroyAndReopen(options
);
// Helper: write `size` KB of 1KB values, pausing periodically so each
// memtable flushes into its own L0 file (see race note below).
4497 auto gen_l0_kb
= [this](int size
) {
4498 const int kNumPutsBeforeWaitForFlush
= 64;
4500 for (int i
= 0; i
< size
; i
++) {
4501 ASSERT_OK(Put(Key(i
), rnd
.RandomString(1024)));
4503 // The following condition prevents a race condition between flush jobs
4504 // acquiring work and this thread filling up multiple memtables. Without
4505 // this, the flush might produce less files than expected because
4506 // multiple memtables are flushed into a single L0 file. This race
4507 // condition affects assertion (A).
4508 if (i
% kNumPutsBeforeWaitForFlush
== kNumPutsBeforeWaitForFlush
- 1) {
4509 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
4512 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
4515 // Test write_buffer_size
4517 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
4518 ASSERT_LT(SizeAtLevel(0), k64KB
+ k5KB
);
4519 ASSERT_GT(SizeAtLevel(0), k64KB
- k5KB
* 2);
4522 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
4523 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
4525 // Increase buffer size
4526 ASSERT_OK(dbfull()->SetOptions({
4527 {"write_buffer_size", "131072"},
4530 // The existing memtable inflated 64KB->128KB when we invoked SetOptions().
4531 // Write 192KB, we should have a 128KB L0 file and a memtable with 64KB data.
4533 ASSERT_EQ(NumTableFilesAtLevel(0), 1); // (A)
4534 ASSERT_LT(SizeAtLevel(0), k128KB
+ 2 * k5KB
);
4535 ASSERT_GT(SizeAtLevel(0), k128KB
- 4 * k5KB
);
4537 // Decrease buffer size below current usage
4538 ASSERT_OK(dbfull()->SetOptions({
4539 {"write_buffer_size", "65536"},
4541 // The existing memtable became eligible for flush when we reduced its
4542 // capacity to 64KB. Two keys need to be added to trigger flush: first causes
4543 // memtable to be marked full, second schedules the flush. Then we should have
4544 // a 128KB L0 file, a 64KB L0 file, and a memtable with just one key.
4546 ASSERT_EQ(NumTableFilesAtLevel(0), 2);
4547 ASSERT_LT(SizeAtLevel(0), k128KB
+ k64KB
+ 2 * k5KB
);
4548 ASSERT_GT(SizeAtLevel(0), k128KB
+ k64KB
- 4 * k5KB
);
4550 // Test max_write_buffer_number
4551 // Block compaction thread, which will also block the flushes because
4552 // max_background_flushes == 0, so flushes are getting executed by the
4553 // compaction thread
4554 env_
->SetBackgroundThreads(1, Env::LOW
);
4555 test::SleepingBackgroundTask sleeping_task_low
;
4556 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
, &sleeping_task_low
,
4557 Env::Priority::LOW
);
4558 // Start from scratch and disable compaction/flush. Flush can only happen
4559 // during compaction but trigger is pretty high
4560 options
.disable_auto_compactions
= true;
4561 DestroyAndReopen(options
);
4562 env_
->SetBackgroundThreads(0, Env::HIGH
);
4564 // Put until writes are stopped, bounded by 256 puts. We should see stop at
// When the write path stalls, wake the sleeping task so flushes can proceed
// and the loop below terminates.
4569 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
4570 "DBImpl::DelayWrite:Wait",
4571 [&](void* /*arg*/) { sleeping_task_low
.WakeUp(); });
4572 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
4574 while (!sleeping_task_low
.WokenUp() && count
< 256) {
4575 ASSERT_OK(Put(Key(count
), rnd
.RandomString(1024), WriteOptions()));
// With 2 write buffers of 64KB each, stop is expected near 128 puts.
4578 ASSERT_GT(static_cast<double>(count
), 128 * 0.8);
4579 ASSERT_LT(static_cast<double>(count
), 128 * 1.2);
4581 sleeping_task_low
.WaitUntilDone();
4584 ASSERT_OK(dbfull()->SetOptions({
4585 {"max_write_buffer_number", "8"},
4587 // Clean up memtable and L0
4588 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
4590 sleeping_task_low
.Reset();
4591 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
, &sleeping_task_low
,
4592 Env::Priority::LOW
);
// With 8 buffers, stop is expected near 512 puts.
4594 while (!sleeping_task_low
.WokenUp() && count
< 1024) {
4595 ASSERT_OK(Put(Key(count
), rnd
.RandomString(1024), WriteOptions()));
4598 // Windows fails this test. Will tune in the future and figure out
4601 ASSERT_GT(static_cast<double>(count
), 512 * 0.8);
4602 ASSERT_LT(static_cast<double>(count
), 512 * 1.2);
4604 sleeping_task_low
.WaitUntilDone();
4607 ASSERT_OK(dbfull()->SetOptions({
4608 {"max_write_buffer_number", "4"},
4610 // Clean up memtable and L0
4611 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
4613 sleeping_task_low
.Reset();
4614 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
, &sleeping_task_low
,
4615 Env::Priority::LOW
);
// With 4 buffers, stop is expected near 256 puts. (The 266 below matches
// upstream RocksDB's loosened bound; not a typo introduced here.)
4618 while (!sleeping_task_low
.WokenUp() && count
< 1024) {
4619 ASSERT_OK(Put(Key(count
), rnd
.RandomString(1024), WriteOptions()));
4622 // Windows fails this test. Will tune in the future and figure out
4625 ASSERT_GT(static_cast<double>(count
), 256 * 0.8);
4626 ASSERT_LT(static_cast<double>(count
), 266 * 1.2);
4628 sleeping_task_low
.WaitUntilDone();
4630 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
4632 #endif // ROCKSDB_LITE
4634 #ifdef ROCKSDB_USING_THREAD_STATUS
// Asserts that exactly `expected_count` background threads (as reported by
// env->GetThreadList) are currently executing operation `op_type`.
// NOTE(review): mangled extraction — the `int op_count = 0;` declaration, the
// increment inside the if, and closing braces appear to have been dropped;
// the trailing `} // anonymous namespace` implies an enclosing anonymous
// namespace whose opening line is outside this chunk.
4636 void VerifyOperationCount(Env
* env
, ThreadStatus::OperationType op_type
,
4637 int expected_count
) {
4639 std::vector
<ThreadStatus
> thread_list
;
4640 ASSERT_OK(env
->GetThreadList(&thread_list
));
// Count threads whose current operation matches op_type.
4641 for (auto thread
: thread_list
) {
4642 if (thread
.operation_type
== op_type
) {
4646 ASSERT_EQ(op_count
, expected_count
);
4648 } // anonymous namespace
// Verifies env_->GetThreadList(): after resizing the HIGH/LOW/BOTTOM thread
// pools to several configurations, the reported per-type thread counts must
// match; then verifies the thread-status column-family info map tracks CF
// creation and drop.
// NOTE(review): mangled extraction — `Options options`, ASSERT_OK(s) checks,
// the boolean argument to TEST_VerifyColumnFamilyInfoMap, a Close() call,
// break statements, and closing braces appear to have been dropped.
4650 TEST_F(DBTest
, GetThreadStatus
) {
4653 options
.enable_thread_tracking
= true;
4656 std::vector
<ThreadStatus
> thread_list
;
4657 Status s
= env_
->GetThreadList(&thread_list
);
4659 for (int i
= 0; i
< 2; ++i
) {
4660 // repeat the test with differet number of high / low priority threads
4661 const int kTestCount
= 3;
4662 const unsigned int kHighPriCounts
[kTestCount
] = {3, 2, 5};
4663 const unsigned int kLowPriCounts
[kTestCount
] = {10, 15, 3};
4664 const unsigned int kBottomPriCounts
[kTestCount
] = {2, 1, 4};
4665 for (int test
= 0; test
< kTestCount
; ++test
) {
4666 // Change the number of threads in high / low priority pool.
4667 env_
->SetBackgroundThreads(kHighPriCounts
[test
], Env::HIGH
);
4668 env_
->SetBackgroundThreads(kLowPriCounts
[test
], Env::LOW
);
4669 env_
->SetBackgroundThreads(kBottomPriCounts
[test
], Env::BOTTOM
);
4670 // Wait to ensure the all threads has been registered
4671 unsigned int thread_type_counts
[ThreadStatus::NUM_THREAD_TYPES
];
4672 // TODO(ajkr): it'd be better if SetBackgroundThreads returned only after
4673 // all threads have been registered.
4674 // Try up to 60 seconds.
4675 for (int num_try
= 0; num_try
< 60000; num_try
++) {
4676 env_
->SleepForMicroseconds(1000);
4677 thread_list
.clear();
4678 s
= env_
->GetThreadList(&thread_list
);
// Tally how many registered threads exist per thread type.
4680 memset(thread_type_counts
, 0, sizeof(thread_type_counts
));
4681 for (auto thread
: thread_list
) {
4682 ASSERT_LT(thread
.thread_type
, ThreadStatus::NUM_THREAD_TYPES
);
4683 thread_type_counts
[thread
.thread_type
]++;
// Stop polling once all three pools show the expected counts.
4685 if (thread_type_counts
[ThreadStatus::HIGH_PRIORITY
] ==
4686 kHighPriCounts
[test
] &&
4687 thread_type_counts
[ThreadStatus::LOW_PRIORITY
] ==
4688 kLowPriCounts
[test
] &&
4689 thread_type_counts
[ThreadStatus::BOTTOM_PRIORITY
] ==
4690 kBottomPriCounts
[test
]) {
4694 // Verify the number of high-priority threads
4695 ASSERT_EQ(thread_type_counts
[ThreadStatus::HIGH_PRIORITY
],
4696 kHighPriCounts
[test
]);
4697 // Verify the number of low-priority threads
4698 ASSERT_EQ(thread_type_counts
[ThreadStatus::LOW_PRIORITY
],
4699 kLowPriCounts
[test
]);
4700 // Verify the number of bottom-priority threads
4701 ASSERT_EQ(thread_type_counts
[ThreadStatus::BOTTOM_PRIORITY
],
4702 kBottomPriCounts
[test
]);
4705 // repeat the test with multiple column families
4706 CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options
);
4707 env_
->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_
,
// Drop a CF and confirm the info map reflects the removal.
4711 ASSERT_OK(db_
->DropColumnFamily(handles_
[2]));
4713 handles_
.erase(handles_
.begin() + 2);
4714 env_
->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_
,
4717 env_
->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_
,
// With enable_thread_tracking = false, the thread-status updater must report
// no column-family info for the opened CFs.
// NOTE(review): mangled extraction — `Options options`, the second argument
// to TEST_VerifyColumnFamilyInfoMap, and the closing brace appear dropped.
4721 TEST_F(DBTest
, DisableThreadStatus
) {
4724 options
.enable_thread_tracking
= false;
4726 CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options
);
4727 // Verify non of the column family info exists
4728 env_
->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_
,
// Verifies OP_FLUSH thread-status reporting and kNumRunningFlushes: sync
// points hold a flush job open so exactly one running flush is observable
// between sync points 1 and 2.
4732 TEST_F(DBTest
, ThreadStatusFlush
) {
4735 options
.write_buffer_size
= 100000; // Small write buffer
4736 options
.enable_thread_tracking
= true;
4737 options
= CurrentOptions(options
);
4739 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
4740 {"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"},
4741 {"DBTest::ThreadStatusFlush:2", "FlushJob::WriteLevel0Table"},
4743 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
4745 CreateAndReopenWithCF({"pikachu"}, options
);
// No flush running before any writes.
4746 VerifyOperationCount(env_
, ThreadStatus::OP_FLUSH
, 0);
4748 ASSERT_OK(Put(1, "foo", "v1"));
4749 ASSERT_EQ("v1", Get(1, "foo"));
4750 VerifyOperationCount(env_
, ThreadStatus::OP_FLUSH
, 0);
4752 uint64_t num_running_flushes
= 0;
4753 ASSERT_TRUE(db_
->GetIntProperty(DB::Properties::kNumRunningFlushes
,
4754 &num_running_flushes
));
4755 ASSERT_EQ(num_running_flushes
, 0);
4757 ASSERT_OK(Put(1, "k1", std::string(100000, 'x'))); // Fill memtable
4758 ASSERT_OK(Put(1, "k2", std::string(100000, 'y'))); // Trigger flush
4760 // The first sync point is to make sure there's one flush job
4761 // running when we perform VerifyOperationCount().
4762 TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1");
4763 VerifyOperationCount(env_
, ThreadStatus::OP_FLUSH
, 1);
4764 ASSERT_TRUE(db_
->GetIntProperty(DB::Properties::kNumRunningFlushes
,
4765 &num_running_flushes
));
4766 ASSERT_EQ(num_running_flushes
, 1);
4767 // This second sync point is to ensure the flush job will not
4768 // be completed until we already perform VerifyOperationCount().
4769 TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2");
4770 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
// Verifies OP_COMPACTION thread-status reporting for a single L0->L1
// compaction, run twice: once with thread tracking enabled and once
// disabled. Sync points pin the compaction open while counts are checked.
// NOTE(review): mangled extraction — `Options options`, `Random rnd`, an
// else keyword between the two VerifyOperationCount branches, Flush calls,
// and closing braces appear to have been dropped.
4773 TEST_P(DBTestWithParam
, ThreadStatusSingleCompaction
) {
4774 const int kTestKeySize
= 16;
4775 const int kTestValueSize
= 984;
4776 const int kEntrySize
= kTestKeySize
+ kTestValueSize
;
4777 const int kEntriesPerBuffer
= 100;
4779 options
.create_if_missing
= true;
4780 options
.write_buffer_size
= kEntrySize
* kEntriesPerBuffer
;
4781 options
.compaction_style
= kCompactionStyleLevel
;
4782 options
.target_file_size_base
= options
.write_buffer_size
;
4783 options
.max_bytes_for_level_base
= options
.target_file_size_base
* 2;
4784 options
.max_bytes_for_level_multiplier
= 2;
4785 options
.compression
= kNoCompression
;
4786 options
= CurrentOptions(options
);
4788 options
.enable_thread_tracking
= true;
4789 const int kNumL0Files
= 4;
4790 options
.level0_file_num_compaction_trigger
= kNumL0Files
;
4791 options
.max_subcompactions
= max_subcompactions_
;
4793 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
4794 {"DBTest::ThreadStatusSingleCompaction:0", "DBImpl::BGWorkCompaction"},
4795 {"CompactionJob::Run():Start", "DBTest::ThreadStatusSingleCompaction:1"},
4796 {"DBTest::ThreadStatusSingleCompaction:2", "CompactionJob::Run():End"},
4798 for (int tests
= 0; tests
< 2; ++tests
) {
4799 DestroyAndReopen(options
);
4800 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
4801 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
// Fill kNumL0Files memtables' worth of keys to arm the compaction trigger.
4805 for (int file
= 0; file
< kNumL0Files
; ++file
) {
4806 for (int key
= 0; key
< kEntriesPerBuffer
; ++key
) {
4807 ASSERT_OK(Put(std::to_string(key
+ file
* kEntriesPerBuffer
),
4808 rnd
.RandomString(kTestValueSize
)));
4812 // This makes sure a compaction won't be scheduled until
4813 // we have done with the above Put Phase.
4814 uint64_t num_running_compactions
= 0;
4815 ASSERT_TRUE(db_
->GetIntProperty(DB::Properties::kNumRunningCompactions
,
4816 &num_running_compactions
));
4817 ASSERT_EQ(num_running_compactions
, 0);
4818 TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:0");
4819 ASSERT_GE(NumTableFilesAtLevel(0),
4820 options
.level0_file_num_compaction_trigger
);
4822 // This makes sure at least one compaction is running.
4823 TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:1");
4825 if (options
.enable_thread_tracking
) {
4826 // expecting one single L0 to L1 compaction
4827 VerifyOperationCount(env_
, ThreadStatus::OP_COMPACTION
, 1);
4829 // If thread tracking is not enabled, compaction count should be 0.
4830 VerifyOperationCount(env_
, ThreadStatus::OP_COMPACTION
, 0);
4832 ASSERT_TRUE(db_
->GetIntProperty(DB::Properties::kNumRunningCompactions
,
4833 &num_running_compactions
));
4834 ASSERT_EQ(num_running_compactions
, 1);
4835 // TODO(yhchiang): adding assert to verify each compaction stage.
4836 TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:2");
4838 // repeat the test with disabling thread tracking.
4839 options
.enable_thread_tracking
= false;
4840 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
// Runs a sequence of manual compactions over ranges before/after/overlapping
// the files, then calls CancelAllBackgroundWork and verifies a subsequent
// CompactRange fails with ShutdownInProgress and leaves the LSM unchanged.
// NOTE(review): mangled extraction — the ASSERT_TRUE( wrapper around the
// post-shutdown CompactRange, the iter==1 reconfiguration guard, and closing
// braces appear to have been dropped.
4844 TEST_P(DBTestWithParam
, PreShutdownManualCompaction
) {
4845 Options options
= CurrentOptions();
4846 options
.max_subcompactions
= max_subcompactions_
;
4847 CreateAndReopenWithCF({"pikachu"}, options
);
4849 // iter - 0 with 7 levels
4850 // iter - 1 with 3 levels
4851 for (int iter
= 0; iter
< 2; ++iter
) {
4852 MakeTables(3, "p", "q", 1);
4853 ASSERT_EQ("1,1,1", FilesPerLevel(1));
4855 // Compaction range falls before files
4856 Compact(1, "", "c");
4857 ASSERT_EQ("1,1,1", FilesPerLevel(1));
4859 // Compaction range falls after files
4860 Compact(1, "r", "z");
4861 ASSERT_EQ("1,1,1", FilesPerLevel(1));
4863 // Compaction range overlaps files
4864 Compact(1, "p", "q");
4865 ASSERT_EQ("0,0,1", FilesPerLevel(1));
4867 // Populate a different range
4868 MakeTables(3, "c", "e", 1);
4869 ASSERT_EQ("1,1,2", FilesPerLevel(1));
4871 // Compact just the new range
4872 Compact(1, "b", "f");
4873 ASSERT_EQ("0,0,2", FilesPerLevel(1));
4876 MakeTables(1, "a", "z", 1);
4877 ASSERT_EQ("1,0,2", FilesPerLevel(1));
// After cancelling background work, manual compaction must refuse to run.
4878 CancelAllBackgroundWork(db_
);
4880 db_
->CompactRange(CompactRangeOptions(), handles_
[1], nullptr, nullptr)
4881 .IsShutdownInProgress());
4882 ASSERT_EQ("1,0,2", FilesPerLevel(1));
// Reconfigure (presumably for the 3-level second iteration) and reopen.
4885 options
= CurrentOptions();
4886 options
.num_levels
= 3;
4887 options
.create_if_missing
= true;
4888 DestroyAndReopen(options
);
4889 CreateAndReopenWithCF({"pikachu"}, options
);
// After CancelAllBackgroundWork, CompactRange must return a
// ShutdownInProgress status.
// NOTE(review): mangled extraction — the `Status s =` assignment preceding
// the CompactRange call and the closing brace appear to have been dropped.
4894 TEST_F(DBTest
, PreShutdownFlush
) {
4895 Options options
= CurrentOptions();
4896 CreateAndReopenWithCF({"pikachu"}, options
);
4897 ASSERT_OK(Put(1, "key", "value"));
4898 CancelAllBackgroundWork(db_
);
4900 db_
->CompactRange(CompactRangeOptions(), handles_
[1], nullptr, nullptr);
4901 ASSERT_TRUE(s
.IsShutdownInProgress());
4904 TEST_P(DBTestWithParam
, PreShutdownMultipleCompaction
) {
4905 const int kTestKeySize
= 16;
4906 const int kTestValueSize
= 984;
4907 const int kEntrySize
= kTestKeySize
+ kTestValueSize
;
4908 const int kEntriesPerBuffer
= 40;
4909 const int kNumL0Files
= 4;
4911 const int kHighPriCount
= 3;
4912 const int kLowPriCount
= 5;
4913 env_
->SetBackgroundThreads(kHighPriCount
, Env::HIGH
);
4914 env_
->SetBackgroundThreads(kLowPriCount
, Env::LOW
);
4917 options
.create_if_missing
= true;
4918 options
.write_buffer_size
= kEntrySize
* kEntriesPerBuffer
;
4919 options
.compaction_style
= kCompactionStyleLevel
;
4920 options
.target_file_size_base
= options
.write_buffer_size
;
4921 options
.max_bytes_for_level_base
=
4922 options
.target_file_size_base
* kNumL0Files
;
4923 options
.compression
= kNoCompression
;
4924 options
= CurrentOptions(options
);
4926 options
.enable_thread_tracking
= true;
4927 options
.level0_file_num_compaction_trigger
= kNumL0Files
;
4928 options
.max_bytes_for_level_multiplier
= 2;
4929 options
.max_background_compactions
= kLowPriCount
;
4930 options
.level0_stop_writes_trigger
= 1 << 10;
4931 options
.level0_slowdown_writes_trigger
= 1 << 10;
4932 options
.max_subcompactions
= max_subcompactions_
;
4937 std::vector
<ThreadStatus
> thread_list
;
4938 // Delay both flush and compaction
4939 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
4940 {{"FlushJob::FlushJob()", "CompactionJob::Run():Start"},
4941 {"CompactionJob::Run():Start",
4942 "DBTest::PreShutdownMultipleCompaction:Preshutdown"},
4943 {"CompactionJob::Run():Start",
4944 "DBTest::PreShutdownMultipleCompaction:VerifyCompaction"},
4945 {"DBTest::PreShutdownMultipleCompaction:Preshutdown",
4946 "CompactionJob::Run():End"},
4947 {"CompactionJob::Run():End",
4948 "DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown"}});
4950 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
4952 // Make rocksdb busy
4954 // check how many threads are doing compaction using GetThreadList
4955 int operation_count
[ThreadStatus::NUM_OP_TYPES
] = {0};
4956 for (int file
= 0; file
< 16 * kNumL0Files
; ++file
) {
4957 for (int k
= 0; k
< kEntriesPerBuffer
; ++k
) {
4958 ASSERT_OK(Put(std::to_string(key
++), rnd
.RandomString(kTestValueSize
)));
4961 ASSERT_OK(env_
->GetThreadList(&thread_list
));
4962 for (auto thread
: thread_list
) {
4963 operation_count
[thread
.operation_type
]++;
4966 // Speed up the test
4967 if (operation_count
[ThreadStatus::OP_FLUSH
] > 1 &&
4968 operation_count
[ThreadStatus::OP_COMPACTION
] >
4969 0.6 * options
.max_background_compactions
) {
4972 if (file
== 15 * kNumL0Files
) {
4973 TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
4977 TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
4978 ASSERT_GE(operation_count
[ThreadStatus::OP_COMPACTION
], 1);
4979 CancelAllBackgroundWork(db_
);
4980 TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown");
4981 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4982 // Record the number of compactions at a time.
4983 for (int i
= 0; i
< ThreadStatus::NUM_OP_TYPES
; ++i
) {
4984 operation_count
[i
] = 0;
4986 ASSERT_OK(env_
->GetThreadList(&thread_list
));
4987 for (auto thread
: thread_list
) {
4988 operation_count
[thread
.operation_type
]++;
4990 ASSERT_EQ(operation_count
[ThreadStatus::OP_COMPACTION
], 0);
4993 TEST_P(DBTestWithParam
, PreShutdownCompactionMiddle
) {
4994 const int kTestKeySize
= 16;
4995 const int kTestValueSize
= 984;
4996 const int kEntrySize
= kTestKeySize
+ kTestValueSize
;
4997 const int kEntriesPerBuffer
= 40;
4998 const int kNumL0Files
= 4;
5000 const int kHighPriCount
= 3;
5001 const int kLowPriCount
= 5;
5002 env_
->SetBackgroundThreads(kHighPriCount
, Env::HIGH
);
5003 env_
->SetBackgroundThreads(kLowPriCount
, Env::LOW
);
5006 options
.create_if_missing
= true;
5007 options
.write_buffer_size
= kEntrySize
* kEntriesPerBuffer
;
5008 options
.compaction_style
= kCompactionStyleLevel
;
5009 options
.target_file_size_base
= options
.write_buffer_size
;
5010 options
.max_bytes_for_level_base
=
5011 options
.target_file_size_base
* kNumL0Files
;
5012 options
.compression
= kNoCompression
;
5013 options
= CurrentOptions(options
);
5015 options
.enable_thread_tracking
= true;
5016 options
.level0_file_num_compaction_trigger
= kNumL0Files
;
5017 options
.max_bytes_for_level_multiplier
= 2;
5018 options
.max_background_compactions
= kLowPriCount
;
5019 options
.level0_stop_writes_trigger
= 1 << 10;
5020 options
.level0_slowdown_writes_trigger
= 1 << 10;
5021 options
.max_subcompactions
= max_subcompactions_
;
5026 std::vector
<ThreadStatus
> thread_list
;
5027 // Delay both flush and compaction
5028 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
5029 {{"DBTest::PreShutdownCompactionMiddle:Preshutdown",
5030 "CompactionJob::Run():Inprogress"},
5031 {"CompactionJob::Run():Start",
5032 "DBTest::PreShutdownCompactionMiddle:VerifyCompaction"},
5033 {"CompactionJob::Run():Inprogress", "CompactionJob::Run():End"},
5034 {"CompactionJob::Run():End",
5035 "DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown"}});
5037 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
5039 // Make rocksdb busy
5041 // check how many threads are doing compaction using GetThreadList
5042 int operation_count
[ThreadStatus::NUM_OP_TYPES
] = {0};
5043 for (int file
= 0; file
< 16 * kNumL0Files
; ++file
) {
5044 for (int k
= 0; k
< kEntriesPerBuffer
; ++k
) {
5045 ASSERT_OK(Put(std::to_string(key
++), rnd
.RandomString(kTestValueSize
)));
5048 ASSERT_OK(env_
->GetThreadList(&thread_list
));
5049 for (auto thread
: thread_list
) {
5050 operation_count
[thread
.operation_type
]++;
5053 // Speed up the test
5054 if (operation_count
[ThreadStatus::OP_FLUSH
] > 1 &&
5055 operation_count
[ThreadStatus::OP_COMPACTION
] >
5056 0.6 * options
.max_background_compactions
) {
5059 if (file
== 15 * kNumL0Files
) {
5060 TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyCompaction");
5064 ASSERT_GE(operation_count
[ThreadStatus::OP_COMPACTION
], 1);
5065 CancelAllBackgroundWork(db_
);
5066 TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:Preshutdown");
5067 TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown");
5068 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5069 // Record the number of compactions at a time.
5070 for (int i
= 0; i
< ThreadStatus::NUM_OP_TYPES
; ++i
) {
5071 operation_count
[i
] = 0;
5073 ASSERT_OK(env_
->GetThreadList(&thread_list
));
5074 for (auto thread
: thread_list
) {
5075 operation_count
[thread
.operation_type
]++;
5077 ASSERT_EQ(operation_count
[ThreadStatus::OP_COMPACTION
], 0);
5080 #endif // ROCKSDB_USING_THREAD_STATUS
5082 #ifndef ROCKSDB_LITE
5083 TEST_F(DBTest
, FlushOnDestroy
) {
5085 wo
.disableWAL
= true;
5086 ASSERT_OK(Put("foo", "v1", wo
));
5087 CancelAllBackgroundWork(db_
);
5090 TEST_F(DBTest
, DynamicLevelCompressionPerLevel
) {
5091 if (!Snappy_Supported()) {
5094 const int kNKeys
= 120;
5096 for (int i
= 0; i
< kNKeys
; i
++) {
5099 RandomShuffle(std::begin(keys
), std::end(keys
));
5104 options
.create_if_missing
= true;
5105 options
.db_write_buffer_size
= 20480;
5106 options
.write_buffer_size
= 20480;
5107 options
.max_write_buffer_number
= 2;
5108 options
.level0_file_num_compaction_trigger
= 2;
5109 options
.level0_slowdown_writes_trigger
= 2;
5110 options
.level0_stop_writes_trigger
= 2;
5111 options
.target_file_size_base
= 20480;
5112 options
.level_compaction_dynamic_level_bytes
= true;
5113 options
.max_bytes_for_level_base
= 102400;
5114 options
.max_bytes_for_level_multiplier
= 4;
5115 options
.max_background_compactions
= 1;
5116 options
.num_levels
= 5;
5118 options
.compression_per_level
.resize(3);
5119 options
.compression_per_level
[0] = kNoCompression
;
5120 options
.compression_per_level
[1] = kNoCompression
;
5121 options
.compression_per_level
[2] = kSnappyCompression
;
5123 OnFileDeletionListener
* listener
= new OnFileDeletionListener();
5124 options
.listeners
.emplace_back(listener
);
5126 DestroyAndReopen(options
);
5128 // Insert more than 80K. L4 should be base level. Neither L0 nor L4 should
5129 // be compressed, so total data size should be more than 80K.
5130 for (int i
= 0; i
< 20; i
++) {
5131 ASSERT_OK(Put(Key(keys
[i
]), CompressibleString(&rnd
, 4000)));
5134 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5136 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
5137 ASSERT_EQ(NumTableFilesAtLevel(2), 0);
5138 ASSERT_EQ(NumTableFilesAtLevel(3), 0);
5139 // Assuming each files' metadata is at least 50 bytes/
5140 ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(4), 20U * 4000U + 50U * 4);
5142 // Insert 400KB. Some data will be compressed
5143 for (int i
= 21; i
< 120; i
++) {
5144 ASSERT_OK(Put(Key(keys
[i
]), CompressibleString(&rnd
, 4000)));
5147 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5148 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
5149 ASSERT_EQ(NumTableFilesAtLevel(2), 0);
5151 ASSERT_LT(SizeAtLevel(0) + SizeAtLevel(3) + SizeAtLevel(4),
5152 120U * 4000U + 50U * 24);
5153 // Make sure data in files in L3 is not compacted by removing all files
5154 // in L4 and calculate number of rows
5155 ASSERT_OK(dbfull()->SetOptions({
5156 {"disable_auto_compactions", "true"},
5158 ColumnFamilyMetaData cf_meta
;
5159 db_
->GetColumnFamilyMetaData(&cf_meta
);
5160 for (auto file
: cf_meta
.levels
[4].files
) {
5161 listener
->SetExpectedFileName(dbname_
+ file
.name
);
5162 ASSERT_OK(dbfull()->DeleteFile(file
.name
));
5164 listener
->VerifyMatchedCount(cf_meta
.levels
[4].files
.size());
5167 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(ReadOptions()));
5168 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
5171 ASSERT_OK(iter
->status());
5172 ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(3), num_keys
* 4000U + num_keys
* 10U);
5175 TEST_F(DBTest
, DynamicLevelCompressionPerLevel2
) {
5176 if (!Snappy_Supported() || !LZ4_Supported() || !Zlib_Supported()) {
5179 const int kNKeys
= 500;
5181 for (int i
= 0; i
< kNKeys
; i
++) {
5184 RandomShuffle(std::begin(keys
), std::end(keys
));
5188 options
.create_if_missing
= true;
5189 options
.db_write_buffer_size
= 6000000;
5190 options
.write_buffer_size
= 600000;
5191 options
.max_write_buffer_number
= 2;
5192 options
.level0_file_num_compaction_trigger
= 2;
5193 options
.level0_slowdown_writes_trigger
= 2;
5194 options
.level0_stop_writes_trigger
= 2;
5195 options
.soft_pending_compaction_bytes_limit
= 1024 * 1024;
5196 options
.target_file_size_base
= 20;
5198 options
.level_compaction_dynamic_level_bytes
= true;
5199 options
.max_bytes_for_level_base
= 200;
5200 options
.max_bytes_for_level_multiplier
= 8;
5201 options
.max_background_compactions
= 1;
5202 options
.num_levels
= 5;
5203 std::shared_ptr
<mock::MockTableFactory
> mtf(new mock::MockTableFactory
);
5204 options
.table_factory
= mtf
;
5206 options
.compression_per_level
.resize(3);
5207 options
.compression_per_level
[0] = kNoCompression
;
5208 options
.compression_per_level
[1] = kLZ4Compression
;
5209 options
.compression_per_level
[2] = kZlibCompression
;
5211 DestroyAndReopen(options
);
5212 // When base level is L4, L4 is LZ4.
5213 std::atomic
<int> num_zlib(0);
5214 std::atomic
<int> num_lz4(0);
5215 std::atomic
<int> num_no(0);
5216 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
5217 "LevelCompactionPicker::PickCompaction:Return", [&](void* arg
) {
5218 Compaction
* compaction
= reinterpret_cast<Compaction
*>(arg
);
5219 if (compaction
->output_level() == 4) {
5220 ASSERT_TRUE(compaction
->output_compression() == kLZ4Compression
);
5221 num_lz4
.fetch_add(1);
5224 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
5225 "FlushJob::WriteLevel0Table:output_compression", [&](void* arg
) {
5226 auto* compression
= reinterpret_cast<CompressionType
*>(arg
);
5227 ASSERT_TRUE(*compression
== kNoCompression
);
5228 num_no
.fetch_add(1);
5230 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
5232 for (int i
= 0; i
< 100; i
++) {
5233 std::string value
= rnd
.RandomString(200);
5234 ASSERT_OK(Put(Key(keys
[i
]), value
));
5237 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5242 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
5243 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5244 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
5245 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
5247 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
5248 ASSERT_EQ(NumTableFilesAtLevel(2), 0);
5249 ASSERT_EQ(NumTableFilesAtLevel(3), 0);
5250 ASSERT_GT(NumTableFilesAtLevel(4), 0);
5251 ASSERT_GT(num_no
.load(), 2);
5252 ASSERT_GT(num_lz4
.load(), 0);
5253 int prev_num_files_l4
= NumTableFilesAtLevel(4);
5255 // After base level turn L4->L3, L3 becomes LZ4 and L4 becomes Zlib
5258 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
5259 "LevelCompactionPicker::PickCompaction:Return", [&](void* arg
) {
5260 Compaction
* compaction
= reinterpret_cast<Compaction
*>(arg
);
5261 if (compaction
->output_level() == 4 && compaction
->start_level() == 3) {
5262 ASSERT_TRUE(compaction
->output_compression() == kZlibCompression
);
5263 num_zlib
.fetch_add(1);
5265 ASSERT_TRUE(compaction
->output_compression() == kLZ4Compression
);
5266 num_lz4
.fetch_add(1);
5269 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
5270 "FlushJob::WriteLevel0Table:output_compression", [&](void* arg
) {
5271 auto* compression
= reinterpret_cast<CompressionType
*>(arg
);
5272 ASSERT_TRUE(*compression
== kNoCompression
);
5273 num_no
.fetch_add(1);
5275 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
5277 for (int i
= 101; i
< 500; i
++) {
5278 std::string value
= rnd
.RandomString(200);
5279 ASSERT_OK(Put(Key(keys
[i
]), value
));
5280 if (i
% 100 == 99) {
5282 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5286 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
5287 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
5288 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
5289 ASSERT_EQ(NumTableFilesAtLevel(2), 0);
5290 ASSERT_GT(NumTableFilesAtLevel(3), 0);
5291 ASSERT_GT(NumTableFilesAtLevel(4), prev_num_files_l4
);
5292 ASSERT_GT(num_no
.load(), 2);
5293 ASSERT_GT(num_lz4
.load(), 0);
5294 ASSERT_GT(num_zlib
.load(), 0);
5297 TEST_F(DBTest
, DynamicCompactionOptions
) {
5298 // minimum write buffer size is enforced at 64KB
5299 const uint64_t k32KB
= 1 << 15;
5300 const uint64_t k64KB
= 1 << 16;
5301 const uint64_t k128KB
= 1 << 17;
5302 const uint64_t k1MB
= 1 << 20;
5303 const uint64_t k4KB
= 1 << 12;
5306 options
.create_if_missing
= true;
5307 options
.compression
= kNoCompression
;
5308 options
.soft_pending_compaction_bytes_limit
= 1024 * 1024;
5309 options
.write_buffer_size
= k64KB
;
5310 options
.arena_block_size
= 4 * k4KB
;
5311 options
.max_write_buffer_number
= 2;
5312 // Compaction related options
5313 options
.level0_file_num_compaction_trigger
= 3;
5314 options
.level0_slowdown_writes_trigger
= 4;
5315 options
.level0_stop_writes_trigger
= 8;
5316 options
.target_file_size_base
= k64KB
;
5317 options
.max_compaction_bytes
= options
.target_file_size_base
* 10;
5318 options
.target_file_size_multiplier
= 1;
5319 options
.max_bytes_for_level_base
= k128KB
;
5320 options
.max_bytes_for_level_multiplier
= 4;
5322 // Block flush thread and disable compaction thread
5323 env_
->SetBackgroundThreads(1, Env::LOW
);
5324 env_
->SetBackgroundThreads(1, Env::HIGH
);
5325 DestroyAndReopen(options
);
5327 auto gen_l0_kb
= [this](int start
, int size
, int stride
) {
5329 for (int i
= 0; i
< size
; i
++) {
5330 ASSERT_OK(Put(Key(start
+ stride
* i
), rnd
.RandomString(1024)));
5332 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
5335 // Write 3 files that have the same key range.
5336 // Since level0_file_num_compaction_trigger is 3, compaction should be
5337 // triggered. The compaction should result in one L1 file
5338 gen_l0_kb(0, 64, 1);
5339 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
5340 gen_l0_kb(0, 64, 1);
5341 ASSERT_EQ(NumTableFilesAtLevel(0), 2);
5342 gen_l0_kb(0, 64, 1);
5343 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5344 ASSERT_EQ("0,1", FilesPerLevel());
5345 std::vector
<LiveFileMetaData
> metadata
;
5346 db_
->GetLiveFilesMetaData(&metadata
);
5347 ASSERT_EQ(1U, metadata
.size());
5348 ASSERT_LE(metadata
[0].size
, k64KB
+ k4KB
);
5349 ASSERT_GE(metadata
[0].size
, k64KB
- k4KB
);
5351 // Test compaction trigger and target_file_size_base
5352 // Reduce compaction trigger to 2, and reduce L1 file size to 32KB.
5353 // Writing to 64KB L0 files should trigger a compaction. Since these
5354 // 2 L0 files have the same key range, compaction merge them and should
5355 // result in 2 32KB L1 files.
5357 dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"},
5358 {"target_file_size_base", std::to_string(k32KB
)}}));
5360 gen_l0_kb(0, 64, 1);
5361 ASSERT_EQ("1,1", FilesPerLevel());
5362 gen_l0_kb(0, 64, 1);
5363 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5364 ASSERT_EQ("0,2", FilesPerLevel());
5366 db_
->GetLiveFilesMetaData(&metadata
);
5367 ASSERT_EQ(2U, metadata
.size());
5368 ASSERT_LE(metadata
[0].size
, k32KB
+ k4KB
);
5369 ASSERT_GE(metadata
[0].size
, k32KB
- k4KB
);
5370 ASSERT_LE(metadata
[1].size
, k32KB
+ k4KB
);
5371 ASSERT_GE(metadata
[1].size
, k32KB
- k4KB
);
5373 // Test max_bytes_for_level_base
5374 // Increase level base size to 256KB and write enough data that will
5375 // fill L1 and L2. L1 size should be around 256KB while L2 size should be
5376 // around 256KB x 4.
5377 ASSERT_OK(dbfull()->SetOptions(
5378 {{"max_bytes_for_level_base", std::to_string(k1MB
)}}));
5380 // writing 96 x 64KB => 6 * 1024KB
5381 // (L1 + L2) = (1 + 4) * 1024KB
5382 for (int i
= 0; i
< 96; ++i
) {
5383 gen_l0_kb(i
, 64, 96);
5385 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5386 ASSERT_GT(SizeAtLevel(1), k1MB
/ 2);
5387 ASSERT_LT(SizeAtLevel(1), k1MB
+ k1MB
/ 2);
5389 // Within (0.5, 1.5) of 4MB.
5390 ASSERT_GT(SizeAtLevel(2), 2 * k1MB
);
5391 ASSERT_LT(SizeAtLevel(2), 6 * k1MB
);
5393 // Test max_bytes_for_level_multiplier and
5394 // max_bytes_for_level_base. Now, reduce both mulitplier and level base,
5395 // After filling enough data that can fit in L1 - L3, we should see L1 size
5396 // reduces to 128KB from 256KB which was asserted previously. Same for L2.
5397 ASSERT_OK(dbfull()->SetOptions(
5398 {{"max_bytes_for_level_multiplier", "2"},
5399 {"max_bytes_for_level_base", std::to_string(k128KB
)}}));
5401 // writing 20 x 64KB = 10 x 128KB
5402 // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB
5403 for (int i
= 0; i
< 20; ++i
) {
5404 gen_l0_kb(i
, 64, 32);
5406 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5407 uint64_t total_size
= SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3);
5408 ASSERT_TRUE(total_size
< k128KB
* 7 * 1.5);
5410 // Test level0_stop_writes_trigger.
5411 // Clean up memtable and L0. Block compaction threads. If continue to write
5412 // and flush memtables. We should see put stop after 8 memtable flushes
5413 // since level0_stop_writes_trigger = 8
5414 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
5415 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
5417 test::SleepingBackgroundTask sleeping_task_low
;
5418 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
, &sleeping_task_low
,
5419 Env::Priority::LOW
);
5420 sleeping_task_low
.WaitUntilSleeping();
5421 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
5425 while (count
< 64) {
5426 ASSERT_OK(Put(Key(count
), rnd
.RandomString(1024), wo
));
5427 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
5429 if (dbfull()->TEST_write_controler().IsStopped()) {
5430 sleeping_task_low
.WakeUp();
5435 ASSERT_EQ(count
, 8);
5437 sleeping_task_low
.WaitUntilDone();
5439 // Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0.
5440 // Block compaction thread again. Perform the put and memtable flushes
5441 // until we see the stop after 6 memtable flushes.
5442 ASSERT_OK(dbfull()->SetOptions({{"level0_stop_writes_trigger", "6"}}));
5443 ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
5444 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
5445 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
5447 // Block compaction again
5448 sleeping_task_low
.Reset();
5449 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
, &sleeping_task_low
,
5450 Env::Priority::LOW
);
5451 sleeping_task_low
.WaitUntilSleeping();
5453 while (count
< 64) {
5454 ASSERT_OK(Put(Key(count
), rnd
.RandomString(1024), wo
));
5455 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
5457 if (dbfull()->TEST_write_controler().IsStopped()) {
5458 sleeping_task_low
.WakeUp();
5462 ASSERT_EQ(count
, 6);
5464 sleeping_task_low
.WaitUntilDone();
5466 // Test disable_auto_compactions
5467 // Compaction thread is unblocked but auto compaction is disabled. Write
5468 // 4 L0 files and compaction should be triggered. If auto compaction is
5469 // disabled, then TEST_WaitForCompact will be waiting for nothing. Number of
5470 // L0 files do not change after the call.
5471 ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "true"}}));
5472 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
5473 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
5475 for (int i
= 0; i
< 4; ++i
) {
5476 ASSERT_OK(Put(Key(i
), rnd
.RandomString(1024)));
5477 // Wait for compaction so that put won't stop
5478 ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
5480 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5481 ASSERT_EQ(NumTableFilesAtLevel(0), 4);
5483 // Enable auto compaction and perform the same test, # of L0 files should be
5484 // reduced after compaction.
5485 ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}}));
5486 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
5487 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
5489 for (int i
= 0; i
< 4; ++i
) {
5490 ASSERT_OK(Put(Key(i
), rnd
.RandomString(1024)));
5491 // Wait for compaction so that put won't stop
5492 ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
5494 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5495 ASSERT_LT(NumTableFilesAtLevel(0), 4);
5498 // Test dynamic FIFO compaction options.
5499 // This test covers just option parsing and makes sure that the options are
5500 // correctly assigned. Also look at DBOptionsTest.SetFIFOCompactionOptions
5501 // test which makes sure that the FIFO compaction funcionality is working
5502 // as expected on dynamically changing the options.
5503 // Even more FIFOCompactionTests are at DBTest.FIFOCompaction* .
5504 TEST_F(DBTest
, DynamicFIFOCompactionOptions
) {
5507 options
.create_if_missing
= true;
5509 DestroyAndReopen(options
);
5512 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.max_table_files_size
,
5513 1024 * 1024 * 1024);
5514 ASSERT_EQ(dbfull()->GetOptions().ttl
, 0);
5515 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.allow_compaction
,
5518 ASSERT_OK(dbfull()->SetOptions(
5519 {{"compaction_options_fifo", "{max_table_files_size=23;}"}}));
5520 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.max_table_files_size
,
5522 ASSERT_EQ(dbfull()->GetOptions().ttl
, 0);
5523 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.allow_compaction
,
5526 ASSERT_OK(dbfull()->SetOptions({{"ttl", "97"}}));
5527 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.max_table_files_size
,
5529 ASSERT_EQ(dbfull()->GetOptions().ttl
, 97);
5530 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.allow_compaction
,
5533 ASSERT_OK(dbfull()->SetOptions({{"ttl", "203"}}));
5534 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.max_table_files_size
,
5536 ASSERT_EQ(dbfull()->GetOptions().ttl
, 203);
5537 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.allow_compaction
,
5540 ASSERT_OK(dbfull()->SetOptions(
5541 {{"compaction_options_fifo", "{allow_compaction=true;}"}}));
5542 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.max_table_files_size
,
5544 ASSERT_EQ(dbfull()->GetOptions().ttl
, 203);
5545 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.allow_compaction
,
5548 ASSERT_OK(dbfull()->SetOptions(
5549 {{"compaction_options_fifo", "{max_table_files_size=31;}"}}));
5550 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.max_table_files_size
,
5552 ASSERT_EQ(dbfull()->GetOptions().ttl
, 203);
5553 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.allow_compaction
,
5556 ASSERT_OK(dbfull()->SetOptions(
5557 {{"compaction_options_fifo",
5558 "{max_table_files_size=51;allow_compaction=true;}"}}));
5559 ASSERT_OK(dbfull()->SetOptions({{"ttl", "49"}}));
5560 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.max_table_files_size
,
5562 ASSERT_EQ(dbfull()->GetOptions().ttl
, 49);
5563 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo
.allow_compaction
,
5567 TEST_F(DBTest
, DynamicUniversalCompactionOptions
) {
5569 options
.create_if_missing
= true;
5571 DestroyAndReopen(options
);
5574 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.size_ratio
, 1U);
5575 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.min_merge_width
,
5577 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.max_merge_width
,
5581 .compaction_options_universal
.max_size_amplification_percent
,
5585 .compaction_options_universal
.compression_size_percent
,
5587 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.stop_style
,
5588 kCompactionStopStyleTotalSize
);
5590 dbfull()->GetOptions().compaction_options_universal
.allow_trivial_move
,
5593 ASSERT_OK(dbfull()->SetOptions(
5594 {{"compaction_options_universal", "{size_ratio=7;}"}}));
5595 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.size_ratio
, 7u);
5596 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.min_merge_width
,
5598 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.max_merge_width
,
5602 .compaction_options_universal
.max_size_amplification_percent
,
5606 .compaction_options_universal
.compression_size_percent
,
5608 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.stop_style
,
5609 kCompactionStopStyleTotalSize
);
5611 dbfull()->GetOptions().compaction_options_universal
.allow_trivial_move
,
5614 ASSERT_OK(dbfull()->SetOptions(
5615 {{"compaction_options_universal", "{min_merge_width=11;}"}}));
5616 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.size_ratio
, 7u);
5617 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.min_merge_width
,
5619 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.max_merge_width
,
5623 .compaction_options_universal
.max_size_amplification_percent
,
5627 .compaction_options_universal
.compression_size_percent
,
5629 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal
.stop_style
,
5630 kCompactionStopStyleTotalSize
);
5632 dbfull()->GetOptions().compaction_options_universal
.allow_trivial_move
,
5635 #endif // ROCKSDB_LITE
5637 TEST_F(DBTest
, FileCreationRandomFailure
) {
5640 options
.create_if_missing
= true;
5641 options
.write_buffer_size
= 100000; // Small write buffer
5642 options
.target_file_size_base
= 200000;
5643 options
.max_bytes_for_level_base
= 1000000;
5644 options
.max_bytes_for_level_multiplier
= 2;
5646 DestroyAndReopen(options
);
5649 constexpr int kCDTKeysPerBuffer
= 4;
5650 constexpr int kTestSize
= kCDTKeysPerBuffer
* 4096;
5651 constexpr int kTotalIteration
= 20;
5652 // the second half of the test involves in random failure
5653 // of file creation.
5654 constexpr int kRandomFailureTest
= kTotalIteration
/ 2;
5656 std::vector
<std::string
> values
;
5657 for (int i
= 0; i
< kTestSize
; ++i
) {
5658 values
.push_back("NOT_FOUND");
5660 for (int j
= 0; j
< kTotalIteration
; ++j
) {
5661 if (j
== kRandomFailureTest
) {
5662 env_
->non_writeable_rate_
.store(90);
5664 for (int k
= 0; k
< kTestSize
; ++k
) {
5665 // here we expect some of the Put fails.
5666 std::string value
= rnd
.RandomString(100);
5667 Status s
= Put(Key(k
), Slice(value
));
5669 // update the latest successful put
5672 // But everything before we simulate the failure-test should succeed.
5673 if (j
< kRandomFailureTest
) {
5679 // If rocksdb does not do the correct job, internal assert will fail here.
5680 ASSERT_TRUE(dbfull()->TEST_WaitForFlushMemTable().IsIOError());
5681 ASSERT_TRUE(dbfull()->TEST_WaitForCompact().IsIOError());
5683 // verify we have the latest successful update
5684 for (int k
= 0; k
< kTestSize
; ++k
) {
5685 auto v
= Get(Key(k
));
5686 ASSERT_EQ(v
, values
[k
]);
5689 // reopen and reverify we have the latest successful update
5690 env_
->non_writeable_rate_
.store(0);
5692 for (int k
= 0; k
< kTestSize
; ++k
) {
5693 auto v
= Get(Key(k
));
5694 ASSERT_EQ(v
, values
[k
]);
5698 #ifndef ROCKSDB_LITE
5700 TEST_F(DBTest
, DynamicMiscOptions
) {
5701 // Test max_sequential_skip_in_iterations
5704 options
.create_if_missing
= true;
5705 options
.max_sequential_skip_in_iterations
= 16;
5706 options
.compression
= kNoCompression
;
5707 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
5708 DestroyAndReopen(options
);
5710 auto assert_reseek_count
= [this, &options
](int key_start
, int num_reseek
) {
5711 int key0
= key_start
;
5712 int key1
= key_start
+ 1;
5713 int key2
= key_start
+ 2;
5715 ASSERT_OK(Put(Key(key0
), rnd
.RandomString(8)));
5716 for (int i
= 0; i
< 10; ++i
) {
5717 ASSERT_OK(Put(Key(key1
), rnd
.RandomString(8)));
5719 ASSERT_OK(Put(Key(key2
), rnd
.RandomString(8)));
5720 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(ReadOptions()));
5721 iter
->Seek(Key(key1
));
5722 ASSERT_TRUE(iter
->Valid());
5723 ASSERT_EQ(iter
->key().compare(Key(key1
)), 0);
5725 ASSERT_TRUE(iter
->Valid());
5726 ASSERT_EQ(iter
->key().compare(Key(key2
)), 0);
5727 ASSERT_EQ(num_reseek
,
5728 TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
));
5731 assert_reseek_count(100, 0);
5733 ASSERT_OK(dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "4"}}));
5734 // Clear memtable and make new option effective
5735 ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
5737 assert_reseek_count(200, 1);
5740 dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "16"}}));
5741 // Clear memtable and make new option effective
5742 ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
5744 assert_reseek_count(300, 1);
5746 MutableCFOptions mutable_cf_options
;
5747 CreateAndReopenWithCF({"pikachu"}, options
);
5748 // Test soft_pending_compaction_bytes_limit,
5749 // hard_pending_compaction_bytes_limit
5750 ASSERT_OK(dbfull()->SetOptions(
5751 handles_
[1], {{"soft_pending_compaction_bytes_limit", "200"},
5752 {"hard_pending_compaction_bytes_limit", "300"}}));
5753 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_
[1],
5754 &mutable_cf_options
));
5755 ASSERT_EQ(200, mutable_cf_options
.soft_pending_compaction_bytes_limit
);
5756 ASSERT_EQ(300, mutable_cf_options
.hard_pending_compaction_bytes_limit
);
5757 // Test report_bg_io_stats
5759 dbfull()->SetOptions(handles_
[1], {{"report_bg_io_stats", "true"}}));
5761 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_
[1],
5762 &mutable_cf_options
));
5763 ASSERT_TRUE(mutable_cf_options
.report_bg_io_stats
);
5766 ASSERT_OK(dbfull()->SetOptions({{"compression", "kNoCompression"}}));
5767 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_
[0],
5768 &mutable_cf_options
));
5769 ASSERT_EQ(CompressionType::kNoCompression
, mutable_cf_options
.compression
);
5771 if (Snappy_Supported()) {
5772 ASSERT_OK(dbfull()->SetOptions({{"compression", "kSnappyCompression"}}));
5773 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_
[0],
5774 &mutable_cf_options
));
5775 ASSERT_EQ(CompressionType::kSnappyCompression
,
5776 mutable_cf_options
.compression
);
5779 // Test paranoid_file_checks already done in db_block_cache_test
5781 dbfull()->SetOptions(handles_
[1], {{"paranoid_file_checks", "true"}}));
5782 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_
[1],
5783 &mutable_cf_options
));
5784 ASSERT_TRUE(mutable_cf_options
.report_bg_io_stats
);
5785 ASSERT_TRUE(mutable_cf_options
.check_flush_compaction_key_order
);
5787 ASSERT_OK(dbfull()->SetOptions(
5788 handles_
[1], {{"check_flush_compaction_key_order", "false"}}));
5789 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_
[1],
5790 &mutable_cf_options
));
5791 ASSERT_FALSE(mutable_cf_options
.check_flush_compaction_key_order
);
5793 #endif // ROCKSDB_LITE
// Verifies the per-level Get-hit statistics tickers (GET_HIT_L0, GET_HIT_L1,
// GET_HIT_L2_AND_UP): builds one SST file on each of L0/L1/L2 by flushing
// kNumKeysPerLevel keys per level and compacting down, reads every key back,
// and expects each level to serve exactly kNumKeysPerLevel hits (summing to
// kNumKeysPerDb total).
// NOTE(review): this extract is line-mangled and the embedded original line
// numbers have gaps (e.g. 5808 -> 5812), so some statements (the `key`
// counter setup, Flush calls, closing braces) appear to be missing here —
// verify against the upstream file before editing logic.
5795 TEST_F(DBTest
, L0L1L2AndUpHitCounter
) {
5796 const int kNumLevels
= 3;
5797 const int kNumKeysPerLevel
= 10000;
5798 const int kNumKeysPerDb
= kNumLevels
* kNumKeysPerLevel
;
5800 Options options
= CurrentOptions();
5801 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
5804 // After the below loop there will be one file on each of L0, L1, and L2.
5806 for (int output_level
= kNumLevels
- 1; output_level
>= 0; --output_level
) {
5807 for (int i
= 0; i
< kNumKeysPerLevel
; ++i
) {
5808 ASSERT_OK(Put(Key(key
), "val"));
5812 for (int input_level
= 0; input_level
< output_level
; ++input_level
) {
5813 // `TEST_CompactRange(input_level, ...)` compacts from `input_level` to
5814 // `input_level + 1`.
5815 ASSERT_OK(dbfull()->TEST_CompactRange(input_level
, nullptr, nullptr));
5818 assert(key
== kNumKeysPerDb
);
5820 ASSERT_EQ(0, TestGetTickerCount(options
, GET_HIT_L0
));
5821 ASSERT_EQ(0, TestGetTickerCount(options
, GET_HIT_L1
));
5822 ASSERT_EQ(0, TestGetTickerCount(options
, GET_HIT_L2_AND_UP
));
5824 for (int i
= 0; i
< kNumKeysPerDb
; i
++) {
5825 ASSERT_EQ(Get(Key(i
)), "val");
5828 ASSERT_EQ(kNumKeysPerLevel
, TestGetTickerCount(options
, GET_HIT_L0
));
5829 ASSERT_EQ(kNumKeysPerLevel
, TestGetTickerCount(options
, GET_HIT_L1
));
5830 ASSERT_EQ(kNumKeysPerLevel
, TestGetTickerCount(options
, GET_HIT_L2_AND_UP
));
5832 ASSERT_EQ(kNumKeysPerDb
, TestGetTickerCount(options
, GET_HIT_L0
) +
5833 TestGetTickerCount(options
, GET_HIT_L1
) +
5834 TestGetTickerCount(options
, GET_HIT_L2_AND_UP
))
// For each supported compression type, writes 1000 compressible values with
// block-based table format_version N and reads them back with the other
// format_version (1 <-> 2), verifying that compressed block size encoding is
// compatible across the two table versions.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (e.g. the `Random rnd` declaration and several closing braces appear to be
// missing) — verify against the upstream file before editing logic.
5837 TEST_F(DBTest
, EncodeDecompressedBlockSizeTest
) {
5843 CompressionType compressions
[] = {kZlibCompression
, kBZip2Compression
,
5844 kLZ4Compression
, kLZ4HCCompression
,
5845 kXpressCompression
};
5846 for (auto comp
: compressions
) {
5847 if (!CompressionTypeSupported(comp
)) {
5850 // first_table_version 1 -- generate with table_version == 1, read with
5851 // table_version == 2
5852 // first_table_version 2 -- generate with table_version == 2, read with
5853 // table_version == 1
5854 for (int first_table_version
= 1; first_table_version
<= 2;
5855 ++first_table_version
) {
5856 BlockBasedTableOptions table_options
;
5857 table_options
.format_version
= first_table_version
;
5858 table_options
.filter_policy
.reset(NewBloomFilterPolicy(10));
5859 Options options
= CurrentOptions();
5860 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
5861 options
.create_if_missing
= true;
5862 options
.compression
= comp
;
5863 DestroyAndReopen(options
);
5865 int kNumKeysWritten
= 1000;
5868 for (int i
= 0; i
< kNumKeysWritten
; ++i
) {
5869 // compressible string
5870 ASSERT_OK(Put(Key(i
), rnd
.RandomString(128) + std::string(128, 'a')));
5873 table_options
.format_version
= first_table_version
== 1 ? 2 : 1;
5874 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
5876 for (int i
= 0; i
< kNumKeysWritten
; ++i
) {
5877 auto r
= Get(Key(i
));
5878 ASSERT_EQ(r
.substr(128), std::string(128, 'a'))
// Checks that DB close is not slowed down by pending background work: both
// LOW and HIGH priority background threads are blocked by SleepingBackgroundTask,
// the db directory is wiped, files are generated while flush/compaction is
// blocked, and the test asserts that no SST files appear in dbname_ until the
// sleeping tasks are woken up at the end.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (e.g. the Close() call, `Random rnd`, `key_idx` setup appear to be
// missing) — verify against the upstream file before editing logic.
5884 TEST_F(DBTest
, CloseSpeedup
) {
5885 Options options
= CurrentOptions();
5886 options
.compaction_style
= kCompactionStyleLevel
;
5887 options
.write_buffer_size
= 110 << 10; // 110KB
5888 options
.arena_block_size
= 4 << 10;
5889 options
.level0_file_num_compaction_trigger
= 2;
5890 options
.num_levels
= 4;
5891 options
.max_bytes_for_level_base
= 400 * 1024;
5892 options
.max_write_buffer_number
= 16;
5894 // Block background threads
5895 env_
->SetBackgroundThreads(1, Env::LOW
);
5896 env_
->SetBackgroundThreads(1, Env::HIGH
);
5897 test::SleepingBackgroundTask sleeping_task_low
;
5898 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
, &sleeping_task_low
,
5899 Env::Priority::LOW
);
5900 test::SleepingBackgroundTask sleeping_task_high
;
5901 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
,
5902 &sleeping_task_high
, Env::Priority::HIGH
);
5904 std::vector
<std::string
> filenames
;
5905 ASSERT_OK(env_
->GetChildren(dbname_
, &filenames
));
5906 // In Windows, LOCK file cannot be deleted because it is locked by db_test
5907 // After closing db_test, the LOCK file is unlocked and can be deleted
5908 // Delete archival files.
5909 bool deleteDir
= true;
5910 for (size_t i
= 0; i
< filenames
.size(); ++i
) {
5911 Status s
= env_
->DeleteFile(dbname_
+ "/" + filenames
[i
]);
5917 ASSERT_OK(env_
->DeleteDir(dbname_
));
5919 DestroyAndReopen(options
);
5921 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
5922 env_
->SetBackgroundThreads(1, Env::LOW
);
5923 env_
->SetBackgroundThreads(1, Env::HIGH
);
5927 // First three 110KB files are not going to level 2
5928 // After that, (100K, 200K)
5929 for (int num
= 0; num
< 5; num
++) {
5930 GenerateNewFile(&rnd
, &key_idx
, true);
5933 ASSERT_EQ(0, GetSstFileCount(dbname_
));
5936 ASSERT_EQ(0, GetSstFileCount(dbname_
));
5938 // Unblock background threads
5939 sleeping_task_high
.WakeUp();
5940 sleeping_task_high
.WaitUntilDone();
5941 sleeping_task_low
.WakeUp();
5942 sleeping_task_low
.WaitUntilDone()
// Test-only MergeOperator that advances the DBTest mock clock by 1ms per
// merge operand inside FullMergeV2 and produces an empty merged value.
// Used by the Merge*Time tests below to make MERGE_OPERATION_TOTAL_TIME
// deterministic.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (the `db_test_` member declaration, access specifiers and the `return
// true` / closing braces appear to be missing) — verify against upstream.
5947 class DelayedMergeOperator
: public MergeOperator
{
5952 explicit DelayedMergeOperator(DBTest
* d
) : db_test_(d
) {}
5954 bool FullMergeV2(const MergeOperationInput
& merge_in
,
5955 MergeOperationOutput
* merge_out
) const override
{
5956 db_test_
->env_
->MockSleepForMicroseconds(1000 *
5957 merge_in
.operand_list
.size());
5958 merge_out
->new_value
= "";
5962 const char* Name() const override
{ return "DelayedMergeOperator"; }
// Verifies MERGE_OPERATION_TOTAL_TIME accounting with the DelayedMergeOperator
// (1ms of mock time per operand): after a Put plus two Merges, a Get should
// account 2,000,000us, and a full iterator scan (which re-merges) should
// bring the ticker to 4,000,000us over exactly one visible key.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (declarations of `opt`, `result`, `count` and several braces appear to be
// missing) — verify against the upstream file before editing logic.
5965 TEST_F(DBTest
, MergeTestTime
) {
5966 std::string one
, two
, three
;
5967 PutFixed64(&one
, 1);
5968 PutFixed64(&two
, 2);
5969 PutFixed64(&three
, 3);
5971 // Enable time profiling
5972 SetPerfLevel(kEnableTime
);
5973 Options options
= CurrentOptions();
5974 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
5975 options
.merge_operator
.reset(new DelayedMergeOperator(this));
5976 SetTimeElapseOnlySleepOnReopen(&options
);
5977 DestroyAndReopen(options
);
5979 // NOTE: Presumed unnecessary and removed: resetting mock time in env
5981 ASSERT_EQ(TestGetTickerCount(options
, MERGE_OPERATION_TOTAL_TIME
), 0);
5982 ASSERT_OK(db_
->Put(WriteOptions(), "foo", one
));
5984 ASSERT_OK(db_
->Merge(WriteOptions(), "foo", two
));
5986 ASSERT_OK(db_
->Merge(WriteOptions(), "foo", three
));
5990 opt
.verify_checksums
= true;
5991 opt
.snapshot
= nullptr;
5993 ASSERT_OK(db_
->Get(opt
, "foo", &result
));
5995 ASSERT_EQ(2000000, TestGetTickerCount(options
, MERGE_OPERATION_TOTAL_TIME
));
5997 ReadOptions read_options
;
5998 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_options
));
6000 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
6001 ASSERT_OK(iter
->status());
6005 ASSERT_EQ(1, count
);
6006 ASSERT_EQ(4000000, TestGetTickerCount(options
, MERGE_OPERATION_TOTAL_TIME
));
6007 #ifdef ROCKSDB_USING_THREAD_STATUS
6008 ASSERT_GT(TestGetTickerCount(options
, FLUSH_WRITE_BYTES
), 0);
6009 #endif // ROCKSDB_USING_THREAD_STATUS
// Verifies that merge-operator time is charged during compaction: writes n
// merge operands for one key, flushes, runs a manual CompactRange, and
// expects MERGE_OPERATION_TOTAL_TIME == n * 1,000,000us (DelayedMergeOperator
// adds 1ms of mock time per operand).
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (flush calls and closing braces appear to be missing) — verify against
// the upstream file before editing logic.
6012 #ifndef ROCKSDB_LITE
6013 TEST_P(DBTestWithParam
, MergeCompactionTimeTest
) {
6014 SetPerfLevel(kEnableTime
);
6015 Options options
= CurrentOptions();
6016 options
.compaction_filter_factory
= std::make_shared
<KeepFilterFactory
>();
6017 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
6018 options
.merge_operator
.reset(new DelayedMergeOperator(this));
6019 options
.disable_auto_compactions
= true;
6020 options
.max_subcompactions
= max_subcompactions_
;
6021 SetTimeElapseOnlySleepOnReopen(&options
);
6022 DestroyAndReopen(options
);
6024 constexpr unsigned n
= 1000;
6025 for (unsigned i
= 0; i
< n
; i
++) {
6026 ASSERT_OK(db_
->Merge(WriteOptions(), "foo", "TEST"));
6029 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
6031 CompactRangeOptions cro
;
6032 cro
.exclusive_manual_compaction
= exclusive_manual_compaction_
;
6033 ASSERT_OK(db_
->CompactRange(cro
, nullptr, nullptr));
6035 ASSERT_EQ(uint64_t{n
} * 1000000U,
6036 TestGetTickerCount(options
, MERGE_OPERATION_TOTAL_TIME
))
// Verifies FILTER_OPERATION_TOTAL_TIME accounting: with a DelayFilterFactory
// (which deletes all keys, so CountLiveFiles() ends at 0), writes four tables,
// runs a manual CompactRange, and expects the ticker to equal n * 1,000,000us.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (the declaration of `n`, Flush calls, iterator cleanup and closing braces
// appear to be missing) — verify against the upstream file before editing.
6039 TEST_P(DBTestWithParam
, FilterCompactionTimeTest
) {
6040 Options options
= CurrentOptions();
6041 options
.compaction_filter_factory
=
6042 std::make_shared
<DelayFilterFactory
>(this);
6043 options
.disable_auto_compactions
= true;
6044 options
.create_if_missing
= true;
6045 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
6046 options
.statistics
->set_stats_level(kExceptTimeForMutex
);
6047 options
.max_subcompactions
= max_subcompactions_
;
6048 SetTimeElapseOnlySleepOnReopen(&options
);
6049 DestroyAndReopen(options
);
6053 for (int table
= 0; table
< 4; ++table
) {
6054 for (int i
= 0; i
< 10 + table
; ++i
) {
6055 ASSERT_OK(Put(std::to_string(table
* 100 + i
), "val"));
6061 CompactRangeOptions cro
;
6062 cro
.exclusive_manual_compaction
= exclusive_manual_compaction_
;
6063 ASSERT_OK(db_
->CompactRange(cro
, nullptr, nullptr));
6064 ASSERT_EQ(0U, CountLiveFiles());
6068 Iterator
* itr
= db_
->NewIterator(ReadOptions());
6070 ASSERT_OK(itr
->status());
6071 ASSERT_EQ(uint64_t{n
} * 1000000U,
6072 TestGetTickerCount(options
, FILTER_OPERATION_TOTAL_TIME
));
6075 #endif // ROCKSDB_LITE
// With a tiny write buffer and max_write_buffer_number == 2, writes 100000
// keys and asserts that the number of obsolete WAL logs queued for freeing
// (TEST_LogsToFreeSize) stays below 3 — i.e. old logs are cleaned up promptly.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (Reopen call and closing braces appear to be missing) — verify upstream.
6077 TEST_F(DBTest
, TestLogCleanup
) {
6078 Options options
= CurrentOptions();
6079 options
.write_buffer_size
= 64 * 1024; // very small
6080 // only two memtables allowed ==> only two log files
6081 options
.max_write_buffer_number
= 2;
6084 for (int i
= 0; i
< 100000; ++i
) {
6085 ASSERT_OK(Put(Key(i
), "val"));
6086 // only 2 memtables will be alive, so logs_to_free needs to always be below
6088 ASSERT_LT(dbfull()->TEST_LogsToFreeSize(), static_cast<size_t>(3))
// Opens an empty DB in read-only (compacted) mode and verifies that a write
// attempt is rejected with Status::NotSupported.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (the Close() calls and closing brace appear to be missing) — verify
// against the upstream file before editing logic.
6092 #ifndef ROCKSDB_LITE
6093 TEST_F(DBTest
, EmptyCompactedDB
) {
6094 Options options
= CurrentOptions();
6095 options
.max_open_files
= -1;
6097 ASSERT_OK(ReadOnlyReopen(options
));
6098 Status s
= Put("new", "value");
6099 ASSERT_TRUE(s
.IsNotSupported());
6102 #endif // ROCKSDB_LITE
// Exercises experimental::SuggestCompactRange on a leveled DB: a local
// CompactionFilterFactory records CompactionFilter::Context so the test can
// assert that suggested compactions are NOT reported as manual compactions.
// Files are generated, suggested compactions drain L0/L1, and a suggestion
// over a non-overlapping range ["a","b") must not compact the L0 file, while
// a later suggestion does (trivially moving it down).
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (access specifiers, `Random rnd`, wait-for-compact calls and closing
// braces appear to be missing) — verify against upstream before editing.
6104 #ifndef ROCKSDB_LITE
6105 TEST_F(DBTest
, SuggestCompactRangeTest
) {
6106 class CompactionFilterFactoryGetContext
: public CompactionFilterFactory
{
6108 std::unique_ptr
<CompactionFilter
> CreateCompactionFilter(
6109 const CompactionFilter::Context
& context
) override
{
6110 saved_context
= context
;
6111 std::unique_ptr
<CompactionFilter
> empty_filter
;
6112 return empty_filter
;
6114 const char* Name() const override
{
6115 return "CompactionFilterFactoryGetContext";
6117 static bool IsManual(CompactionFilterFactory
* compaction_filter_factory
) {
6118 return reinterpret_cast<CompactionFilterFactoryGetContext
*>(
6119 compaction_filter_factory
)
6120 ->saved_context
.is_manual_compaction
;
6122 CompactionFilter::Context saved_context
;
6125 Options options
= CurrentOptions();
6126 options
.memtable_factory
.reset(test::NewSpecialSkipListFactory(
6127 DBTestBase::kNumKeysByGenerateNewRandomFile
));
6128 options
.compaction_style
= kCompactionStyleLevel
;
6129 options
.compaction_filter_factory
.reset(
6130 new CompactionFilterFactoryGetContext());
6131 options
.write_buffer_size
= 200 << 10;
6132 options
.arena_block_size
= 4 << 10;
6133 options
.level0_file_num_compaction_trigger
= 4;
6134 options
.num_levels
= 4;
6135 options
.compression
= kNoCompression
;
6136 options
.max_bytes_for_level_base
= 450 << 10;
6137 options
.target_file_size_base
= 98 << 10;
6138 options
.max_compaction_bytes
= static_cast<uint64_t>(1) << 60; // inf
6144 for (int num
= 0; num
< 10; num
++) {
6145 GenerateNewRandomFile(&rnd
);
6148 ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual(
6149 options
.compaction_filter_factory
.get()));
6151 // make sure either L0 or L1 has file
6152 while (NumTableFilesAtLevel(0) == 0 && NumTableFilesAtLevel(1) == 0) {
6153 GenerateNewRandomFile(&rnd
);
6156 // compact it three times
6157 for (int i
= 0; i
< 3; ++i
) {
6158 ASSERT_OK(experimental::SuggestCompactRange(db_
, nullptr, nullptr));
6159 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6162 // All files are compacted
6163 ASSERT_EQ(0, NumTableFilesAtLevel(0));
6164 ASSERT_EQ(0, NumTableFilesAtLevel(1));
6166 GenerateNewRandomFile(&rnd
);
6167 ASSERT_EQ(1, NumTableFilesAtLevel(0));
6169 // nonoverlapping with the file on level 0
6170 Slice
start("a"), end("b");
6171 ASSERT_OK(experimental::SuggestCompactRange(db_
, &start
, &end
));
6172 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6174 // should not compact the level 0 file
6175 ASSERT_EQ(1, NumTableFilesAtLevel(0));
6179 ASSERT_OK(experimental::SuggestCompactRange(db_
, &start
, &end
));
6180 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6181 // SuggestCompactRange() is not going to be reported as manual compaction
6182 ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual(
6183 options
.compaction_filter_factory
.get()));
6185 // now it should compact the level 0 file
6186 // as it's a trivial move to L1, it triggers another one to compact to L2
6187 ASSERT_EQ(0, NumTableFilesAtLevel(0));
6188 ASSERT_EQ(0, NumTableFilesAtLevel(1))
// Same scenario as SuggestCompactRangeTest but under universal compaction:
// suggested compactions drain all levels; a suggestion over the
// non-overlapping range ["a","b") must leave the fresh L0 file alone, and a
// later suggestion compacts it to the last level.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (`Random rnd`, an ASSERT_OK( wrapper around SuggestCompactRange at 6216,
// and closing braces appear to be missing) — verify against upstream.
6191 TEST_F(DBTest
, SuggestCompactRangeUniversal
) {
6192 Options options
= CurrentOptions();
6193 options
.memtable_factory
.reset(test::NewSpecialSkipListFactory(
6194 DBTestBase::kNumKeysByGenerateNewRandomFile
));
6195 options
.compaction_style
= kCompactionStyleUniversal
;
6196 options
.write_buffer_size
= 200 << 10;
6197 options
.arena_block_size
= 4 << 10;
6198 options
.level0_file_num_compaction_trigger
= 4;
6199 options
.num_levels
= 4;
6200 options
.compression
= kNoCompression
;
6201 options
.max_bytes_for_level_base
= 450 << 10;
6202 options
.target_file_size_base
= 98 << 10;
6203 options
.max_compaction_bytes
= static_cast<uint64_t>(1) << 60; // inf
6209 for (int num
= 0; num
< 10; num
++) {
6210 GenerateNewRandomFile(&rnd
);
6213 ASSERT_EQ("1,2,3,4", FilesPerLevel());
6214 for (int i
= 0; i
< 3; i
++) {
6216 db_
->SuggestCompactRange(db_
->DefaultColumnFamily(), nullptr, nullptr));
6217 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6220 // All files are compacted
6221 ASSERT_EQ(0, NumTableFilesAtLevel(0));
6222 ASSERT_EQ(0, NumTableFilesAtLevel(1));
6223 ASSERT_EQ(0, NumTableFilesAtLevel(2));
6225 GenerateNewRandomFile(&rnd
);
6226 ASSERT_EQ(1, NumTableFilesAtLevel(0));
6228 // nonoverlapping with the file on level 0
6229 Slice
start("a"), end("b");
6230 ASSERT_OK(experimental::SuggestCompactRange(db_
, &start
, &end
));
6231 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6233 // should not compact the level 0 file
6234 ASSERT_EQ(1, NumTableFilesAtLevel(0));
6238 ASSERT_OK(experimental::SuggestCompactRange(db_
, &start
, &end
));
6239 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6241 // now it should compact the level 0 file to the last level
6242 ASSERT_EQ(0, NumTableFilesAtLevel(0));
6243 ASSERT_EQ(0, NumTableFilesAtLevel(1))
// Builds four L0 files with non-overlapping key ranges (flushed per range),
// calls experimental::PromoteL0(db, cf, 2), and verifies all files were
// trivially moved from L0 to L2 with every key still readable.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (`Random rnd`, per-range Flush calls and closing braces appear to be
// missing) — verify against the upstream file before editing logic.
6246 TEST_F(DBTest
, PromoteL0
) {
6247 Options options
= CurrentOptions();
6248 options
.disable_auto_compactions
= true;
6249 options
.write_buffer_size
= 10 * 1024 * 1024;
6250 DestroyAndReopen(options
);
6252 // non overlapping ranges
6253 std::vector
<std::pair
<int32_t, int32_t>> ranges
= {
6254 {81, 160}, {0, 80}, {161, 240}, {241, 320}};
6256 int32_t value_size
= 10 * 1024; // 10 KB
6259 std::map
<int32_t, std::string
> values
;
6260 for (const auto& range
: ranges
) {
6261 for (int32_t j
= range
.first
; j
< range
.second
; j
++) {
6262 values
[j
] = rnd
.RandomString(value_size
);
6263 ASSERT_OK(Put(Key(j
), values
[j
]));
6268 int32_t level0_files
= NumTableFilesAtLevel(0, 0);
6269 ASSERT_EQ(level0_files
, ranges
.size());
6270 ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // No files in L1
6272 // Promote L0 level to L2.
6273 ASSERT_OK(experimental::PromoteL0(db_
, db_
->DefaultColumnFamily(), 2));
6274 // We expect that all the files were trivially moved from L0 to L2
6275 ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
6276 ASSERT_EQ(NumTableFilesAtLevel(2, 0), level0_files
);
6278 for (const auto& kv
: values
) {
6279 ASSERT_EQ(Get(Key(kv
.first
)), kv
.second
)
// Negative cases for experimental::PromoteL0: it must return InvalidArgument
// both when L0 files have overlapping key ranges and when L1 is non-empty
// (after a CompactRange has pushed a file into L1).
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (Flush calls, `Status status;` declaration and closing brace appear to be
// missing) — verify against the upstream file before editing logic.
6283 TEST_F(DBTest
, PromoteL0Failure
) {
6284 Options options
= CurrentOptions();
6285 options
.disable_auto_compactions
= true;
6286 options
.write_buffer_size
= 10 * 1024 * 1024;
6287 DestroyAndReopen(options
);
6289 // Produce two L0 files with overlapping ranges.
6290 ASSERT_OK(Put(Key(0), ""));
6291 ASSERT_OK(Put(Key(3), ""));
6293 ASSERT_OK(Put(Key(1), ""));
6297 // Fails because L0 has overlapping files.
6298 status
= experimental::PromoteL0(db_
, db_
->DefaultColumnFamily());
6299 ASSERT_TRUE(status
.IsInvalidArgument());
6301 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
6302 // Now there is a file in L1.
6303 ASSERT_GE(NumTableFilesAtLevel(1, 0), 1);
6305 ASSERT_OK(Put(Key(5), ""));
6307 // Fails because L1 is non-empty.
6308 status
= experimental::PromoteL0(db_
, db_
->DefaultColumnFamily());
6309 ASSERT_TRUE(status
.IsInvalidArgument())
// Regression test for GitHub issue #596: with num_levels == 2 and an empty
// bottom level, a full CompactRange must move all L0 files down to L1.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (`Random rnd`, Flush calls and closing brace appear to be missing) —
// verify against the upstream file before editing logic.
6312 // Github issue #596
6313 TEST_F(DBTest
, CompactRangeWithEmptyBottomLevel
) {
6314 const int kNumLevels
= 2;
6315 const int kNumL0Files
= 2;
6316 Options options
= CurrentOptions();
6317 options
.disable_auto_compactions
= true;
6318 options
.num_levels
= kNumLevels
;
6319 DestroyAndReopen(options
);
6322 for (int i
= 0; i
< kNumL0Files
; ++i
) {
6323 ASSERT_OK(Put(Key(0), rnd
.RandomString(1024)));
6326 ASSERT_EQ(NumTableFilesAtLevel(0), kNumL0Files
);
6327 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
6329 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
6330 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
6331 ASSERT_EQ(NumTableFilesAtLevel(1), kNumL0Files
);
6333 #endif // ROCKSDB_LITE
// Forces automatic compactions to be scheduled while an exclusive manual
// compaction is running (sync-point dependencies pin the ordering), then
// asserts that the MaybeScheduleFlushOrCompaction:Conflict callback fired at
// least once and the DB is still readable afterwards.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (`Random rnd`, Flush calls and closing braces appear to be missing) —
// verify against the upstream file before editing logic.
6335 TEST_F(DBTest
, AutomaticConflictsWithManualCompaction
) {
6336 const int kNumL0Files
= 50;
6337 Options options
= CurrentOptions();
6338 options
.level0_file_num_compaction_trigger
= 4;
6339 // never slowdown / stop
6340 options
.level0_slowdown_writes_trigger
= 999999;
6341 options
.level0_stop_writes_trigger
= 999999;
6342 options
.max_background_compactions
= 10;
6343 DestroyAndReopen(options
);
6345 // schedule automatic compactions after the manual one starts, but before it
6346 // finishes to ensure conflict.
6347 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
6348 {{"DBImpl::BackgroundCompaction:Start",
6349 "DBTest::AutomaticConflictsWithManualCompaction:PrePuts"},
6350 {"DBTest::AutomaticConflictsWithManualCompaction:PostPuts",
6351 "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}});
6352 std::atomic
<int> callback_count(0);
6353 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6354 "DBImpl::MaybeScheduleFlushOrCompaction:Conflict",
6355 [&](void* /*arg*/) { callback_count
.fetch_add(1); });
6356 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
6359 for (int i
= 0; i
< 2; ++i
) {
6360 // put two keys to ensure no trivial move
6361 for (int j
= 0; j
< 2; ++j
) {
6362 ASSERT_OK(Put(Key(j
), rnd
.RandomString(1024)));
6366 port::Thread
manual_compaction_thread([this]() {
6367 CompactRangeOptions croptions
;
6368 croptions
.exclusive_manual_compaction
= true;
6369 ASSERT_OK(db_
->CompactRange(croptions
, nullptr, nullptr));
6372 TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PrePuts");
6373 for (int i
= 0; i
< kNumL0Files
; ++i
) {
6374 // put two keys to ensure no trivial move
6375 for (int j
= 0; j
< 2; ++j
) {
6376 ASSERT_OK(Put(Key(j
), rnd
.RandomString(1024)));
6380 TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PostPuts");
6382 ASSERT_GE(callback_count
.load(), 1);
6383 for (int i
= 0; i
< 2; ++i
) {
6384 ASSERT_NE("NOT_FOUND", Get(Key(i
)));
6386 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6387 manual_compaction_thread
.join();
6388 ASSERT_OK(dbfull()->TEST_WaitForCompact())
// While a CompactFiles() call is held open between sync points
// CompactFilesImpl:0 and :1, the test writes enough L0 files to exceed the
// auto-compaction trigger, then verifies that once CompactFiles finishes an
// automatic compaction runs and brings the L0 file count back at or below
// the trigger.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (`Random rnd`, the CompactFiles argument list tail, and closing braces
// appear to be missing) — verify against the upstream file before editing.
6391 #ifndef ROCKSDB_LITE
6392 TEST_F(DBTest
, CompactFilesShouldTriggerAutoCompaction
) {
6393 Options options
= CurrentOptions();
6394 options
.max_background_compactions
= 1;
6395 options
.level0_file_num_compaction_trigger
= 4;
6396 options
.level0_slowdown_writes_trigger
= 36;
6397 options
.level0_stop_writes_trigger
= 36;
6398 DestroyAndReopen(options
);
6400 // generate files for manual compaction
6402 for (int i
= 0; i
< 2; ++i
) {
6403 // put two keys to ensure no trivial move
6404 for (int j
= 0; j
< 2; ++j
) {
6405 ASSERT_OK(Put(Key(j
), rnd
.RandomString(1024)));
6410 ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_meta_data
;
6411 db_
->GetColumnFamilyMetaData(db_
->DefaultColumnFamily(), &cf_meta_data
);
6413 std::vector
<std::string
> input_files
;
6414 input_files
.push_back(cf_meta_data
.levels
[0].files
[0].name
);
6416 SyncPoint::GetInstance()->LoadDependency({
6417 {"CompactFilesImpl:0",
6418 "DBTest::CompactFilesShouldTriggerAutoCompaction:Begin"},
6419 {"DBTest::CompactFilesShouldTriggerAutoCompaction:End",
6420 "CompactFilesImpl:1"},
6423 SyncPoint::GetInstance()->EnableProcessing();
6425 port::Thread
manual_compaction_thread([&]() {
6426 auto s
= db_
->CompactFiles(CompactionOptions(), db_
->DefaultColumnFamily(),
6431 TEST_SYNC_POINT("DBTest::CompactFilesShouldTriggerAutoCompaction:Begin");
6432 // generate enough files to trigger compaction
6433 for (int i
= 0; i
< 20; ++i
) {
6434 for (int j
= 0; j
< 2; ++j
) {
6435 ASSERT_OK(Put(Key(j
), rnd
.RandomString(1024)));
6439 db_
->GetColumnFamilyMetaData(db_
->DefaultColumnFamily(), &cf_meta_data
);
6440 ASSERT_GT(cf_meta_data
.levels
[0].files
.size(),
6441 options
.level0_file_num_compaction_trigger
);
6442 TEST_SYNC_POINT("DBTest::CompactFilesShouldTriggerAutoCompaction:End");
6444 manual_compaction_thread
.join();
6445 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6447 db_
->GetColumnFamilyMetaData(db_
->DefaultColumnFamily(), &cf_meta_data
);
6448 ASSERT_LE(cf_meta_data
.levels
[0].files
.size(),
6449 options
.level0_file_num_compaction_trigger
);
6451 #endif // ROCKSDB_LITE
// Regression test for GitHub issue #595: builds write batches of increasing
// size (5..9 MB) against a DB with column families and a small write buffer,
// writes them, and finally verifies the DB can be re-opened with its column
// families intact.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (the WriteBatch declaration, `j` counter, inner-loop header and closing
// braces appear to be missing) — verify against upstream before editing.
6453 // Github issue #595
6454 // Large write batch with column families
6455 TEST_F(DBTest
, LargeBatchWithColumnFamilies
) {
6456 Options options
= CurrentOptions();
6458 options
.write_buffer_size
= 100000; // Small write buffer
6459 CreateAndReopenWithCF({"pikachu"}, options
);
6461 for (int i
= 0; i
< 5; i
++) {
6462 for (int pass
= 1; pass
<= 3; pass
++) {
6464 size_t write_size
= 1024 * 1024 * (5 + i
);
6465 fprintf(stderr
, "prepare: %" ROCKSDB_PRIszt
" MB, pass:%d\n",
6466 (write_size
/ 1024 / 1024), pass
);
6468 std::string
data(3000, j
++ % 127 + 20);
6469 data
+= std::to_string(j
);
6470 ASSERT_OK(batch
.Put(handles_
[0], Slice(data
), Slice(data
)));
6471 if (batch
.GetDataSize() > write_size
) {
6475 fprintf(stderr
, "write: %" ROCKSDB_PRIszt
" MB\n",
6476 (batch
.GetDataSize() / 1024 / 1024));
6477 ASSERT_OK(dbfull()->Write(WriteOptions(), &batch
));
6478 fprintf(stderr
, "done\n");
6481 // make sure we can re-open it.
6482 ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options
))
// Verifies that flushes can proceed while a manual CompactRange is blocked
// mid-run: sync-point dependencies hold the compaction open between markers
// :1 and :2, during which three new files are generated (requiring flushes);
// with max_write_buffer_number == 2 this would stall if flushes were blocked
// by the compaction. Runs three iterations: leveled, leveled with an extra
// flush between compacting levels, and universal.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (the iter-based if/else around compaction_style, `Random rnd`, thread
// join body and closing braces appear to be missing) — verify upstream.
6485 // Make sure that Flushes can proceed in parallel with CompactRange()
6486 TEST_F(DBTest
, FlushesInParallelWithCompactRange
) {
6487 // iter == 0 -- leveled
6488 // iter == 1 -- leveled, but throw in a flush between two levels compacting
6489 // iter == 2 -- universal
6490 for (int iter
= 0; iter
< 3; ++iter
) {
6491 Options options
= CurrentOptions();
6493 options
.compaction_style
= kCompactionStyleLevel
;
6495 options
.compaction_style
= kCompactionStyleUniversal
;
6497 options
.write_buffer_size
= 110 << 10;
6498 options
.level0_file_num_compaction_trigger
= 4;
6499 options
.num_levels
= 4;
6500 options
.compression
= kNoCompression
;
6501 options
.max_bytes_for_level_base
= 450 << 10;
6502 options
.target_file_size_base
= 98 << 10;
6503 options
.max_write_buffer_number
= 2;
6505 DestroyAndReopen(options
);
6508 for (int num
= 0; num
< 14; num
++) {
6509 GenerateNewRandomFile(&rnd
);
6513 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
6514 {{"DBImpl::RunManualCompaction()::1",
6515 "DBTest::FlushesInParallelWithCompactRange:1"},
6516 {"DBTest::FlushesInParallelWithCompactRange:2",
6517 "DBImpl::RunManualCompaction()::2"}});
6519 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
6520 {{"CompactionJob::Run():Start",
6521 "DBTest::FlushesInParallelWithCompactRange:1"},
6522 {"DBTest::FlushesInParallelWithCompactRange:2",
6523 "CompactionJob::Run():End"}});
6525 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
6527 std::vector
<port::Thread
> threads
;
6528 threads
.emplace_back([&]() { Compact("a", "z"); });
6530 TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:1");
6532 // this has to start a flush. if flushes are blocked, this will try to
6534 // 3 memtables, and that will fail because max_write_buffer_number is 2
6535 for (int num
= 0; num
< 3; num
++) {
6536 GenerateNewRandomFile(&rnd
, /* nowait */ true);
6539 TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:2");
6541 for (auto& t
: threads
) {
6544 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing()
// Verifies write-rate throttling under L0 slowdown: compactions are blocked,
// L0 reaches the slowdown trigger, and each subsequent memtable flush is
// expected to be delayed at the current delayed_write_rate, which decays by
// kIncSlowdownRatio^2 per flush. The accumulated mock-clock time must land
// within [estimated/2, estimated*2].
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (Flush calls, `Random rnd`, the WriteOptions `wo` setup with no_slowdown
// handling, and closing braces appear to be missing) — verify upstream.
6548 TEST_F(DBTest
, DelayedWriteRate
) {
6549 const int kEntriesPerMemTable
= 100;
6550 const int kTotalFlushes
= 12;
6552 Options options
= CurrentOptions();
6553 env_
->SetBackgroundThreads(1, Env::LOW
);
6555 options
.write_buffer_size
= 100000000;
6556 options
.max_write_buffer_number
= 256;
6557 options
.max_background_compactions
= 1;
6558 options
.level0_file_num_compaction_trigger
= 3;
6559 options
.level0_slowdown_writes_trigger
= 3;
6560 options
.level0_stop_writes_trigger
= 999999;
6561 options
.delayed_write_rate
= 20000000; // Start with 200MB/s
6562 options
.memtable_factory
.reset(
6563 test::NewSpecialSkipListFactory(kEntriesPerMemTable
));
6565 SetTimeElapseOnlySleepOnReopen(&options
);
6566 CreateAndReopenWithCF({"pikachu"}, options
);
6568 // Block compactions
6569 test::SleepingBackgroundTask sleeping_task_low
;
6570 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
, &sleeping_task_low
,
6571 Env::Priority::LOW
);
6573 for (int i
= 0; i
< 3; i
++) {
6574 ASSERT_OK(Put(Key(i
), std::string(10000, 'x')));
6578 // These writes will be slowed down to 1KB/s
6579 uint64_t estimated_sleep_time
= 0;
6581 ASSERT_OK(Put("", ""));
6582 uint64_t cur_rate
= options
.delayed_write_rate
;
6583 for (int i
= 0; i
< kTotalFlushes
; i
++) {
6584 uint64_t size_memtable
= 0;
6585 for (int j
= 0; j
< kEntriesPerMemTable
; j
++) {
6586 auto rand_num
= rnd
.Uniform(20);
6587 // Spread the size range to more.
6588 size_t entry_size
= rand_num
* rand_num
* rand_num
;
6590 ASSERT_OK(Put(Key(i
), std::string(entry_size
, 'x'), wo
));
6591 size_memtable
+= entry_size
+ 18;
6592 // Occasionally sleep a while
6593 if (rnd
.Uniform(20) == 6) {
6594 env_
->SleepForMicroseconds(2666);
6597 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
6598 estimated_sleep_time
+= size_memtable
* 1000000u / cur_rate
;
6599 // Slow down twice. One for memtable switch and one for flush finishes.
6600 cur_rate
= static_cast<uint64_t>(static_cast<double>(cur_rate
) *
6601 kIncSlowdownRatio
* kIncSlowdownRatio
);
6603 // Estimate the total sleep time fall into the rough range.
6604 ASSERT_GT(env_
->NowMicros(), estimated_sleep_time
/ 2);
6605 ASSERT_LT(env_
->NowMicros(), estimated_sleep_time
* 2);
6607 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6608 sleeping_task_low
.WakeUp();
6609 sleeping_task_low
.WaitUntilDone()
// Verifies the hard pending-compaction-bytes limit stops writes: with
// compactions blocked by a sleeping LOW-priority task and
// hard_pending_compaction_bytes_limit set to 800KB, the first five generated
// files must not hit DBImpl::DelayWrite:Wait (callback_count stays 0), while
// five more files must trigger it at least once (the callback also wakes the
// sleeping task so the test can finish).
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (`Random rnd`, `key_idx` setup and closing braces appear to be missing) —
// verify against the upstream file before editing logic.
6612 TEST_F(DBTest
, HardLimit
) {
6613 Options options
= CurrentOptions();
6615 env_
->SetBackgroundThreads(1, Env::LOW
);
6616 options
.max_write_buffer_number
= 256;
6617 options
.write_buffer_size
= 110 << 10; // 110KB
6618 options
.arena_block_size
= 4 * 1024;
6619 options
.level0_file_num_compaction_trigger
= 4;
6620 options
.level0_slowdown_writes_trigger
= 999999;
6621 options
.level0_stop_writes_trigger
= 999999;
6622 options
.hard_pending_compaction_bytes_limit
= 800 << 10;
6623 options
.max_bytes_for_level_base
= 10000000000u;
6624 options
.max_background_compactions
= 1;
6625 options
.memtable_factory
.reset(
6626 test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile
- 1));
6628 env_
->SetBackgroundThreads(1, Env::LOW
);
6629 test::SleepingBackgroundTask sleeping_task_low
;
6630 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
, &sleeping_task_low
,
6631 Env::Priority::LOW
);
6633 CreateAndReopenWithCF({"pikachu"}, options
);
6635 std::atomic
<int> callback_count(0);
6636 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6637 "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) {
6638 callback_count
.fetch_add(1);
6639 sleeping_task_low
.WakeUp();
6641 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
6645 for (int num
= 0; num
< 5; num
++) {
6646 GenerateNewFile(&rnd
, &key_idx
, true);
6647 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
6650 ASSERT_EQ(0, callback_count
.load());
6652 for (int num
= 0; num
< 5; num
++) {
6653 GenerateNewFile(&rnd
, &key_idx
, true);
6654 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
6656 ASSERT_GE(callback_count
.load(), 1);
6658 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6659 sleeping_task_low
.WaitUntilDone()
// EventListener that records the latest WriteStallCondition reported via
// OnStallConditionsChanged (mutex-protected) and lets tests poll it through
// CheckCondition(). Used by the SoftLimit test below.
// NOTE(review): extract is line-mangled with gaps in the embedded numbering
// (access specifiers, the `mutex_` member declaration and closing brace
// appear to be missing) — verify against the upstream file before editing.
6662 #if !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
6663 class WriteStallListener
: public EventListener
{
6665 WriteStallListener() : condition_(WriteStallCondition::kNormal
) {}
6666 void OnStallConditionsChanged(const WriteStallInfo
& info
) override
{
6667 MutexLock
l(&mutex_
);
6668 condition_
= info
.condition
.cur
;
6670 bool CheckCondition(WriteStallCondition expected
) {
6671 MutexLock
l(&mutex_
);
6672 return expected
== condition_
;
6677 WriteStallCondition condition_
;
6680 TEST_F(DBTest
, SoftLimit
) {
6681 Options options
= CurrentOptions();
6683 options
.write_buffer_size
= 100000; // Small write buffer
6684 options
.max_write_buffer_number
= 256;
6685 options
.level0_file_num_compaction_trigger
= 1;
6686 options
.level0_slowdown_writes_trigger
= 3;
6687 options
.level0_stop_writes_trigger
= 999999;
6688 options
.delayed_write_rate
= 20000; // About 200KB/s limited rate
6689 options
.soft_pending_compaction_bytes_limit
= 160000;
6690 options
.target_file_size_base
= 99999999; // All into one file
6691 options
.max_bytes_for_level_base
= 50000;
6692 options
.max_bytes_for_level_multiplier
= 10;
6693 options
.max_background_compactions
= 1;
6694 options
.compression
= kNoCompression
;
6695 WriteStallListener
* listener
= new WriteStallListener();
6696 options
.listeners
.emplace_back(listener
);
6698 // FlushMemtable with opt.wait=true does not wait for
6699 // `OnStallConditionsChanged` being called. The event listener is triggered
6700 // on `JobContext::Clean`, which happens after flush result is installed.
6701 // We use sync point to create a custom WaitForFlush that waits for
6703 port::Mutex flush_mutex
;
6704 port::CondVar
flush_cv(&flush_mutex
);
6705 bool flush_finished
= false;
6706 auto InstallFlushCallback
= [&]() {
6708 MutexLock
l(&flush_mutex
);
6709 flush_finished
= false;
6711 SyncPoint::GetInstance()->SetCallBack(
6712 "DBImpl::BackgroundCallFlush:ContextCleanedUp", [&](void*) {
6714 MutexLock
l(&flush_mutex
);
6715 flush_finished
= true;
6717 flush_cv
.SignalAll();
6720 auto WaitForFlush
= [&]() {
6722 MutexLock
l(&flush_mutex
);
6723 while (!flush_finished
) {
6727 SyncPoint::GetInstance()->ClearCallBack(
6728 "DBImpl::BackgroundCallFlush:ContextCleanedUp");
6731 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
6735 // Generating 360KB in Level 3
6736 for (int i
= 0; i
< 72; i
++) {
6737 ASSERT_OK(Put(Key(i
), std::string(5000, 'x')));
6739 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
6742 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6743 MoveFilesToLevel(3);
6745 // Generating 360KB in Level 2
6746 for (int i
= 0; i
< 72; i
++) {
6747 ASSERT_OK(Put(Key(i
), std::string(5000, 'x')));
6749 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
6752 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6753 MoveFilesToLevel(2);
6755 ASSERT_OK(Put(Key(0), ""));
6757 test::SleepingBackgroundTask sleeping_task_low
;
6758 // Block compactions
6759 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
, &sleeping_task_low
,
6760 Env::Priority::LOW
);
6761 sleeping_task_low
.WaitUntilSleeping();
6763 // Create 3 L0 files, making score of L0 to be 3.
6764 for (int i
= 0; i
< 3; i
++) {
6765 ASSERT_OK(Put(Key(i
), std::string(5000, 'x')));
6766 ASSERT_OK(Put(Key(100 - i
), std::string(5000, 'x')));
6767 // Flush the file. File size is around 30KB.
6768 InstallFlushCallback();
6769 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
6772 ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
6773 ASSERT_TRUE(listener
->CheckCondition(WriteStallCondition::kDelayed
));
6775 sleeping_task_low
.WakeUp();
6776 sleeping_task_low
.WaitUntilDone();
6777 sleeping_task_low
.Reset();
6778 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6780 // Now there is one L1 file but doesn't trigger soft_rate_limit
6782 // TODO: soft_rate_limit is depreciated. If this test
6783 // relies on soft_rate_limit, then we need to change the test.
6785 // The L1 file size is around 30KB.
6786 ASSERT_EQ(NumTableFilesAtLevel(1), 1);
6787 ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
6788 ASSERT_TRUE(listener
->CheckCondition(WriteStallCondition::kNormal
));
6790 // Only allow one compactin going through.
6791 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6792 "BackgroundCallCompaction:0", [&](void* /*arg*/) {
6793 // Schedule a sleeping task.
6794 sleeping_task_low
.Reset();
6795 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
,
6796 &sleeping_task_low
, Env::Priority::LOW
);
6799 env_
->Schedule(&test::SleepingBackgroundTask::DoSleepTask
, &sleeping_task_low
,
6800 Env::Priority::LOW
);
6801 sleeping_task_low
.WaitUntilSleeping();
6802 // Create 3 L0 files, making score of L0 to be 3
6803 for (int i
= 0; i
< 3; i
++) {
6804 ASSERT_OK(Put(Key(10 + i
), std::string(5000, 'x')));
6805 ASSERT_OK(Put(Key(90 - i
), std::string(5000, 'x')));
6806 // Flush the file. File size is around 30KB.
6807 InstallFlushCallback();
6808 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
6812 // Wake up sleep task to enable compaction to run and waits
6813 // for it to go to sleep state again to make sure one compaction
6815 sleeping_task_low
.WakeUp();
6816 sleeping_task_low
.WaitUntilSleeping();
6818 // Now there is one L1 file (around 60KB) which exceeds 50KB base by 10KB
6819 // Given level multiplier 10, estimated pending compaction is around 100KB
6820 // doesn't trigger soft_pending_compaction_bytes_limit
6821 ASSERT_EQ(NumTableFilesAtLevel(1), 1);
6822 ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
6823 ASSERT_TRUE(listener
->CheckCondition(WriteStallCondition::kNormal
));
6825 // Create 3 L0 files, making score of L0 to be 3, higher than L0.
6826 for (int i
= 0; i
< 3; i
++) {
6827 ASSERT_OK(Put(Key(20 + i
), std::string(5000, 'x')));
6828 ASSERT_OK(Put(Key(80 - i
), std::string(5000, 'x')));
6829 // Flush the file. File size is around 30KB.
6830 InstallFlushCallback();
6831 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
6834 // Wake up sleep task to enable compaction to run and waits
6835 // for it to go to sleep state again to make sure one compaction
6837 sleeping_task_low
.WakeUp();
6838 sleeping_task_low
.WaitUntilSleeping();
6840 // Now there is one L1 file (around 90KB) which exceeds 50KB base by 40KB
6841 // L2 size is 360KB, so the estimated level fanout 4, estimated pending
6842 // compaction is around 200KB
6843 // triggerring soft_pending_compaction_bytes_limit
6844 ASSERT_EQ(NumTableFilesAtLevel(1), 1);
6845 ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
6846 ASSERT_TRUE(listener
->CheckCondition(WriteStallCondition::kDelayed
));
6848 sleeping_task_low
.WakeUp();
6849 sleeping_task_low
.WaitUntilSleeping();
6851 ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
6852 ASSERT_TRUE(listener
->CheckCondition(WriteStallCondition::kNormal
));
6854 // shrink level base so L2 will hit soft limit easier.
6855 ASSERT_OK(dbfull()->SetOptions({
6856 {"max_bytes_for_level_base", "5000"},
6859 ASSERT_OK(Put("", ""));
6861 ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
6862 ASSERT_TRUE(listener
->CheckCondition(WriteStallCondition::kDelayed
));
6864 sleeping_task_low
.WaitUntilSleeping();
6865 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6866 sleeping_task_low
.WakeUp();
6867 sleeping_task_low
.WaitUntilDone();
// Verifies that filling the last allowed write buffer triggers the write
// controller's delay: with flushes blocked, memtables pile up until one more
// write would allocate the final memtable, at which point writes are slowed.
TEST_F(DBTest, LastWriteBufferDelay) {
  Options options = CurrentOptions();
  options.env = env_;  // NOTE(review): reconstructed dropped line — confirm
  options.write_buffer_size = 100000;
  options.max_write_buffer_number = 4;
  options.delayed_write_rate = 20000;
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  // Each memtable is declared "full" after this many keys.
  int kNumKeysPerMemtable = 3;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerMemtable));

  Reopen(options);
  test::SleepingBackgroundTask sleeping_task;
  // Occupy the HIGH-priority (flush) thread so no memtable can be flushed.
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
                 Env::Priority::HIGH);
  sleeping_task.WaitUntilSleeping();

  // Create 3 L0 files, making score of L0 to be 3.
  for (int i = 0; i < 3; i++) {
    // Fill one mem table
    for (int j = 0; j < kNumKeysPerMemtable; j++) {
      ASSERT_OK(Put(Key(j), ""));
    }
    // Three full-but-unflushed memtables are still below the delay threshold.
    ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
  }
  // Inserting a new entry would create a new mem table, triggering slow down.
  ASSERT_OK(Put(Key(0), ""));
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());

  // Unblock the flush thread and let the sleeping task finish cleanly.
  sleeping_task.WakeUp();
  sleeping_task.WaitUntilDone();
}
6904 #endif // !defined(ROCKSDB_LITE) &&
6905 // !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
6907 TEST_F(DBTest
, FailWhenCompressionNotSupportedTest
) {
6908 CompressionType compressions
[] = {kZlibCompression
, kBZip2Compression
,
6909 kLZ4Compression
, kLZ4HCCompression
,
6910 kXpressCompression
};
6911 for (auto comp
: compressions
) {
6912 if (!CompressionTypeSupported(comp
)) {
6913 // not supported, we should fail the Open()
6914 Options options
= CurrentOptions();
6915 options
.compression
= comp
;
6916 ASSERT_TRUE(!TryReopen(options
).ok());
6917 // Try if CreateColumnFamily also fails
6918 options
.compression
= kNoCompression
;
6919 ASSERT_OK(TryReopen(options
));
6920 ColumnFamilyOptions
cf_options(options
);
6921 cf_options
.compression
= comp
;
6922 ColumnFamilyHandle
* handle
;
6923 ASSERT_TRUE(!db_
->CreateColumnFamily(cf_options
, "name", &handle
).ok());
6928 TEST_F(DBTest
, CreateColumnFamilyShouldFailOnIncompatibleOptions
) {
6929 Options options
= CurrentOptions();
6930 options
.max_open_files
= 100;
6933 ColumnFamilyOptions
cf_options(options
);
6934 // ttl is now supported when max_open_files is -1.
6935 cf_options
.ttl
= 3600;
6936 ColumnFamilyHandle
* handle
;
6937 ASSERT_OK(db_
->CreateColumnFamily(cf_options
, "pikachu", &handle
));
6941 #ifndef ROCKSDB_LITE
6942 TEST_F(DBTest
, RowCache
) {
6943 Options options
= CurrentOptions();
6944 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
6945 options
.row_cache
= NewLRUCache(8192);
6946 DestroyAndReopen(options
);
6948 ASSERT_OK(Put("foo", "bar"));
6951 ASSERT_EQ(TestGetTickerCount(options
, ROW_CACHE_HIT
), 0);
6952 ASSERT_EQ(TestGetTickerCount(options
, ROW_CACHE_MISS
), 0);
6953 ASSERT_EQ(Get("foo"), "bar");
6954 ASSERT_EQ(TestGetTickerCount(options
, ROW_CACHE_HIT
), 0);
6955 ASSERT_EQ(TestGetTickerCount(options
, ROW_CACHE_MISS
), 1);
6956 ASSERT_EQ(Get("foo"), "bar");
6957 ASSERT_EQ(TestGetTickerCount(options
, ROW_CACHE_HIT
), 1);
6958 ASSERT_EQ(TestGetTickerCount(options
, ROW_CACHE_MISS
), 1);
// Verifies the interaction between PinnableSlice and the row cache: while a
// slice pins a cached row, that row is taken off the cache's LRU list
// (TEST_GetLRUSize() drops to 0) and is re-inserted when the slice dies.
TEST_F(DBTest, PinnableSliceAndRowCache) {
  Options options = CurrentOptions();
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.row_cache = NewLRUCache(8192);
  DestroyAndReopen(options);

  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Flush());

  // First read populates the row cache: one entry on the LRU list.
  ASSERT_EQ(Get("foo"), "bar");
  ASSERT_EQ(
      reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
      1);

  {
    PinnableSlice pin_slice;
    ASSERT_EQ(Get("foo", &pin_slice), Status::OK());
    ASSERT_EQ(pin_slice.ToString(), "bar");
    // Entry is already in cache, lookup will remove the element from lru
    ASSERT_EQ(
        reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
        0);
  }
  // After PinnableSlice destruction element is added back in LRU
  ASSERT_EQ(
      reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
      1);
}
// Like PinnableSliceAndRowCache, but reuses the same PinnableSlice for
// multiple lookups (Get and both MultiGet overloads). Reuse must not leak
// pins: after each scope ends, the row returns to the cache's LRU list.
TEST_F(DBTest, ReusePinnableSlice) {
  Options options = CurrentOptions();
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.row_cache = NewLRUCache(8192);
  DestroyAndReopen(options);

  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Flush());

  // First read populates the row cache: one entry on the LRU list.
  ASSERT_EQ(Get("foo"), "bar");
  ASSERT_EQ(
      reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
      1);

  {
    // Two Gets into the same slice: the second lookup must release the pin
    // taken by the first, leaving exactly one outstanding pin.
    PinnableSlice pin_slice;
    ASSERT_EQ(Get("foo", &pin_slice), Status::OK());
    ASSERT_EQ(Get("foo", &pin_slice), Status::OK());
    ASSERT_EQ(pin_slice.ToString(), "bar");

    // Entry is already in cache, lookup will remove the element from lru
    ASSERT_EQ(
        reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
        0);
  }
  // After PinnableSlice destruction element is added back in LRU
  ASSERT_EQ(
      reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
      1);

  {
    // Same reuse check through the single-column-family MultiGet overload.
    std::vector<Slice> multiget_keys;
    multiget_keys.push_back("foo");
    std::vector<PinnableSlice> multiget_values(1);
    std::vector<Status> statuses({Status::NotFound()});
    ReadOptions ropt;
    dbfull()->MultiGet(ropt, dbfull()->DefaultColumnFamily(),
                       multiget_keys.size(), multiget_keys.data(),
                       multiget_values.data(), statuses.data());
    ASSERT_EQ(Status::OK(), statuses[0]);
    dbfull()->MultiGet(ropt, dbfull()->DefaultColumnFamily(),
                       multiget_keys.size(), multiget_keys.data(),
                       multiget_values.data(), statuses.data());
    ASSERT_EQ(Status::OK(), statuses[0]);

    // Entry is already in cache, lookup will remove the element from lru
    ASSERT_EQ(
        reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
        0);
  }
  // After PinnableSlice destruction element is added back in LRU
  ASSERT_EQ(
      reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
      1);

  {
    // Same reuse check through the multi-column-family MultiGet overload.
    std::vector<ColumnFamilyHandle*> multiget_cfs;
    multiget_cfs.push_back(dbfull()->DefaultColumnFamily());
    std::vector<Slice> multiget_keys;
    multiget_keys.push_back("foo");
    std::vector<PinnableSlice> multiget_values(1);
    std::vector<Status> statuses({Status::NotFound()});
    ReadOptions ropt;
    dbfull()->MultiGet(ropt, multiget_keys.size(), multiget_cfs.data(),
                       multiget_keys.data(), multiget_values.data(),
                       statuses.data());
    ASSERT_EQ(Status::OK(), statuses[0]);
    dbfull()->MultiGet(ropt, multiget_keys.size(), multiget_cfs.data(),
                       multiget_keys.data(), multiget_values.data(),
                       statuses.data());
    ASSERT_EQ(Status::OK(), statuses[0]);

    // Entry is already in cache, lookup will remove the element from lru
    ASSERT_EQ(
        reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
        0);
  }
  // After PinnableSlice destruction element is added back in LRU
  ASSERT_EQ(
      reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
      1);
}
7073 #endif // ROCKSDB_LITE
// Regression test: when the column family owning the oldest WAL is dropped
// while that WAL is being flushed, a new WAL must still be rolled (the old
// one must not be kept alive forever by the dropped CF).
TEST_F(DBTest, DeletingOldWalAfterDrop) {
  // Hold background flushes until "Test:AllowFlushes" fires, and block the
  // test at "Test:WaitForFlush" until the flush job has completed.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"Test:AllowFlushes", "DBImpl::BGWorkFlush"},
       {"DBImpl::BGWorkFlush:done", "Test:WaitForFlush"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();

  // Keep sync points disabled while the DB is (re)opened so startup flushes
  // don't trip the dependencies above.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  Options options = CurrentOptions();
  // Tiny WAL budget forces a WAL roll after a couple of 8KB writes.
  options.max_total_wal_size = 8192;
  options.compression = kNoCompression;
  options.write_buffer_size = 1 << 20;
  // Effectively disable all compaction/slowdown triggers for this test.
  options.level0_file_num_compaction_trigger = (1 << 30);
  options.level0_slowdown_writes_trigger = (1 << 30);
  options.level0_stop_writes_trigger = (1 << 30);
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  CreateColumnFamilies({"cf1", "cf2"}, options);
  ASSERT_OK(Put(0, "key1", DummyString(8192)));
  ASSERT_OK(Put(0, "key2", DummyString(8192)));
  // the oldest wal should now be getting_flushed
  ASSERT_OK(db_->DropColumnFamily(handles_[0]));
  // all flushes should now do nothing because their CF is dropped
  TEST_SYNC_POINT("Test:AllowFlushes");
  TEST_SYNC_POINT("Test:WaitForFlush");
  uint64_t lognum1 = dbfull()->TEST_LogfileNumber();
  ASSERT_OK(Put(1, "key3", DummyString(8192)));
  ASSERT_OK(Put(1, "key4", DummyString(8192)));
  // new wal should have been created
  uint64_t lognum2 = dbfull()->TEST_LogfileNumber();
  EXPECT_GT(lognum2, lognum1);
}
7109 TEST_F(DBTest
, UnsupportedManualSync
) {
7110 DestroyAndReopen(CurrentOptions());
7111 env_
->is_wal_sync_thread_safe_
.store(false);
7112 Status s
= db_
->SyncWAL();
7113 ASSERT_TRUE(s
.IsNotSupported());
// Instantiate DBTestWithParam over the cross product of {1, 4} and
// {false, true} — presumably (max_subcompactions, exclusive manual
// compaction); verify against the DBTestWithParam fixture definition.
INSTANTIATE_TEST_CASE_P(DBTestWithParam, DBTestWithParam,
                        ::testing::Combine(::testing::Values(1, 4),
                                           ::testing::Bool()));
// Verifies PauseBackgroundWork(): with flushes paused and a small write
// buffer, a heavy writer gets stalled; ContinueBackgroundWork() releases it.
TEST_F(DBTest, PauseBackgroundWorkTest) {
  Options options = CurrentOptions();
  options.write_buffer_size = 100000;  // Small write buffer
  Reopen(options);

  std::vector<port::Thread> threads;
  std::atomic<bool> done(false);
  // Stop all background flush/compaction work before the writer starts.
  ASSERT_OK(db_->PauseBackgroundWork());
  threads.emplace_back([&]() {
    Random rnd(301);
    for (int i = 0; i < 10000; ++i) {
      ASSERT_OK(Put(rnd.RandomString(10), rnd.RandomString(10)));
    }
    done.store(true);
  });
  // Give the writer time to fill memtables; with background work paused it
  // should be stalled well before finishing 10000 writes.
  env_->SleepForMicroseconds(200000);
  // make sure the thread is not done
  ASSERT_FALSE(done.load());
  ASSERT_OK(db_->ContinueBackgroundWork());
  for (auto& t : threads) {
    t.join();
  }
  // Once background work resumes, the writer must be able to finish.
  ASSERT_TRUE(done.load());
}
7146 // Keep spawning short-living threads that create an iterator and quit.
7147 // Meanwhile in another thread keep flushing memtables.
7148 // This used to cause a deadlock.
// Stress test for a historical deadlock between ThreadLocalPtr bookkeeping
// (touched on thread destruction via iterator cleanup) and flushing. One
// thread keeps flushing; ten threads keep spawning short-lived threads that
// each create and destroy an iterator.
TEST_F(DBTest, ThreadLocalPtrDeadlock) {
  std::atomic<int> flushes_done{0};
  std::atomic<int> threads_destroyed{0};
  // Shared stop condition: run until we have seen more than 10 flushes.
  auto done = [&] { return flushes_done.load() > 10; };

  port::Thread flushing_thread([&] {
    for (int i = 0; !done(); ++i) {
      ASSERT_OK(db_->Put(WriteOptions(), Slice("hi"),
                         Slice(std::to_string(i).c_str())));
      ASSERT_OK(db_->Flush(FlushOptions()));
      int cnt = ++flushes_done;
      fprintf(stderr, "Flushed %d times\n", cnt);
    }
  });

  std::vector<port::Thread> thread_spawning_threads(10);
  for (auto& t : thread_spawning_threads) {
    t = port::Thread([&] {
      while (!done()) {
        {
          // Each short-lived thread creates an iterator and exits; thread
          // teardown is what used to deadlock against the flush.
          port::Thread tmp_thread([&] {
            auto it = db_->NewIterator(ReadOptions());
            ASSERT_OK(it->status());
            delete it;
          });
          tmp_thread.join();
        }
        ++threads_destroyed;
      }
    });
  }

  for (auto& t : thread_spawning_threads) {
    t.join();
  }
  flushing_thread.join();
  fprintf(stderr, "Done. Flushed %d times, destroyed %d threads\n",
          flushes_done.load(), threads_destroyed.load());
}
7189 TEST_F(DBTest
, LargeBlockSizeTest
) {
7190 Options options
= CurrentOptions();
7191 CreateAndReopenWithCF({"pikachu"}, options
);
7192 ASSERT_OK(Put(0, "foo", "bar"));
7193 BlockBasedTableOptions table_options
;
7194 table_options
.block_size
= 8LL * 1024 * 1024 * 1024LL;
7195 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
7196 ASSERT_NOK(TryReopenWithColumnFamilies({"default", "pikachu"}, options
));
7199 #ifndef ROCKSDB_LITE
// Exercises DB::GetCreationTimeOfOldestFile():
//   1. with one file forced to creation time 0, the API must return 0;
//   2. with two non-zero creation times, it must return the older one;
//   3. with max_open_files != -1, it must return NotSupported.
// File creation times are injected via a table-properties sync point; the
// manifest copies are zeroed so only the table properties matter.
TEST_F(DBTest, CreationTimeOfOldestFile) {
  const int kNumKeysPerFile = 32;
  const int kNumLevelFiles = 2;
  const int kValueSize = 100;

  Options options = CurrentOptions();
  options.max_open_files = -1;
  env_->SetMockSleep();
  options.env = env_;  // NOTE(review): reconstructed dropped line — confirm

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  DestroyAndReopen(options);

  bool set_file_creation_time_to_zero = true;
  // Index of the file currently being written (0 or 1); shared with the
  // sync-point callback below.
  int idx = 0;

  int64_t time_1 = 0;
  env_->GetCurrentTime(&time_1);
  const uint64_t uint_time_1 = static_cast<uint64_t>(time_1);

  // Advance mock time by 50 hours so time_2 is clearly later than time_1.
  env_->MockSleepForSeconds(50 * 60 * 60);

  int64_t time_2 = 0;
  env_->GetCurrentTime(&time_2);
  const uint64_t uint_time_2 = static_cast<uint64_t>(time_2);

  // Overwrite each new table's file_creation_time property: phase 1 writes
  // {0, time_1}; phase 2 writes {time_1, time_2}.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) {
        TableProperties* props = reinterpret_cast<TableProperties*>(arg);
        if (set_file_creation_time_to_zero) {
          if (idx == 0) {
            props->file_creation_time = 0;
            idx++;
          } else if (idx == 1) {
            props->file_creation_time = uint_time_1;
            idx = 0;
          }
        } else {
          if (idx == 0) {
            props->file_creation_time = uint_time_1;
            idx++;
          } else if (idx == 1) {
            props->file_creation_time = uint_time_2;
          }
        }
      });
  // Set file creation time in manifest all to 0.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "FileMetaData::FileMetaData", [&](void* arg) {
        FileMetaData* meta = static_cast<FileMetaData*>(arg);
        meta->file_creation_time = 0;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);
  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
    }
    ASSERT_OK(Flush());
  }

  // At this point there should be 2 files, one with file_creation_time = 0 and
  // the other non-zero. GetCreationTimeOfOldestFile API should return 0.
  uint64_t creation_time;
  Status s1 = dbfull()->GetCreationTimeOfOldestFile(&creation_time);
  ASSERT_EQ(0, creation_time);
  ASSERT_EQ(s1, Status::OK());

  // Testing with non-zero file creation time.
  set_file_creation_time_to_zero = false;
  options = CurrentOptions();
  options.max_open_files = -1;
  options.env = env_;  // NOTE(review): reconstructed dropped line — confirm

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  DestroyAndReopen(options);

  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
    }
    ASSERT_OK(Flush());
  }

  // At this point there should be 2 files with non-zero file creation time.
  // GetCreationTimeOfOldestFile API should return non-zero value.
  uint64_t ctime;
  Status s2 = dbfull()->GetCreationTimeOfOldestFile(&ctime);
  ASSERT_EQ(uint_time_1, ctime);
  ASSERT_EQ(s2, Status::OK());

  // Testing with max_open_files != -1
  options = CurrentOptions();
  options.max_open_files = 10;
  DestroyAndReopen(options);
  Status s3 = dbfull()->GetCreationTimeOfOldestFile(&ctime);
  ASSERT_EQ(s3, Status::NotSupported());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
// Verifies that max_write_buffer_size_to_maintain bounds total memtable
// memory: across 1000 writes, active + immutable memtable usage may spike
// above the maintain limit (+ one write buffer) at most transiently — two
// consecutive over-limit samples fail the test.
TEST_F(DBTest, MemoryUsageWithMaxWriteBufferSizeToMaintain) {
  Options options = CurrentOptions();
  options.max_write_buffer_size_to_maintain = 10000;
  options.write_buffer_size = 160000;
  Reopen(options);
  Random rnd(301);
  // Tracks whether the previous iteration already saw usage over the limit.
  bool memory_limit_exceeded = false;

  ColumnFamilyData* cfd =
      static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();

  for (int i = 0; i < 1000; i++) {
    std::string value = rnd.RandomString(1000);
    ASSERT_OK(Put("keykey_" + std::to_string(i), value));

    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

    const uint64_t cur_active_mem = cfd->mem()->ApproximateMemoryUsage();
    const uint64_t size_all_mem_table =
        cur_active_mem + cfd->imm()->ApproximateMemoryUsage();

    // Errors out if memory usage keeps on increasing beyond the limit.
    // Once memory limit exceeds, memory_limit_exceeded is set and if
    // size_all_mem_table doesn't drop out in the next write then it errors out
    // (not expected behaviour). If memory usage drops then
    // memory_limit_exceeded is set to false.
    if ((size_all_mem_table > cur_active_mem) &&
        (cur_active_mem >=
         static_cast<uint64_t>(options.max_write_buffer_size_to_maintain)) &&
        (size_all_mem_table >
         static_cast<uint64_t>(options.max_write_buffer_size_to_maintain) +
             options.write_buffer_size)) {
      ASSERT_FALSE(memory_limit_exceeded);
      memory_limit_exceeded = true;
    } else {
      memory_limit_exceeded = false;
    }
  }
}
// Verifies that a write stalled by the L0 stop trigger is unblocked with
// kShutdownInProgress (rather than hanging) when background work is
// cancelled during shutdown.
TEST_F(DBTest, ShuttingDownNotBlockStalledWrites) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  Reopen(options);
  // NOTE(review): seed reconstructed from a dropped line — confirm.
  Random rnd(301);

  // Build up 20 L0 files so the stop trigger (20) fires on the next write.
  for (int i = 0; i < 20; i++) {
    ASSERT_OK(Put("key_" + std::to_string(i), rnd.RandomString(10)));
    ASSERT_OK(Flush());
  }
  ASSERT_EQ(GetSstFileCount(dbname_), 20);

  // We need !disable_auto_compactions for writes to stall but also want to
  // delay compaction so stalled writes unblocked due to kShutdownInProgress. BG
  // compaction will first wait for the sync point
  // DBTest::ShuttingDownNotBlockStalledWrites. Then it waits extra 2 sec to
  // allow CancelAllBackgroundWork() to set shutting_down_.
  SyncPoint::GetInstance()->SetCallBack(
      "BackgroundCallCompaction:0",
      [&](void* /* arg */) { env_->SleepForMicroseconds(2 * 1000 * 1000); });
  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::DelayWrite:Wait", "DBTest::ShuttingDownNotBlockStalledWrites"},
       {"DBTest::ShuttingDownNotBlockStalledWrites",
        "BackgroundCallCompaction:0"}});
  SyncPoint::GetInstance()->EnableProcessing();

  options.level0_stop_writes_trigger = 20;
  options.disable_auto_compactions = false;
  Reopen(options);

  // This write hits the L0 stop condition and blocks in DelayWrite:Wait;
  // it must come back with kShutdownInProgress once shutdown starts.
  std::thread thd([&]() {
    Status s = Put("key_" + std::to_string(101), "101");
    ASSERT_EQ(s.code(), Status::kShutdownInProgress);
  });

  TEST_SYNC_POINT("DBTest::ShuttingDownNotBlockStalledWrites");
  CancelAllBackgroundWork(db_, true);
  thd.join();
}
7390 } // namespace ROCKSDB_NAMESPACE
7392 int main(int argc
, char** argv
) {
7393 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
7394 ::testing::InitGoogleTest(&argc
, argv
);
7395 RegisterCustomObjects(argc
, argv
);
7396 return RUN_ALL_TESTS();