// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

// Introduction of SyncPoint effectively disabled building and running this
// test in Release build, which is a pity because it is a good test.
#include <fcntl.h>

#include <algorithm>
#include <set>
#include <thread>
#include <unordered_set>
#include <utility>

#ifndef OS_WIN
#include <unistd.h>
#endif
#ifdef OS_SOLARIS
#include <alloca.h>
#endif

#include "cache/lru_cache.h"
#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/db_impl/db_impl.h"
#include "db/db_test_util.h"
#include "db/dbformat.h"
#include "db/job_context.h"
#include "db/version_set.h"
#include "db/write_batch_internal.h"
#include "env/mock_env.h"
#include "file/filename.h"
#include "monitoring/thread_status_util.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/experimental.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/snapshot.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "rocksdb/thread_status.h"
#include "rocksdb/types.h"
#include "rocksdb/utilities/checkpoint.h"
#include "rocksdb/utilities/optimistic_transaction_db.h"
#include "rocksdb/utilities/write_batch_with_index.h"
#include "table/mock_table.h"
#include "table/scoped_arena_iterator.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/compression.h"
#include "util/mutexlock.h"
#include "util/random.h"
#include "util/rate_limiter.h"
#include "util/string_util.h"
#include "utilities/merge_operators.h"

namespace ROCKSDB_NAMESPACE {

// Note that whole DBTest and its child classes disable fsync on files
// and directories for speed.
// If fsync needs to be covered in a test, put it in other places.
class DBTest : public DBTestBase {
 public:
  DBTest() : DBTestBase("db_test", /*env_do_fsync=*/false) {}
};

class DBTestWithParam
    : public DBTest,
      public testing::WithParamInterface<std::tuple<uint32_t, bool>> {
 public:
  DBTestWithParam() {
    max_subcompactions_ = std::get<0>(GetParam());
    exclusive_manual_compaction_ = std::get<1>(GetParam());
  }

  // Required if inheriting from testing::WithParamInterface<>
  static void SetUpTestCase() {}
  static void TearDownTestCase() {}

  uint32_t max_subcompactions_;
  bool exclusive_manual_compaction_;
};
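
// For reference, a parameterized suite like DBTestWithParam is instantiated
// with INSTANTIATE_TEST_CASE_P. A minimal sketch (the parameter values below
// are illustrative assumptions, not the values this file actually uses):
//
//   INSTANTIATE_TEST_CASE_P(
//       DBTestWithParam, DBTestWithParam,
//       ::testing::Combine(::testing::Values(1, 4),  // max_subcompactions_
//                          ::testing::Bool()));      // exclusive manual compaction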

TEST_F(DBTest, MockEnvTest) {
  std::unique_ptr<MockEnv> env{MockEnv::Create(Env::Default())};
  Options options;
  options.create_if_missing = true;
  options.env = env.get();
  DB* db;

  const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
  const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};

  ASSERT_OK(DB::Open(options, "/dir/db", &db));
  for (size_t i = 0; i < 3; ++i) {
    ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
  }

  for (size_t i = 0; i < 3; ++i) {
    std::string res;
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
    ASSERT_TRUE(res == vals[i]);
  }

  Iterator* iterator = db->NewIterator(ReadOptions());
  iterator->SeekToFirst();
  for (size_t i = 0; i < 3; ++i) {
    ASSERT_TRUE(iterator->Valid());
    ASSERT_TRUE(keys[i] == iterator->key());
    ASSERT_TRUE(vals[i] == iterator->value());
    iterator->Next();
  }
  ASSERT_TRUE(!iterator->Valid());
  delete iterator;

// TEST_FlushMemTable() is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
  DBImpl* dbi = static_cast_with_check<DBImpl>(db);
  ASSERT_OK(dbi->TEST_FlushMemTable());

  for (size_t i = 0; i < 3; ++i) {
    std::string res;
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
    ASSERT_TRUE(res == vals[i]);
  }
#endif  // ROCKSDB_LITE

  delete db;
}

// NewMemEnv returns nullptr in ROCKSDB_LITE since class InMemoryEnv isn't
// defined.
#ifndef ROCKSDB_LITE
TEST_F(DBTest, MemEnvTest) {
  std::unique_ptr<Env> env{NewMemEnv(Env::Default())};
  Options options;
  options.create_if_missing = true;
  options.env = env.get();
  DB* db;

  const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
  const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};

  ASSERT_OK(DB::Open(options, "/dir/db", &db));
  for (size_t i = 0; i < 3; ++i) {
    ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
  }

  for (size_t i = 0; i < 3; ++i) {
    std::string res;
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
    ASSERT_TRUE(res == vals[i]);
  }

  Iterator* iterator = db->NewIterator(ReadOptions());
  iterator->SeekToFirst();
  for (size_t i = 0; i < 3; ++i) {
    ASSERT_TRUE(iterator->Valid());
    ASSERT_TRUE(keys[i] == iterator->key());
    ASSERT_TRUE(vals[i] == iterator->value());
    iterator->Next();
  }
  ASSERT_TRUE(!iterator->Valid());
  delete iterator;

  DBImpl* dbi = static_cast_with_check<DBImpl>(db);
  ASSERT_OK(dbi->TEST_FlushMemTable());

  for (size_t i = 0; i < 3; ++i) {
    std::string res;
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
    ASSERT_TRUE(res == vals[i]);
  }

  delete db;

  options.create_if_missing = false;
  ASSERT_OK(DB::Open(options, "/dir/db", &db));
  for (size_t i = 0; i < 3; ++i) {
    std::string res;
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
    ASSERT_TRUE(res == vals[i]);
  }
  delete db;
}
#endif  // ROCKSDB_LITE
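
// The second DB::Open above succeeds with create_if_missing = false because
// the in-memory Env outlives the DB object and keeps its files around. A
// minimal sketch of the pattern (every open/reopen must reuse the same Env):
//
//   std::unique_ptr<Env> mem_env{NewMemEnv(Env::Default())};
//   Options opts;
//   opts.env = mem_env.get();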

TEST_F(DBTest, WriteEmptyBatch) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "foo", "bar"));
  WriteOptions wo;
  wo.sync = true;
  wo.disableWAL = false;
  WriteBatch empty_batch;
  ASSERT_OK(dbfull()->Write(wo, &empty_batch));

  // make sure we can re-open it.
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
  ASSERT_EQ("bar", Get(1, "foo"));
}

TEST_F(DBTest, SkipDelay) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;
  CreateAndReopenWithCF({"pikachu"}, options);

  for (bool sync : {true, false}) {
    for (bool disableWAL : {true, false}) {
      if (sync && disableWAL) {
        // sync and disableWAL are incompatible.
        continue;
      }
      // Use a small number to ensure a large delay that is still effective
      // when we do Put
      // TODO(myabandeh): this is time dependent and could potentially make
      // the test flaky
      auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
      std::atomic<int> sleep_count(0);
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "DBImpl::DelayWrite:Sleep",
          [&](void* /*arg*/) { sleep_count.fetch_add(1); });
      std::atomic<int> wait_count(0);
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "DBImpl::DelayWrite:Wait",
          [&](void* /*arg*/) { wait_count.fetch_add(1); });
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

      WriteOptions wo;
      wo.sync = sync;
      wo.disableWAL = disableWAL;
      wo.no_slowdown = true;
      // Large enough to exceed allowance for one time interval
      std::string large_value(1024, 'x');
      // Perhaps ideally this first write would fail because of delay, but
      // the current implementation does not guarantee that.
      dbfull()->Put(wo, "foo", large_value).PermitUncheckedError();
      // We need the 2nd write to trigger delay. This is because delay is
      // estimated based on the last write size which is 0 for the first write.
      ASSERT_NOK(dbfull()->Put(wo, "foo2", large_value));
      ASSERT_GE(sleep_count.load(), 0);
      ASSERT_GE(wait_count.load(), 0);
      token.reset();

      token = dbfull()->TEST_write_controler().GetDelayToken(1000000);
      wo.no_slowdown = false;
      ASSERT_OK(dbfull()->Put(wo, "foo3", large_value));
      ASSERT_GE(sleep_count.load(), 1);
      token.reset();
    }
  }
}
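
// A note on the SyncPoint pattern used throughout these tests: callbacks are
// registered per named marker, then EnableProcessing() turns them on. The
// tests here leave the cleanup to the test-harness teardown; standalone code
// would disable and clear explicitly. A minimal sketch ("MyClass::MyMarker"
// is a hypothetical marker name):
//
//   SyncPoint::GetInstance()->SetCallBack(
//       "MyClass::MyMarker", [](void* /*arg*/) { /* observe or inject */ });
//   SyncPoint::GetInstance()->EnableProcessing();
//   // ... exercise the code under test ...
//   SyncPoint::GetInstance()->DisableProcessing();
//   SyncPoint::GetInstance()->ClearAllCallBacks();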

TEST_F(DBTest, MixedSlowdownOptions) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;
  CreateAndReopenWithCF({"pikachu"}, options);
  std::vector<port::Thread> threads;
  std::atomic<int> thread_num(0);

  std::function<void()> write_slowdown_func = [&]() {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions wo;
    wo.no_slowdown = false;
    ASSERT_OK(dbfull()->Put(wo, key, "bar"));
  };
  std::function<void()> write_no_slowdown_func = [&]() {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions wo;
    wo.no_slowdown = true;
    ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
  };
  // Use a small number to ensure a large delay that is still effective
  // when we do Put
  // TODO(myabandeh): this is time dependent and could potentially make
  // the test flaky
  auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
  std::atomic<int> sleep_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:BeginWriteStallDone", [&](void* /*arg*/) {
        sleep_count.fetch_add(1);
        if (threads.empty()) {
          for (int i = 0; i < 2; ++i) {
            threads.emplace_back(write_slowdown_func);
          }
          for (int i = 0; i < 2; ++i) {
            threads.emplace_back(write_no_slowdown_func);
          }
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  WriteOptions wo;
  wo.sync = false;
  wo.disableWAL = false;
  wo.no_slowdown = false;
  ASSERT_OK(dbfull()->Put(wo, "foo", "bar"));
  // We need the 2nd write to trigger delay. This is because delay is
  // estimated based on the last write size which is 0 for the first write.
  ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
  token.reset();

  for (auto& t : threads) {
    t.join();
  }
  ASSERT_GE(sleep_count.load(), 1);

  wo.no_slowdown = true;
  ASSERT_OK(dbfull()->Put(wo, "foo3", "bar"));
}

TEST_F(DBTest, MixedSlowdownOptionsInQueue) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;
  CreateAndReopenWithCF({"pikachu"}, options);
  std::vector<port::Thread> threads;
  std::atomic<int> thread_num(0);

  std::function<void()> write_no_slowdown_func = [&]() {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions wo;
    wo.no_slowdown = true;
    ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
  };
  // Use a small number to ensure a large delay that is still effective
  // when we do Put
  // TODO(myabandeh): this is time dependent and could potentially make
  // the test flaky
  auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
  std::atomic<int> sleep_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:Sleep", [&](void* /*arg*/) {
        sleep_count.fetch_add(1);
        if (threads.empty()) {
          for (int i = 0; i < 2; ++i) {
            threads.emplace_back(write_no_slowdown_func);
          }
          // Sleep for 3s to allow the threads to insert themselves into the
          // write queue
          env_->SleepForMicroseconds(3000000ULL);
        }
      });
  std::atomic<int> wait_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:Wait",
      [&](void* /*arg*/) { wait_count.fetch_add(1); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  WriteOptions wo;
  wo.sync = false;
  wo.disableWAL = false;
  wo.no_slowdown = false;
  ASSERT_OK(dbfull()->Put(wo, "foo", "bar"));
  // We need the 2nd write to trigger delay. This is because delay is
  // estimated based on the last write size which is 0 for the first write.
  ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
  token.reset();

  for (auto& t : threads) {
    t.join();
  }
  ASSERT_EQ(sleep_count.load(), 1);
  ASSERT_GE(wait_count.load(), 0);
}

TEST_F(DBTest, MixedSlowdownOptionsStop) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;
  CreateAndReopenWithCF({"pikachu"}, options);
  std::vector<port::Thread> threads;
  std::atomic<int> thread_num(0);

  std::function<void()> write_slowdown_func = [&]() {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions wo;
    wo.no_slowdown = false;
    ASSERT_OK(dbfull()->Put(wo, key, "bar"));
  };
  std::function<void()> write_no_slowdown_func = [&]() {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions wo;
    wo.no_slowdown = true;
    ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
  };
  std::function<void()> wakeup_writer = [&]() {
    dbfull()->mutex_.Lock();
    dbfull()->bg_cv_.SignalAll();
    dbfull()->mutex_.Unlock();
  };
  // Use a small number to ensure a large delay that is still effective
  // when we do Put
  // TODO(myabandeh): this is time dependent and could potentially make
  // the test flaky
  auto token = dbfull()->TEST_write_controler().GetStopToken();
  std::atomic<int> wait_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) {
        wait_count.fetch_add(1);
        if (threads.empty()) {
          for (int i = 0; i < 2; ++i) {
            threads.emplace_back(write_slowdown_func);
          }
          for (int i = 0; i < 2; ++i) {
            threads.emplace_back(write_no_slowdown_func);
          }
          // Sleep for 3s to allow the threads to insert themselves into the
          // write queue
          env_->SleepForMicroseconds(3000000ULL);
        }
        token.reset();
        threads.emplace_back(wakeup_writer);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  WriteOptions wo;
  wo.sync = false;
  wo.disableWAL = false;
  wo.no_slowdown = false;
  ASSERT_OK(dbfull()->Put(wo, "foo", "bar"));
  // We need the 2nd write to trigger delay. This is because delay is
  // estimated based on the last write size which is 0 for the first write.
  ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
  token.reset();

  for (auto& t : threads) {
    t.join();
  }
  ASSERT_GE(wait_count.load(), 1);

  wo.no_slowdown = true;
  ASSERT_OK(dbfull()->Put(wo, "foo3", "bar"));
}
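
// A note on the WriteController tokens used above: GetDelayToken(rate) and
// GetStopToken() return RAII handles; writes are delayed (to roughly `rate`
// bytes/sec) or stopped for as long as a token is alive, and releasing the
// token lifts the throttling. A minimal sketch of the pattern:
//
//   {
//     auto token = dbfull()->TEST_write_controler().GetDelayToken(1 << 20);
//     // writes issued here are delayed to ~1MB/s
//   }  // token destroyed; normal write speed resumes
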
#ifndef ROCKSDB_LITE

TEST_F(DBTest, LevelLimitReopen) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu"}, options);

  const std::string value(1024 * 1024, ' ');
  int i = 0;
  while (NumTableFilesAtLevel(2, 1) == 0) {
    ASSERT_OK(Put(1, Key(i++), value));
  }

  options.num_levels = 1;
  options.max_bytes_for_level_multiplier_additional.resize(1, 1);
  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_EQ(s.IsInvalidArgument(), true);
  ASSERT_EQ(s.ToString(),
            "Invalid argument: db has more levels than options.num_levels");

  options.num_levels = 10;
  options.max_bytes_for_level_multiplier_additional.resize(10, 1);
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
}
#endif  // ROCKSDB_LITE

#ifndef ROCKSDB_LITE
TEST_F(DBTest, LevelReopenWithFIFO) {
  const int kLevelCount = 4;
  const int kKeyCount = 5;
  const int kTotalSstFileCount = kLevelCount * kKeyCount;
  const int kCF = 1;

  Options options = CurrentOptions();
  // Config level0_file_num_compaction_trigger to prevent L0 files from being
  // automatically compacted while we are constructing an LSM tree structure
  // to test multi-level FIFO compaction.
  options.level0_file_num_compaction_trigger = kKeyCount + 1;
  CreateAndReopenWithCF({"pikachu"}, options);

  // The expected number of files per level after each file creation.
  const std::string expected_files_per_level[kLevelCount][kKeyCount] = {
      {"0,0,0,1", "0,0,0,2", "0,0,0,3", "0,0,0,4", "0,0,0,5"},
      {"0,0,1,5", "0,0,2,5", "0,0,3,5", "0,0,4,5", "0,0,5,5"},
      {"0,1,5,5", "0,2,5,5", "0,3,5,5", "0,4,5,5", "0,5,5,5"},
      {"1,5,5,5", "2,5,5,5", "3,5,5,5", "4,5,5,5", "5,5,5,5"},
  };

  const std::string expected_entries[kKeyCount][kLevelCount + 1] = {
      {"[ ]", "[ a3 ]", "[ a2, a3 ]", "[ a1, a2, a3 ]", "[ a0, a1, a2, a3 ]"},
      {"[ ]", "[ b3 ]", "[ b2, b3 ]", "[ b1, b2, b3 ]", "[ b0, b1, b2, b3 ]"},
      {"[ ]", "[ c3 ]", "[ c2, c3 ]", "[ c1, c2, c3 ]", "[ c0, c1, c2, c3 ]"},
      {"[ ]", "[ d3 ]", "[ d2, d3 ]", "[ d1, d2, d3 ]", "[ d0, d1, d2, d3 ]"},
      {"[ ]", "[ e3 ]", "[ e2, e3 ]", "[ e1, e2, e3 ]", "[ e0, e1, e2, e3 ]"},
  };

  // The loop below creates the following LSM tree where each (k, v) pair
  // represents a file that contains that entry. Each time a file is created,
  // the db is reopened with FIFO compaction and we verify that the LSM tree
  // structure is still the same.
  //
  // The resulting LSM tree will contain 5 different keys. Each key has
  // 4 different versions, each located in a different level.
  //
  // L0: (e, e0) (d, d0) (c, c0) (b, b0) (a, a0)
  // L1: (a, a1) (b, b1) (c, c1) (d, d1) (e, e1)
  // L2: (a, a2) (b, b2) (c, c2) (d, d2) (e, e2)
  // L3: (a, a3) (b, b3) (c, c3) (d, d3) (e, e3)
  for (int l = 0; l < kLevelCount; ++l) {
    int level = kLevelCount - 1 - l;
    for (int p = 0; p < kKeyCount; ++p) {
      std::string put_key = std::string(1, char('a' + p));
      ASSERT_OK(Put(kCF, put_key, put_key + std::to_string(level)));
      ASSERT_OK(Flush(kCF));
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
      for (int g = 0; g < kKeyCount; ++g) {
        int entry_count = (p >= g) ? l + 1 : l;
        std::string get_key = std::string(1, char('a' + g));
        CheckAllEntriesWithFifoReopen(expected_entries[g][entry_count], get_key,
                                      kCF, {"pikachu"}, options);
      }
      if (level != 0) {
        MoveFilesToLevel(level, kCF);
        for (int g = 0; g < kKeyCount; ++g) {
          int entry_count = (p >= g) ? l + 1 : l;
          std::string get_key = std::string(1, char('a' + g));
          CheckAllEntriesWithFifoReopen(expected_entries[g][entry_count],
                                        get_key, kCF, {"pikachu"}, options);
        }
      }
      ASSERT_EQ(expected_files_per_level[l][p], FilesPerLevel(kCF));
    }
  }

  // The expected number of sst files in each level after each FIFO compaction
  // that deletes the oldest sst file.
  const std::string expected_files_per_level_after_fifo[] = {
      "5,5,5,4", "5,5,5,3", "5,5,5,2", "5,5,5,1", "5,5,5", "5,5,4", "5,5,3",
      "5,5,2",   "5,5,1",   "5,5",     "5,4",     "5,3",   "5,2",   "5,1",
      "5",       "4",       "3",       "2",       "1",     "",
  };

  // The expected value entries of each key after each FIFO compaction.
  // This verifies whether FIFO removes the file with the smallest key in
  // non-L0 levels first, and then the oldest files in L0.
  const std::string expected_entries_after_fifo[kKeyCount][kLevelCount + 1] = {
      {"[ a0, a1, a2, a3 ]", "[ a0, a1, a2 ]", "[ a0, a1 ]", "[ a0 ]", "[ ]"},
      {"[ b0, b1, b2, b3 ]", "[ b0, b1, b2 ]", "[ b0, b1 ]", "[ b0 ]", "[ ]"},
      {"[ c0, c1, c2, c3 ]", "[ c0, c1, c2 ]", "[ c0, c1 ]", "[ c0 ]", "[ ]"},
      {"[ d0, d1, d2, d3 ]", "[ d0, d1, d2 ]", "[ d0, d1 ]", "[ d0 ]", "[ ]"},
      {"[ e0, e1, e2, e3 ]", "[ e0, e1, e2 ]", "[ e0, e1 ]", "[ e0 ]", "[ ]"},
  };

  // In the 2nd phase, we reopen the DB with FIFO compaction. In each reopen,
  // we configure max_table_files_size so that FIFO will remove exactly one
  // file at a time upon compaction, and we will use it to verify whether the
  // sst files are deleted in the correct order.
  for (int i = 0; i < kTotalSstFileCount; ++i) {
    uint64_t total_sst_files_size = 0;
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.total-sst-files-size", &total_sst_files_size));
    ASSERT_TRUE(total_sst_files_size > 0);

    Options fifo_options(options);
    fifo_options.compaction_style = kCompactionStyleFIFO;
    options.create_if_missing = false;
    fifo_options.max_open_files = -1;
    fifo_options.disable_auto_compactions = false;
    // Config max_table_files_size to be total_sst_files_size - 1 so that
    // FIFO will delete one file.
    fifo_options.compaction_options_fifo.max_table_files_size =
        total_sst_files_size - 1;
    ASSERT_OK(
        TryReopenWithColumnFamilies({"default", "pikachu"}, fifo_options));
    // For FIFO to pick a compaction
    ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]));
    ASSERT_OK(dbfull()->TEST_WaitForCompact(false));
    for (int g = 0; g < kKeyCount; ++g) {
      std::string get_key = std::string(1, char('a' + g));
      int status_index = i / kKeyCount;
      if ((i % kKeyCount) >= g) {
        // If true, then it means the sst file containing the get_key in the
        // current level has already been deleted, so we need to move the
        // status_index for checking the expected value.
        status_index++;
      }
      CheckAllEntriesWithFifoReopen(
          expected_entries_after_fifo[g][status_index], get_key, kCF,
          {"pikachu"}, options);
    }
    ASSERT_EQ(expected_files_per_level_after_fifo[i], FilesPerLevel(kCF));
  }
}
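
// For reference, a plain FIFO-compaction configuration outside of this test
// resembles the fifo_options above. A minimal sketch (the size bound is an
// illustrative assumption; tune it per workload):
//
//   Options fifo_opts;
//   fifo_opts.compaction_style = kCompactionStyleFIFO;
//   // Oldest files are dropped once total SST size exceeds this bound.
//   fifo_opts.compaction_options_fifo.max_table_files_size =
//       uint64_t{1} << 30;  // 1GB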
#endif  // !ROCKSDB_LITE

TEST_F(DBTest, PutSingleDeleteGet) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_OK(Put(1, "foo2", "v2"));
    ASSERT_EQ("v2", Get(1, "foo2"));
    ASSERT_OK(SingleDelete(1, "foo"));
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
    // Skip FIFO and universal compaction because they do not apply to the test
    // case. Skip MergePut because single delete does not get removed when it
    // encounters a merge.
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
                         kSkipMergePut));
}
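
// A note on the SingleDelete semantics relied on above and in the tests
// below: SingleDelete is only guaranteed to work when the key was written
// exactly once since the last deletion and was never merged. A minimal
// usage sketch:
//
//   ASSERT_OK(db->Put(WriteOptions(), "k", "v"));      // exactly one Put
//   ASSERT_OK(db->SingleDelete(WriteOptions(), "k"));  // cancels that Put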

TEST_F(DBTest, ReadFromPersistedTier) {
  do {
    Random rnd(301);
    Options options = CurrentOptions();
    for (int disableWAL = 0; disableWAL <= 1; ++disableWAL) {
      CreateAndReopenWithCF({"pikachu"}, options);
      WriteOptions wopt;
      wopt.disableWAL = (disableWAL == 1);
      // 1st round: put but not flush
      ASSERT_OK(db_->Put(wopt, handles_[1], "foo", "first"));
      ASSERT_OK(db_->Put(wopt, handles_[1], "bar", "one"));
      ASSERT_EQ("first", Get(1, "foo"));
      ASSERT_EQ("one", Get(1, "bar"));

      // Read directly from persisted data.
      ReadOptions ropt;
      ropt.read_tier = kPersistedTier;
      std::string value;
      if (wopt.disableWAL) {
        // as data has not yet been flushed, we expect not found.
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
      } else {
        ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
        ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
      }

      // Multiget
      std::vector<ColumnFamilyHandle*> multiget_cfs;
      multiget_cfs.push_back(handles_[1]);
      multiget_cfs.push_back(handles_[1]);
      std::vector<Slice> multiget_keys;
      multiget_keys.push_back("foo");
      multiget_keys.push_back("bar");
      std::vector<std::string> multiget_values;
      auto statuses =
          db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
      if (wopt.disableWAL) {
        ASSERT_TRUE(statuses[0].IsNotFound());
        ASSERT_TRUE(statuses[1].IsNotFound());
      } else {
        ASSERT_OK(statuses[0]);
        ASSERT_OK(statuses[1]);
      }

      // 2nd round: flush and put a new value in memtable.
      ASSERT_OK(Flush(1));
      ASSERT_OK(db_->Put(wopt, handles_[1], "rocksdb", "hello"));

      // once the data has been flushed, we are able to get the
      // data when kPersistedTier is used.
      ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).ok());
      ASSERT_EQ(value, "first");
      ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
      ASSERT_EQ(value, "one");
      if (wopt.disableWAL) {
        ASSERT_TRUE(
            db_->Get(ropt, handles_[1], "rocksdb", &value).IsNotFound());
      } else {
        ASSERT_OK(db_->Get(ropt, handles_[1], "rocksdb", &value));
        ASSERT_EQ(value, "hello");
      }

      // Expect same result in multiget
      multiget_cfs.push_back(handles_[1]);
      multiget_keys.push_back("rocksdb");
      statuses =
          db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
      ASSERT_TRUE(statuses[0].ok());
      ASSERT_EQ("first", multiget_values[0]);
      ASSERT_TRUE(statuses[1].ok());
      ASSERT_EQ("one", multiget_values[1]);
      if (wopt.disableWAL) {
        ASSERT_TRUE(statuses[2].IsNotFound());
      } else {
        ASSERT_OK(statuses[2]);
      }

      // 3rd round: delete and flush
      ASSERT_OK(db_->Delete(wopt, handles_[1], "foo"));
      ASSERT_OK(Flush(1));
      ASSERT_OK(db_->Delete(wopt, handles_[1], "bar"));

      ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
      if (wopt.disableWAL) {
        // Still expect finding the value as its delete has not yet been
        // flushed.
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
        ASSERT_EQ(value, "one");
      } else {
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
      }
      ASSERT_TRUE(db_->Get(ropt, handles_[1], "rocksdb", &value).ok());
      ASSERT_EQ(value, "hello");

      statuses =
          db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
      ASSERT_TRUE(statuses[0].IsNotFound());
      if (wopt.disableWAL) {
        ASSERT_TRUE(statuses[1].ok());
        ASSERT_EQ("one", multiget_values[1]);
      } else {
        ASSERT_TRUE(statuses[1].IsNotFound());
      }
      ASSERT_TRUE(statuses[2].ok());
      ASSERT_EQ("hello", multiget_values[2]);
      if (wopt.disableWAL == 0) {
        DestroyAndReopen(options);
      }
    }
  } while (ChangeOptions());
}
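
// kPersistedTier reads skip anything that lives only in the memtable (and,
// with the WAL disabled, anything not yet flushed). A minimal sketch of the
// read-side setup exercised above:
//
//   ReadOptions ro;
//   ro.read_tier = kPersistedTier;
//   std::string value;
//   Status s = db->Get(ro, "key", &value);  // NotFound until data persists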

TEST_F(DBTest, SingleDeleteFlush) {
  // Test to check whether flushing preserves a single delete hidden
  // behind a put.
  do {
    Random rnd(301);

    Options options = CurrentOptions();
    options.disable_auto_compactions = true;
    CreateAndReopenWithCF({"pikachu"}, options);

    // Put values on second level (so that they will not be in the same
    // compaction as the other operations).
    ASSERT_OK(Put(1, "foo", "first"));
    ASSERT_OK(Put(1, "bar", "one"));
    ASSERT_OK(Flush(1));
    MoveFilesToLevel(2, 1);

    // (Single) delete hidden by a put
    ASSERT_OK(SingleDelete(1, "foo"));
    ASSERT_OK(Put(1, "foo", "second"));
    ASSERT_OK(Delete(1, "bar"));
    ASSERT_OK(Put(1, "bar", "two"));
    ASSERT_OK(Flush(1));

    ASSERT_OK(SingleDelete(1, "foo"));
    ASSERT_OK(Delete(1, "bar"));
    ASSERT_OK(Flush(1));

    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1],
                                     nullptr, nullptr));

    ASSERT_EQ("NOT_FOUND", Get(1, "bar"));
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
    // Skip FIFO and universal compaction because they do not apply to the test
    // case. Skip MergePut because single delete does not get removed when it
    // encounters a merge.
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
                         kSkipMergePut));
}

TEST_F(DBTest, SingleDeletePutFlush) {
  // Single deletes that encounter the matching put in a flush should get
  // removed.
  do {
    Random rnd(301);

    Options options = CurrentOptions();
    options.disable_auto_compactions = true;
    CreateAndReopenWithCF({"pikachu"}, options);

    ASSERT_OK(Put(1, "foo", Slice()));
    ASSERT_OK(Put(1, "a", Slice()));
    ASSERT_OK(SingleDelete(1, "a"));
    ASSERT_OK(Flush(1));

    ASSERT_EQ("[ ]", AllEntriesFor("a", 1));
    // Skip FIFO and universal compaction because they do not apply to the test
    // case. Skip MergePut because single delete does not get removed when it
    // encounters a merge.
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
                         kSkipMergePut));
}

// Disabled because not all platforms can run it.
// It requires more than 9GB of memory to run, with a single allocation
// of more than 3GB.
TEST_F(DBTest, DISABLED_SanitizeVeryVeryLargeValue) {
  const size_t kValueSize = 4 * size_t{1024 * 1024 * 1024};  // 4GB value
  std::string raw(kValueSize, 'v');
  Options options = CurrentOptions();
  options.env = env_;
  options.merge_operator = MergeOperators::CreatePutOperator();
  options.write_buffer_size = 100000;  // Small write buffer
  options.paranoid_checks = true;
  DestroyAndReopen(options);

  ASSERT_OK(Put("boo", "v1"));
  ASSERT_TRUE(Put("foo", raw).IsInvalidArgument());
  ASSERT_TRUE(Merge("foo", raw).IsInvalidArgument());

  WriteBatch wb;
  ASSERT_TRUE(wb.Put("foo", raw).IsInvalidArgument());
  ASSERT_TRUE(wb.Merge("foo", raw).IsInvalidArgument());

  Slice value_slice = raw;
  Slice key_slice = "foo";
  SliceParts sp_key(&key_slice, 1);
  SliceParts sp_value(&value_slice, 1);

  ASSERT_TRUE(wb.Put(sp_key, sp_value).IsInvalidArgument());
  ASSERT_TRUE(wb.Merge(sp_key, sp_value).IsInvalidArgument());
}

// Disabled because not all platforms can run it.
// It requires more than 9GB of memory to run, with a single allocation
// of more than 3GB.
TEST_F(DBTest, DISABLED_VeryLargeValue) {
  const size_t kValueSize = 3221225472u;  // 3GB value
  const size_t kKeySize = 8388608u;       // 8MB key
  std::string raw(kValueSize, 'v');
  std::string key1(kKeySize, 'c');
  std::string key2(kKeySize, 'd');

  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;  // Small write buffer
  options.paranoid_checks = true;
  DestroyAndReopen(options);

  ASSERT_OK(Put("boo", "v1"));
  ASSERT_OK(Put("foo", "v1"));
  ASSERT_OK(Put(key1, raw));
  raw[0] = 'w';
  ASSERT_OK(Put(key2, raw));
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

#ifndef ROCKSDB_LITE
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
#endif  // !ROCKSDB_LITE

  std::string value;
  Status s = db_->Get(ReadOptions(), key1, &value);
  ASSERT_OK(s);
  ASSERT_EQ(kValueSize, value.size());
  ASSERT_EQ('v', value[0]);

  s = db_->Get(ReadOptions(), key2, &value);
  ASSERT_OK(s);
  ASSERT_EQ(kValueSize, value.size());
  ASSERT_EQ('w', value[0]);

  // Compact all files.
  ASSERT_OK(Flush());
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Check DB is not in read-only state.
  ASSERT_OK(Put("boo", "v1"));

  s = db_->Get(ReadOptions(), key1, &value);
  ASSERT_OK(s);
  ASSERT_EQ(kValueSize, value.size());
  ASSERT_EQ('v', value[0]);

  s = db_->Get(ReadOptions(), key2, &value);
  ASSERT_OK(s);
  ASSERT_EQ(kValueSize, value.size());
  ASSERT_EQ('w', value[0]);
}

TEST_F(DBTest, GetFromImmutableLayer) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    CreateAndReopenWithCF({"pikachu"}, options);

    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_EQ("v1", Get(1, "foo"));

    // Block sync calls
    env_->delay_sstable_sync_.store(true, std::memory_order_release);
    ASSERT_OK(Put(1, "k1", std::string(100000, 'x')));  // Fill memtable
    ASSERT_OK(Put(1, "k2", std::string(100000, 'y')));  // Trigger flush
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("NOT_FOUND", Get(0, "foo"));
    // Release sync calls
    env_->delay_sstable_sync_.store(false, std::memory_order_release);
  } while (ChangeOptions());
}

TEST_F(DBTest, GetLevel0Ordering) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    // Check that we process level-0 files in correct order. The code
    // below generates two level-0 files where the earlier one comes
    // before the later one in the level-0 file list since the earlier
    // one has a smaller "smallest" key.
    ASSERT_OK(Put(1, "bar", "b"));
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(Put(1, "foo", "v2"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("v2", Get(1, "foo"));
  } while (ChangeOptions());
}

TEST_F(DBTest, WrongLevel0Config) {
  Options options = CurrentOptions();
  Close();
  ASSERT_OK(DestroyDB(dbname_, options));
  options.level0_stop_writes_trigger = 1;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_file_num_compaction_trigger = 3;
  ASSERT_OK(DB::Open(options, dbname_, &db_));
}

#ifndef ROCKSDB_LITE
TEST_F(DBTest, GetOrderedByLevels) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "foo", "v1"));
    Compact(1, "a", "z");
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_OK(Put(1, "foo", "v2"));
    ASSERT_EQ("v2", Get(1, "foo"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("v2", Get(1, "foo"));
  } while (ChangeOptions());
}

TEST_F(DBTest, GetPicksCorrectFile) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    // Arrange to have multiple files in a non-level-0 level.
    ASSERT_OK(Put(1, "a", "va"));
    Compact(1, "a", "b");
    ASSERT_OK(Put(1, "x", "vx"));
    Compact(1, "x", "y");
    ASSERT_OK(Put(1, "f", "vf"));
    Compact(1, "f", "g");
    ASSERT_EQ("va", Get(1, "a"));
    ASSERT_EQ("vf", Get(1, "f"));
    ASSERT_EQ("vx", Get(1, "x"));
  } while (ChangeOptions());
}

TEST_F(DBTest, GetEncountersEmptyLevel) {
  do {
    Options options = CurrentOptions();
    CreateAndReopenWithCF({"pikachu"}, options);
    // Arrange for the following to happen:
    // * sstable A in level 0
    // * nothing in level 1
    // * sstable B in level 2
    // Then do enough Get() calls to arrange for an automatic compaction
    // of sstable A. A bug would cause the compaction to be marked as
    // occurring at level 1 (instead of the correct level 0).

    // Step 1: First place sstables in levels 0 and 2
    ASSERT_OK(Put(1, "a", "begin"));
    ASSERT_OK(Put(1, "z", "end"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]));
    ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]));
    ASSERT_OK(Put(1, "a", "begin"));
    ASSERT_OK(Put(1, "z", "end"));
    ASSERT_OK(Flush(1));
    ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
    ASSERT_GT(NumTableFilesAtLevel(2, 1), 0);

    // Step 2: clear level 1 if necessary.
    ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]));
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1);
    ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
    ASSERT_EQ(NumTableFilesAtLevel(2, 1), 1);

    // Step 3: read a bunch of times
    for (int i = 0; i < 1000; i++) {
      ASSERT_EQ("NOT_FOUND", Get(1, "missing"));
    }

    // Step 4: Wait for compaction to finish
    ASSERT_OK(dbfull()->TEST_WaitForCompact());

    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1);  // XXX
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
}
#endif  // ROCKSDB_LITE

TEST_F(DBTest, FlushMultipleMemtable) {
  do {
    Options options = CurrentOptions();
    WriteOptions writeOpt = WriteOptions();
    writeOpt.disableWAL = true;
    options.max_write_buffer_number = 4;
    options.min_write_buffer_number_to_merge = 3;
    options.max_write_buffer_size_to_maintain = -1;
    CreateAndReopenWithCF({"pikachu"}, options);
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));

    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("v1", Get(1, "bar"));
    ASSERT_OK(Flush(1));
  } while (ChangeCompactOptions());
}
#ifndef ROCKSDB_LITE
TEST_F(DBTest, FlushSchedule) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.level0_stop_writes_trigger = 1 << 10;
  options.level0_slowdown_writes_trigger = 1 << 10;
  options.min_write_buffer_number_to_merge = 1;
  options.max_write_buffer_size_to_maintain =
      static_cast<int64_t>(options.write_buffer_size);
  options.max_write_buffer_number = 2;
  options.write_buffer_size = 120 * 1024;
  auto flush_listener = std::make_shared<FlushCounterListener>();
  flush_listener->expected_flush_reason = FlushReason::kWriteBufferFull;
  options.listeners.push_back(flush_listener);
  CreateAndReopenWithCF({"pikachu"}, options);
  std::vector<port::Thread> threads;

  std::atomic<int> thread_num(0);
  // Each column family will have 5 threads, each thread generating 2
  // memtables. Each column family should end up with 10 table files.
  std::function<void()> fill_memtable_func = [&]() {
    int a = thread_num.fetch_add(1);
    Random rnd(a);
    WriteOptions wo;
    // this should fill up 2 memtables
    for (int k = 0; k < 5000; ++k) {
      ASSERT_OK(db_->Put(wo, handles_[a & 1], rnd.RandomString(13), ""));
    }
  };

  for (int i = 0; i < 10; ++i) {
    threads.emplace_back(fill_memtable_func);
  }

  for (auto& t : threads) {
    t.join();
  }

  auto default_tables = GetNumberOfSstFilesForColumnFamily(db_, "default");
  auto pikachu_tables = GetNumberOfSstFilesForColumnFamily(db_, "pikachu");
  ASSERT_LE(default_tables, static_cast<uint64_t>(10));
  ASSERT_GT(default_tables, static_cast<uint64_t>(0));
  ASSERT_LE(pikachu_tables, static_cast<uint64_t>(10));
  ASSERT_GT(pikachu_tables, static_cast<uint64_t>(0));
}
#endif  // ROCKSDB_LITE

namespace {
class KeepFilter : public CompactionFilter {
 public:
  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    return false;
  }

  const char* Name() const override { return "KeepFilter"; }
};

class KeepFilterFactory : public CompactionFilterFactory {
 public:
  explicit KeepFilterFactory(bool check_context = false)
      : check_context_(check_context) {}

  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& context) override {
    if (check_context_) {
      EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction);
      EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction);
    }
    return std::unique_ptr<CompactionFilter>(new KeepFilter());
  }

  const char* Name() const override { return "KeepFilterFactory"; }
  bool check_context_;
  std::atomic_bool expect_full_compaction_;
  std::atomic_bool expect_manual_compaction_;
};

class DelayFilter : public CompactionFilter {
 public:
  explicit DelayFilter(DBTestBase* d) : db_test(d) {}
  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    db_test->env_->MockSleepForMicroseconds(1000);
    return true;
  }

  const char* Name() const override { return "DelayFilter"; }

 private:
  DBTestBase* db_test;
};

class DelayFilterFactory : public CompactionFilterFactory {
 public:
  explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {}
  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& /*context*/) override {
    return std::unique_ptr<CompactionFilter>(new DelayFilter(db_test));
  }

  const char* Name() const override { return "DelayFilterFactory"; }

 private:
  DBTestBase* db_test;
};
}  // anonymous namespace
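
// A factory like the ones above is wired into a DB via Options. A minimal
// sketch of how tests typically use these helpers:
//
//   Options opts = CurrentOptions();
//   opts.compaction_filter_factory = std::make_shared<KeepFilterFactory>();
//   // Every compaction will now consult a KeepFilter instance.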

#ifndef ROCKSDB_LITE

static std::string CompressibleString(Random* rnd, int len) {
  std::string r;
  test::CompressibleString(rnd, 0.8, len, &r);
  return r;
}
#endif  // ROCKSDB_LITE

TEST_F(DBTest, FailMoreDbPaths) {
  Options options = CurrentOptions();
  options.db_paths.emplace_back(dbname_, 10000000);
  options.db_paths.emplace_back(dbname_ + "_2", 1000000);
  options.db_paths.emplace_back(dbname_ + "_3", 1000000);
  options.db_paths.emplace_back(dbname_ + "_4", 1000000);
  options.db_paths.emplace_back(dbname_ + "_5", 1000000);
  ASSERT_TRUE(TryReopen(options).IsNotSupported());
}

void CheckColumnFamilyMeta(
    const ColumnFamilyMetaData& cf_meta, const std::string& cf_name,
    const std::vector<std::vector<FileMetaData>>& files_by_level,
    uint64_t start_time, uint64_t end_time) {
  ASSERT_EQ(cf_meta.name, cf_name);
  ASSERT_EQ(cf_meta.levels.size(), files_by_level.size());

  uint64_t cf_size = 0;
  size_t file_count = 0;

  for (size_t i = 0; i < cf_meta.levels.size(); ++i) {
    const auto& level_meta_from_cf = cf_meta.levels[i];
    const auto& level_meta_from_files = files_by_level[i];

    ASSERT_EQ(level_meta_from_cf.level, i);
    ASSERT_EQ(level_meta_from_cf.files.size(), level_meta_from_files.size());

    file_count += level_meta_from_cf.files.size();

    uint64_t level_size = 0;
    for (size_t j = 0; j < level_meta_from_cf.files.size(); ++j) {
      const auto& file_meta_from_cf = level_meta_from_cf.files[j];
      const auto& file_meta_from_files = level_meta_from_files[j];

      level_size += file_meta_from_cf.size;

      ASSERT_EQ(file_meta_from_cf.file_number,
                file_meta_from_files.fd.GetNumber());
      ASSERT_EQ(file_meta_from_cf.file_number,
                TableFileNameToNumber(file_meta_from_cf.name));
      ASSERT_EQ(file_meta_from_cf.size, file_meta_from_files.fd.file_size);
      ASSERT_EQ(file_meta_from_cf.smallest_seqno,
                file_meta_from_files.fd.smallest_seqno);
      ASSERT_EQ(file_meta_from_cf.largest_seqno,
                file_meta_from_files.fd.largest_seqno);
      ASSERT_EQ(file_meta_from_cf.smallestkey,
                file_meta_from_files.smallest.user_key().ToString());
      ASSERT_EQ(file_meta_from_cf.largestkey,
                file_meta_from_files.largest.user_key().ToString());
      ASSERT_EQ(file_meta_from_cf.oldest_blob_file_number,
                file_meta_from_files.oldest_blob_file_number);
      ASSERT_EQ(file_meta_from_cf.oldest_ancester_time,
                file_meta_from_files.oldest_ancester_time);
      ASSERT_EQ(file_meta_from_cf.file_creation_time,
                file_meta_from_files.file_creation_time);
      ASSERT_GE(file_meta_from_cf.file_creation_time, start_time);
      ASSERT_LE(file_meta_from_cf.file_creation_time, end_time);
      ASSERT_GE(file_meta_from_cf.oldest_ancester_time, start_time);
      ASSERT_LE(file_meta_from_cf.oldest_ancester_time, end_time);
      // More from FileStorageInfo
      ASSERT_EQ(file_meta_from_cf.file_type, kTableFile);
      ASSERT_EQ(file_meta_from_cf.name,
                "/" + file_meta_from_cf.relative_filename);
      ASSERT_EQ(file_meta_from_cf.directory, file_meta_from_cf.db_path);
    }

    ASSERT_EQ(level_meta_from_cf.size, level_size);
    cf_size += level_size;
  }

  ASSERT_EQ(cf_meta.file_count, file_count);
  ASSERT_EQ(cf_meta.size, cf_size);
}

void CheckLiveFilesMeta(
    const std::vector<LiveFileMetaData>& live_file_meta,
    const std::vector<std::vector<FileMetaData>>& files_by_level) {
  size_t total_file_count = 0;
  for (const auto& f : files_by_level) {
    total_file_count += f.size();
  }

  ASSERT_EQ(live_file_meta.size(), total_file_count);

  int level = 0;
  int i = 0;

  for (const auto& meta : live_file_meta) {
    if (level != meta.level) {
      level = meta.level;
      i = 0;
    }

    ASSERT_LT(i, files_by_level[level].size());

    const auto& expected_meta = files_by_level[level][i];

    ASSERT_EQ(meta.column_family_name, kDefaultColumnFamilyName);
    ASSERT_EQ(meta.file_number, expected_meta.fd.GetNumber());
    ASSERT_EQ(meta.file_number, TableFileNameToNumber(meta.name));
    ASSERT_EQ(meta.size, expected_meta.fd.file_size);
    ASSERT_EQ(meta.smallest_seqno, expected_meta.fd.smallest_seqno);
    ASSERT_EQ(meta.largest_seqno, expected_meta.fd.largest_seqno);
    ASSERT_EQ(meta.smallestkey, expected_meta.smallest.user_key().ToString());
    ASSERT_EQ(meta.largestkey, expected_meta.largest.user_key().ToString());
    ASSERT_EQ(meta.oldest_blob_file_number,
              expected_meta.oldest_blob_file_number);

    // More from FileStorageInfo
    ASSERT_EQ(meta.file_type, kTableFile);
    ASSERT_EQ(meta.name, "/" + meta.relative_filename);
    ASSERT_EQ(meta.directory, meta.db_path);

    ++i;
  }
}

#ifndef ROCKSDB_LITE
void AddBlobFile(const ColumnFamilyHandle* cfh, uint64_t blob_file_number,
                 uint64_t total_blob_count, uint64_t total_blob_bytes,
                 const std::string& checksum_method,
                 const std::string& checksum_value,
                 uint64_t garbage_blob_count = 0,
                 uint64_t garbage_blob_bytes = 0) {
  ColumnFamilyData* cfd =
      (static_cast<const ColumnFamilyHandleImpl*>(cfh))->cfd();
  assert(cfd);

  Version* const version = cfd->current();
  assert(version);

  VersionStorageInfo* const storage_info = version->storage_info();
  assert(storage_info);

  // Add a live blob file.

  auto shared_meta = SharedBlobFileMetaData::Create(
      blob_file_number, total_blob_count, total_blob_bytes, checksum_method,
      checksum_value);

  auto meta = BlobFileMetaData::Create(std::move(shared_meta),
                                       BlobFileMetaData::LinkedSsts(),
                                       garbage_blob_count, garbage_blob_bytes);

  storage_info->AddBlobFile(std::move(meta));
}

static void CheckBlobMetaData(
    const BlobMetaData& bmd, uint64_t blob_file_number,
    uint64_t total_blob_count, uint64_t total_blob_bytes,
    const std::string& checksum_method, const std::string& checksum_value,
    uint64_t garbage_blob_count = 0, uint64_t garbage_blob_bytes = 0) {
  ASSERT_EQ(bmd.blob_file_number, blob_file_number);
  ASSERT_EQ(bmd.blob_file_name, BlobFileName("", blob_file_number));
  ASSERT_EQ(bmd.blob_file_size,
            total_blob_bytes + BlobLogHeader::kSize + BlobLogFooter::kSize);

  ASSERT_EQ(bmd.total_blob_count, total_blob_count);
  ASSERT_EQ(bmd.total_blob_bytes, total_blob_bytes);
  ASSERT_EQ(bmd.garbage_blob_count, garbage_blob_count);
  ASSERT_EQ(bmd.garbage_blob_bytes, garbage_blob_bytes);
  ASSERT_EQ(bmd.checksum_method, checksum_method);
  ASSERT_EQ(bmd.checksum_value, checksum_value);
}

TEST_F(DBTest, MetaDataTest) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.disable_auto_compactions = true;

  int64_t temp_time = 0;
  options.env->GetCurrentTime(&temp_time);
  uint64_t start_time = static_cast<uint64_t>(temp_time);

  DestroyAndReopen(options);

  Random rnd(301);
  int key_index = 0;
  for (int i = 0; i < 100; ++i) {
    // Add a single blob reference to each file
    std::string blob_index;
    BlobIndex::EncodeBlob(&blob_index, /* blob_file_number */ i + 1000,
                          /* offset */ 1234, /* size */ 5678, kNoCompression);

    WriteBatch batch;
    ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, Key(key_index),
                                               blob_index));
    ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));

    ++key_index;

    // Fill up the rest of the file with random values.
    GenerateNewFile(&rnd, &key_index, /* nowait */ true);

    ASSERT_OK(Flush());
  }

  std::vector<std::vector<FileMetaData>> files_by_level;
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files_by_level);

  options.env->GetCurrentTime(&temp_time);
  uint64_t end_time = static_cast<uint64_t>(temp_time);

  ColumnFamilyMetaData cf_meta;
  db_->GetColumnFamilyMetaData(&cf_meta);
  CheckColumnFamilyMeta(cf_meta, kDefaultColumnFamilyName, files_by_level,
                        start_time, end_time);
  std::vector<LiveFileMetaData> live_file_meta;
  db_->GetLiveFilesMetaData(&live_file_meta);
  CheckLiveFilesMeta(live_file_meta, files_by_level);
}

TEST_F(DBTest, AllMetaDataTest) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  constexpr uint64_t blob_file_number = 234;
  constexpr uint64_t total_blob_count = 555;
  constexpr uint64_t total_blob_bytes = 66666;
  constexpr char checksum_method[] = "CRC32";
  constexpr char checksum_value[] = "\x3d\x87\xff\x57";

  int64_t temp_time = 0;
  options.env->GetCurrentTime(&temp_time).PermitUncheckedError();
  uint64_t start_time = static_cast<uint64_t>(temp_time);

  Random rnd(301);
  dbfull()->TEST_LockMutex();
  for (int cf = 0; cf < 2; cf++) {
    AddBlobFile(handles_[cf], blob_file_number * (cf + 1),
                total_blob_count * (cf + 1), total_blob_bytes * (cf + 1),
                checksum_method, checksum_value);
  }
  dbfull()->TEST_UnlockMutex();

  std::vector<ColumnFamilyMetaData> all_meta;
  db_->GetAllColumnFamilyMetaData(&all_meta);

  std::vector<std::vector<FileMetaData>> default_files_by_level;
  std::vector<std::vector<FileMetaData>> pikachu_files_by_level;
  dbfull()->TEST_GetFilesMetaData(handles_[0], &default_files_by_level);
  dbfull()->TEST_GetFilesMetaData(handles_[1], &pikachu_files_by_level);

  options.env->GetCurrentTime(&temp_time).PermitUncheckedError();
  uint64_t end_time = static_cast<uint64_t>(temp_time);

  ASSERT_EQ(all_meta.size(), 2);
  for (int cf = 0; cf < 2; cf++) {
    const auto& cfmd = all_meta[cf];
    if (cf == 0) {
      CheckColumnFamilyMeta(cfmd, "default", default_files_by_level, start_time,
                            end_time);
    } else {
      CheckColumnFamilyMeta(cfmd, "pikachu", pikachu_files_by_level, start_time,
                            end_time);
    }
    ASSERT_EQ(cfmd.blob_files.size(), 1U);
    const auto& bmd = cfmd.blob_files[0];
    ASSERT_EQ(cfmd.blob_file_count, 1U);
    ASSERT_EQ(cfmd.blob_file_size, bmd.blob_file_size);
    ASSERT_EQ(NormalizePath(bmd.blob_file_path), NormalizePath(dbname_));
    CheckBlobMetaData(bmd, blob_file_number * (cf + 1),
                      total_blob_count * (cf + 1), total_blob_bytes * (cf + 1),
                      checksum_method, checksum_value);
  }
}

namespace {
void MinLevelHelper(DBTest* self, Options& options) {
  Random rnd(301);

  for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
       num++) {
    std::vector<std::string> values;
    // Write 120KB (12 values, each 10K)
    for (int i = 0; i < 12; i++) {
      values.push_back(rnd.RandomString(10000));
      ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
    }
    ASSERT_OK(self->dbfull()->TEST_WaitForFlushMemTable());
    ASSERT_EQ(self->NumTableFilesAtLevel(0), num + 1);
  }

  // Generate one more file in level-0, which should trigger level-0 compaction.
  std::vector<std::string> values;
  for (int i = 0; i < 12; i++) {
    values.push_back(rnd.RandomString(10000));
    ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
  }
  ASSERT_OK(self->dbfull()->TEST_WaitForCompact());

  ASSERT_EQ(self->NumTableFilesAtLevel(0), 0);
  ASSERT_EQ(self->NumTableFilesAtLevel(1), 1);
}

// Returns false if the calling test should be skipped.
bool MinLevelToCompress(CompressionType& type, Options& options, int wbits,
                        int lev, int strategy) {
  fprintf(stderr,
          "Test with compression options : window_bits = %d, level = %d, "
          "strategy = %d\n",
          wbits, lev, strategy);
  options.write_buffer_size = 100 << 10;  // 100KB
  options.arena_block_size = 4096;
  options.num_levels = 3;
  options.level0_file_num_compaction_trigger = 3;
  options.create_if_missing = true;

  if (Snappy_Supported()) {
    type = kSnappyCompression;
    fprintf(stderr, "using snappy\n");
  } else if (Zlib_Supported()) {
    type = kZlibCompression;
    fprintf(stderr, "using zlib\n");
  } else if (BZip2_Supported()) {
    type = kBZip2Compression;
    fprintf(stderr, "using bzip2\n");
  } else if (LZ4_Supported()) {
    type = kLZ4Compression;
    fprintf(stderr, "using lz4\n");
  } else if (XPRESS_Supported()) {
    type = kXpressCompression;
    fprintf(stderr, "using xpress\n");
  } else if (ZSTD_Supported()) {
    type = kZSTD;
    fprintf(stderr, "using ZSTD\n");
  } else {
    fprintf(stderr, "skipping test, compression disabled\n");
    return false;
  }
  options.compression_per_level.resize(options.num_levels);

  // do not compress L0
  for (int i = 0; i < 1; i++) {
    options.compression_per_level[i] = kNoCompression;
  }
  for (int i = 1; i < options.num_levels; i++) {
    options.compression_per_level[i] = type;
  }
  return true;
}
}  // anonymous namespace

TEST_F(DBTest, MinLevelToCompress1) {
  Options options = CurrentOptions();
  CompressionType type = kSnappyCompression;
  if (!MinLevelToCompress(type, options, -14, -1, 0)) {
    return;
  }
  Reopen(options);
  MinLevelHelper(this, options);

  // do not compress L0 and L1
  for (int i = 0; i < 2; i++) {
    options.compression_per_level[i] = kNoCompression;
  }
  for (int i = 2; i < options.num_levels; i++) {
    options.compression_per_level[i] = type;
  }
  DestroyAndReopen(options);
  MinLevelHelper(this, options);
}

TEST_F(DBTest, MinLevelToCompress2) {
  Options options = CurrentOptions();
  CompressionType type = kSnappyCompression;
  if (!MinLevelToCompress(type, options, 15, -1, 0)) {
    return;
  }
  Reopen(options);
  MinLevelHelper(this, options);

  // do not compress L0 and L1
  for (int i = 0; i < 2; i++) {
    options.compression_per_level[i] = kNoCompression;
  }
  for (int i = 2; i < options.num_levels; i++) {
    options.compression_per_level[i] = type;
  }
  DestroyAndReopen(options);
  MinLevelHelper(this, options);
}

// This test may fail because of a legitimate case in which multiple L0 files
// are trivially moved to L1.
TEST_F(DBTest, DISABLED_RepeatedWritesToSameKey) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.write_buffer_size = 100000;  // Small write buffer
    CreateAndReopenWithCF({"pikachu"}, options);

    // We must have at most one file per level except for level-0,
    // which may have up to kL0_StopWritesTrigger files.
    const int kMaxFiles =
        options.num_levels + options.level0_stop_writes_trigger;

    Random rnd(301);
    std::string value =
        rnd.RandomString(static_cast<int>(2 * options.write_buffer_size));
    for (int i = 0; i < 5 * kMaxFiles; i++) {
      ASSERT_OK(Put(1, "key", value));
      ASSERT_LE(TotalTableFiles(1), kMaxFiles);
    }
  } while (ChangeCompactOptions());
}
#endif  // ROCKSDB_LITE

#ifndef ROCKSDB_LITE
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  bool result = (val >= low) && (val <= high);
  if (!result) {
    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
            (unsigned long long)(val), (unsigned long long)(low),
            (unsigned long long)(high));
  }
  return result;
}

TEST_F(DBTest, ApproximateSizesMemTable) {
  Options options = CurrentOptions();
  options.write_buffer_size = 100000000;  // Large write buffer
  options.compression = kNoCompression;
  options.create_if_missing = true;
  DestroyAndReopen(options);
  auto default_cf = db_->DefaultColumnFamily();

  const int N = 128;
  Random rnd(301);
  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(i), rnd.RandomString(1024)));
  }

  uint64_t size;
  std::string start = Key(50);
  std::string end = Key(60);
  Range r(start, end);
  SizeApproximationOptions size_approx_options;
  size_approx_options.include_memtables = true;
  size_approx_options.include_files = true;
  ASSERT_OK(
      db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
  ASSERT_GT(size, 6000);
  ASSERT_LT(size, 204800);
  // Zero if not including mem table
  ASSERT_OK(db_->GetApproximateSizes(&r, 1, &size));
  ASSERT_EQ(size, 0);

  start = Key(500);
  end = Key(600);
  r = Range(start, end);
  ASSERT_OK(
      db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
  ASSERT_EQ(size, 0);

  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(1000 + i), rnd.RandomString(1024)));
  }

  start = Key(500);
  end = Key(600);
  r = Range(start, end);
  ASSERT_OK(
      db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
  ASSERT_EQ(size, 0);

  start = Key(100);
  end = Key(1020);
  r = Range(start, end);
  ASSERT_OK(
      db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
  ASSERT_GT(size, 6000);

  options.max_write_buffer_number = 8;
  options.min_write_buffer_number_to_merge = 5;
  options.write_buffer_size = 1024 * N;  // Not very large
  DestroyAndReopen(options);
  default_cf = db_->DefaultColumnFamily();

  int keys[N * 3];
  for (int i = 0; i < N; i++) {
    keys[i * 3] = i * 5;
    keys[i * 3 + 1] = i * 5 + 1;
    keys[i * 3 + 2] = i * 5 + 2;
  }
1637 // MemTable entry counting is an estimate and can vary greatly depending on
1638 // layout. Thus, use a deterministic seed for test stability.
1639 RandomShuffle(std::begin(keys), std::end(keys), rnd.Next());
1640
1641 for (int i = 0; i < N * 3; i++) {
1642 ASSERT_OK(Put(Key(keys[i] + 1000), rnd.RandomString(1024)));
1643 }
1644
1645 start = Key(100);
1646 end = Key(300);
1647 r = Range(start, end);
1648 ASSERT_OK(
1649 db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
1650 ASSERT_EQ(size, 0);
1651
1652 start = Key(1050);
1653 end = Key(1080);
1654 r = Range(start, end);
1655 ASSERT_OK(
1656 db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
1657 ASSERT_GT(size, 6000);
1658
1659 start = Key(2100);
1660 end = Key(2300);
1661 r = Range(start, end);
1662 ASSERT_OK(
1663 db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
1664 ASSERT_EQ(size, 0);
1665
1666 start = Key(1050);
1667 end = Key(1080);
1668 r = Range(start, end);
1669 uint64_t size_with_mt, size_without_mt;
1670 ASSERT_OK(db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1,
1671 &size_with_mt));
1672 ASSERT_GT(size_with_mt, 6000);
1673 ASSERT_OK(db_->GetApproximateSizes(&r, 1, &size_without_mt));
1674 ASSERT_EQ(size_without_mt, 0);
1675
1676 ASSERT_OK(Flush());
1677
1678 for (int i = 0; i < N; i++) {
1679 ASSERT_OK(Put(Key(i + 1000), rnd.RandomString(1024)));
1680 }
1681
1682 start = Key(1050);
1683 end = Key(1080);
1684 r = Range(start, end);
1685 ASSERT_OK(db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1,
1686 &size_with_mt));
1687 ASSERT_OK(db_->GetApproximateSizes(&r, 1, &size_without_mt));
1688 ASSERT_GT(size_with_mt, size_without_mt);
1689 ASSERT_GT(size_without_mt, 6000);
1690
1691 // Check that include_memtables flag works as expected
1692 size_approx_options.include_memtables = false;
1693 ASSERT_OK(
1694 db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
1695 ASSERT_EQ(size, size_without_mt);
1696
1697 // Check that files_size_error_margin works as expected when the heuristic
1698 // conditions are not met
1699 start = Key(1);
1700 end = Key(1000 + N - 2);
1701 r = Range(start, end);
1702 size_approx_options.files_size_error_margin = -1.0; // disabled
1703 ASSERT_OK(
1704 db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
1705 uint64_t size2;
1706 size_approx_options.files_size_error_margin = 0.5; // enabled, but not used
1707 ASSERT_OK(
1708 db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size2));
1709 ASSERT_EQ(size, size2);
1710 }
1711
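// files_size_error_margin trades accuracy for speed: roughly speaking, when
// it is set, GetApproximateSizes may attribute whole files to the range
// (instead of seeking into their index blocks) as long as the resulting error
// stays within the margin. -1.0 disables the heuristic. The test below checks
// that the fast estimate stays within the requested margin of the precise one.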
1712 TEST_F(DBTest, ApproximateSizesFilesWithErrorMargin) {
1713 // Roughly 4 keys per data block, 1000 keys per file,
1714 // with a filter substantially larger than a data block
1715 BlockBasedTableOptions table_options;
1716 table_options.filter_policy.reset(NewBloomFilterPolicy(16));
1717 table_options.block_size = 100;
1718 Options options = CurrentOptions();
1719 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1720 options.write_buffer_size = 24 * 1024;
1721 options.compression = kNoCompression;
1722 options.create_if_missing = true;
1723 options.target_file_size_base = 24 * 1024;
1724 DestroyAndReopen(options);
1725 const auto default_cf = db_->DefaultColumnFamily();
1726
1727 const int N = 64000;
1728 Random rnd(301);
1729 for (int i = 0; i < N; i++) {
1730 ASSERT_OK(Put(Key(i), rnd.RandomString(24)));
1731 }
1732 // Flush everything to files
1733 ASSERT_OK(Flush());
1734 // Compact the entire key space into the next level
1735 ASSERT_OK(
1736 db_->CompactRange(CompactRangeOptions(), default_cf, nullptr, nullptr));
1737
1738 // Write more keys
1739 for (int i = N; i < (N + N / 4); i++) {
1740 ASSERT_OK(Put(Key(i), rnd.RandomString(24)));
1741 }
1742 // Flush everything to files again
1743 ASSERT_OK(Flush());
1744
1745 // Wait for compaction to finish
1746 ASSERT_OK(dbfull()->TEST_WaitForCompact());
1747
1748 {
1749 const std::string start = Key(0);
1750 const std::string end = Key(2 * N);
1751 const Range r(start, end);
1752
1753 SizeApproximationOptions size_approx_options;
1754 size_approx_options.include_memtables = false;
1755 size_approx_options.include_files = true;
1756 size_approx_options.files_size_error_margin = -1.0; // disabled
1757
1758 // Get the precise size without any approximation heuristic
1759 uint64_t size;
1760 ASSERT_OK(db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1,
1761 &size));
1762 ASSERT_NE(size, 0);
1763
1764 // Get the size with an approximation heuristic
1765 uint64_t size2;
1766 const double error_margin = 0.2;
1767 size_approx_options.files_size_error_margin = error_margin;
1768 ASSERT_OK(db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1,
1769 &size2));
1770 ASSERT_LT(size2, size * (1 + error_margin));
1771 ASSERT_GT(size2, size * (1 - error_margin));
1772 }
1773
1774 {
1775 // Ensure that metadata is not falsely attributed only to the last data in
1776 // the file. (In some applications, filters can be a large portion of data
1777 // size.)
1778 // Perform many queries over a small range, enough to ensure crossing a
1779 // file boundary, and make sure we never see a spike for the large filter.
1780 for (int i = 0; i < 3000; i += 10) {
1781 const std::string start = Key(i);
1782 const std::string end = Key(i + 11); // overlap by 1 key
1783 const Range r(start, end);
1784 uint64_t size;
1785 ASSERT_OK(db_->GetApproximateSizes(&r, 1, &size));
1786 ASSERT_LE(size, 11 * 100);
1787 }
1788 }
1789 }
1790
1791 TEST_F(DBTest, GetApproximateMemTableStats) {
1792 Options options = CurrentOptions();
1793 options.write_buffer_size = 100000000;
1794 options.compression = kNoCompression;
1795 options.create_if_missing = true;
1796 DestroyAndReopen(options);
1797
1798 const int N = 128;
1799 Random rnd(301);
1800 for (int i = 0; i < N; i++) {
1801 ASSERT_OK(Put(Key(i), rnd.RandomString(1024)));
1802 }
1803
1804 uint64_t count;
1805 uint64_t size;
1806
1807 std::string start = Key(50);
1808 std::string end = Key(60);
1809 Range r(start, end);
1810 db_->GetApproximateMemTableStats(r, &count, &size);
1811 ASSERT_GT(count, 0);
1812 ASSERT_LE(count, N);
1813 ASSERT_GT(size, 6000);
1814 ASSERT_LT(size, 204800);
1815
1816 start = Key(500);
1817 end = Key(600);
1818 r = Range(start, end);
1819 db_->GetApproximateMemTableStats(r, &count, &size);
1820 ASSERT_EQ(count, 0);
1821 ASSERT_EQ(size, 0);
1822
1823 ASSERT_OK(Flush());
1824
1825 start = Key(50);
1826 end = Key(60);
1827 r = Range(start, end);
1828 db_->GetApproximateMemTableStats(r, &count, &size);
1829 ASSERT_EQ(count, 0);
1830 ASSERT_EQ(size, 0);
1831
1832 for (int i = 0; i < N; i++) {
1833 ASSERT_OK(Put(Key(1000 + i), rnd.RandomString(1024)));
1834 }
1835
1836 start = Key(100);
1837 end = Key(1020);
1838 r = Range(start, end);
1839 db_->GetApproximateMemTableStats(r, &count, &size);
1840 ASSERT_GT(count, 20);
1841 ASSERT_GT(size, 6000);
1842 }
1843
1844 TEST_F(DBTest, ApproximateSizes) {
1845 do {
1846 Options options = CurrentOptions();
1847 options.write_buffer_size = 100000000; // Large write buffer
1848 options.compression = kNoCompression;
1849 options.create_if_missing = true;
1850 DestroyAndReopen(options);
1851 CreateAndReopenWithCF({"pikachu"}, options);
1852
1853 uint64_t size;
1854 ASSERT_OK(Size("", "xyz", 1, &size));
1855 ASSERT_TRUE(Between(size, 0, 0));
1856 ReopenWithColumnFamilies({"default", "pikachu"}, options);
1857 ASSERT_OK(Size("", "xyz", 1, &size));
1858 ASSERT_TRUE(Between(size, 0, 0));
1859
1860 // Write 8MB (80 values, each 100K)
1861 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
1862 const int N = 80;
1863 static const int S1 = 100000;
1864 static const int S2 = 105000; // Allow some expansion from metadata
1865 Random rnd(301);
1866 for (int i = 0; i < N; i++) {
1867 ASSERT_OK(Put(1, Key(i), rnd.RandomString(S1)));
1868 }
1869
1870 // 0 because GetApproximateSizes() does not account for memtable space
1871 ASSERT_OK(Size("", Key(50), 1, &size));
1872 ASSERT_TRUE(Between(size, 0, 0));
1873
1874 // Check sizes across recovery by reopening a few times
1875 for (int run = 0; run < 3; run++) {
1876 ReopenWithColumnFamilies({"default", "pikachu"}, options);
1877
1878 for (int compact_start = 0; compact_start < N; compact_start += 10) {
1879 for (int i = 0; i < N; i += 10) {
1880 ASSERT_OK(Size("", Key(i), 1, &size));
1881 ASSERT_TRUE(Between(size, S1 * i, S2 * i));
1882 ASSERT_OK(Size("", Key(i) + ".suffix", 1, &size));
1883 ASSERT_TRUE(Between(size, S1 * (i + 1), S2 * (i + 1)));
1884 ASSERT_OK(Size(Key(i), Key(i + 10), 1, &size));
1885 ASSERT_TRUE(Between(size, S1 * 10, S2 * 10));
1886 }
1887 ASSERT_OK(Size("", Key(50), 1, &size));
1888 ASSERT_TRUE(Between(size, S1 * 50, S2 * 50));
1889 ASSERT_OK(Size("", Key(50) + ".suffix", 1, &size));
1890 ASSERT_TRUE(Between(size, S1 * 50, S2 * 50));
1891
1892 std::string cstart_str = Key(compact_start);
1893 std::string cend_str = Key(compact_start + 9);
1894 Slice cstart = cstart_str;
1895 Slice cend = cend_str;
1896 ASSERT_OK(dbfull()->TEST_CompactRange(0, &cstart, &cend, handles_[1]));
1897 }
1898
1899 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
1900 ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
1901 }
1902 // ApproximateOffsetOf() is not yet implemented in plain table format.
1903 } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
1904 kSkipPlainTable | kSkipHashIndex));
1905 }
1906
1907 TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
1908 do {
1909 Options options = CurrentOptions();
1910 options.compression = kNoCompression;
1911 CreateAndReopenWithCF({"pikachu"}, options);
1912
1913 Random rnd(301);
1914 std::string big1 = rnd.RandomString(100000);
1915 ASSERT_OK(Put(1, Key(0), rnd.RandomString(10000)));
1916 ASSERT_OK(Put(1, Key(1), rnd.RandomString(10000)));
1917 ASSERT_OK(Put(1, Key(2), big1));
1918 ASSERT_OK(Put(1, Key(3), rnd.RandomString(10000)));
1919 ASSERT_OK(Put(1, Key(4), big1));
1920 ASSERT_OK(Put(1, Key(5), rnd.RandomString(10000)));
1921 ASSERT_OK(Put(1, Key(6), rnd.RandomString(300000)));
1922 ASSERT_OK(Put(1, Key(7), rnd.RandomString(10000)));
1923
1924 // Check sizes across recovery by reopening a few times
1925 uint64_t size;
1926 for (int run = 0; run < 3; run++) {
1927 ReopenWithColumnFamilies({"default", "pikachu"}, options);
1928
1929 ASSERT_OK(Size("", Key(0), 1, &size));
1930 ASSERT_TRUE(Between(size, 0, 0));
1931 ASSERT_OK(Size("", Key(1), 1, &size));
1932 ASSERT_TRUE(Between(size, 10000, 11000));
1933 ASSERT_OK(Size("", Key(2), 1, &size));
1934 ASSERT_TRUE(Between(size, 20000, 21000));
1935 ASSERT_OK(Size("", Key(3), 1, &size));
1936 ASSERT_TRUE(Between(size, 120000, 121000));
1937 ASSERT_OK(Size("", Key(4), 1, &size));
1938 ASSERT_TRUE(Between(size, 130000, 131000));
1939 ASSERT_OK(Size("", Key(5), 1, &size));
1940 ASSERT_TRUE(Between(size, 230000, 232000));
1941 ASSERT_OK(Size("", Key(6), 1, &size));
1942 ASSERT_TRUE(Between(size, 240000, 242000));
1943 // Ensure some overhead is accounted for, even without including all
1944 ASSERT_OK(Size("", Key(7), 1, &size));
1945 ASSERT_TRUE(Between(size, 540500, 545000));
1946 ASSERT_OK(Size("", Key(8), 1, &size));
1947 ASSERT_TRUE(Between(size, 550500, 555000));
1948
1949 ASSERT_OK(Size(Key(3), Key(5), 1, &size));
1950 ASSERT_TRUE(Between(size, 110100, 111000));
1951
1952 ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]));
1953 }
1954 // ApproximateOffsetOf() is not yet implemented in plain table format.
1955 } while (ChangeOptions(kSkipPlainTable));
1956 }
1957 #endif // ROCKSDB_LITE
1958
1959 #ifndef ROCKSDB_LITE
1960 TEST_F(DBTest, Snapshot) {
1961 env_->SetMockSleep();
1962 anon::OptionsOverride options_override;
1963 options_override.skip_policy = kSkipNoSnapshot;
1964 do {
1965 CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
1966 ASSERT_OK(Put(0, "foo", "0v1"));
1967 ASSERT_OK(Put(1, "foo", "1v1"));
1968
1969 const Snapshot* s1 = db_->GetSnapshot();
1970 ASSERT_EQ(1U, GetNumSnapshots());
1971 uint64_t time_snap1 = GetTimeOldestSnapshots();
1972 ASSERT_GT(time_snap1, 0U);
1973 ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());
1974 ASSERT_EQ(GetTimeOldestSnapshots(),
1975 static_cast<uint64_t>(s1->GetUnixTime()));
1976 ASSERT_OK(Put(0, "foo", "0v2"));
1977 ASSERT_OK(Put(1, "foo", "1v2"));
1978
1979 env_->MockSleepForSeconds(1);
1980
1981 const Snapshot* s2 = db_->GetSnapshot();
1982 ASSERT_EQ(2U, GetNumSnapshots());
1983 ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
1984 ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());
1985 ASSERT_EQ(GetTimeOldestSnapshots(),
1986 static_cast<uint64_t>(s1->GetUnixTime()));
1987 ASSERT_OK(Put(0, "foo", "0v3"));
1988 ASSERT_OK(Put(1, "foo", "1v3"));
1989
1990 {
1991 ManagedSnapshot s3(db_);
1992 ASSERT_EQ(3U, GetNumSnapshots());
1993 ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
1994 ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());
1995 ASSERT_EQ(GetTimeOldestSnapshots(),
1996 static_cast<uint64_t>(s1->GetUnixTime()));
1997
1998 ASSERT_OK(Put(0, "foo", "0v4"));
1999 ASSERT_OK(Put(1, "foo", "1v4"));
2000 ASSERT_EQ("0v1", Get(0, "foo", s1));
2001 ASSERT_EQ("1v1", Get(1, "foo", s1));
2002 ASSERT_EQ("0v2", Get(0, "foo", s2));
2003 ASSERT_EQ("1v2", Get(1, "foo", s2));
2004 ASSERT_EQ("0v3", Get(0, "foo", s3.snapshot()));
2005 ASSERT_EQ("1v3", Get(1, "foo", s3.snapshot()));
2006 ASSERT_EQ("0v4", Get(0, "foo"));
2007 ASSERT_EQ("1v4", Get(1, "foo"));
2008 }
2009
2010 ASSERT_EQ(2U, GetNumSnapshots());
2011 ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
2012 ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());
2013 ASSERT_EQ(GetTimeOldestSnapshots(),
2014 static_cast<uint64_t>(s1->GetUnixTime()));
2015 ASSERT_EQ("0v1", Get(0, "foo", s1));
2016 ASSERT_EQ("1v1", Get(1, "foo", s1));
2017 ASSERT_EQ("0v2", Get(0, "foo", s2));
2018 ASSERT_EQ("1v2", Get(1, "foo", s2));
2019 ASSERT_EQ("0v4", Get(0, "foo"));
2020 ASSERT_EQ("1v4", Get(1, "foo"));
2021
2022 db_->ReleaseSnapshot(s1);
2023 ASSERT_EQ("0v2", Get(0, "foo", s2));
2024 ASSERT_EQ("1v2", Get(1, "foo", s2));
2025 ASSERT_EQ("0v4", Get(0, "foo"));
2026 ASSERT_EQ("1v4", Get(1, "foo"));
2027 ASSERT_EQ(1U, GetNumSnapshots());
2028 ASSERT_LT(time_snap1, GetTimeOldestSnapshots());
2029 ASSERT_EQ(GetSequenceOldestSnapshots(), s2->GetSequenceNumber());
2030 ASSERT_EQ(GetTimeOldestSnapshots(),
2031 static_cast<uint64_t>(s2->GetUnixTime()));
2032
2033 db_->ReleaseSnapshot(s2);
2034 ASSERT_EQ(0U, GetNumSnapshots());
2035 ASSERT_EQ(GetSequenceOldestSnapshots(), 0);
2036 ASSERT_EQ("0v4", Get(0, "foo"));
2037 ASSERT_EQ("1v4", Get(1, "foo"));
2038 } while (ChangeOptions());
2039 }
2040
2041 TEST_F(DBTest, HiddenValuesAreRemoved) {
2042 anon::OptionsOverride options_override;
2043 options_override.skip_policy = kSkipNoSnapshot;
2044 uint64_t size;
2045 do {
2046 Options options = CurrentOptions(options_override);
2047 CreateAndReopenWithCF({"pikachu"}, options);
2048 Random rnd(301);
2049 FillLevels("a", "z", 1);
2050
2051 std::string big = rnd.RandomString(50000);
2052 ASSERT_OK(Put(1, "foo", big));
2053 ASSERT_OK(Put(1, "pastfoo", "v"));
2054 const Snapshot* snapshot = db_->GetSnapshot();
2055 ASSERT_OK(Put(1, "foo", "tiny"));
2056 ASSERT_OK(Put(1, "pastfoo2", "v2")); // Advance sequence number one more
2057
2058 ASSERT_OK(Flush(1));
2059 ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
2060
2061 ASSERT_EQ(big, Get(1, "foo", snapshot));
2062 ASSERT_OK(Size("", "pastfoo", 1, &size));
2063 ASSERT_TRUE(Between(size, 50000, 60000));
2064 db_->ReleaseSnapshot(snapshot);
2065 ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny, " + big + " ]");
2066 Slice x("x");
2067 ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, &x, handles_[1]));
2068 ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
2069 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
2070 ASSERT_GE(NumTableFilesAtLevel(1, 1), 1);
2071 ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, &x, handles_[1]));
2072 ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
2073
2074 ASSERT_OK(Size("", "pastfoo", 1, &size));
2075 ASSERT_TRUE(Between(size, 0, 1000));
2076 // ApproximateOffsetOf() is not yet implemented in plain table format,
2077 // which is used by Size().
2078 } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
2079 kSkipPlainTable));
2080 }
2081 #endif // ROCKSDB_LITE
2082
2083 TEST_F(DBTest, UnremovableSingleDelete) {
2084 // If we compact:
2085 //
2086 // Put(A, v1) Snapshot SingleDelete(A) Put(A, v2)
2087 //
2088 // We do not want to end up with:
2089 //
2090 // Put(A, v1) Snapshot Put(A, v2)
2091 //
2092 // Because a subsequent SingleDelete(A) would delete the Put(A, v2)
2093 // but not Put(A, v1), so Get(A) would return v1.
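// In other words, compaction must preserve the SingleDelete marker (shown as
// SDEL below) for as long as the snapshot can still see v1; the
// AllEntriesFor() assertions verify exactly that.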
2094 anon::OptionsOverride options_override;
2095 options_override.skip_policy = kSkipNoSnapshot;
2096 do {
2097 Options options = CurrentOptions(options_override);
2098 options.disable_auto_compactions = true;
2099 CreateAndReopenWithCF({"pikachu"}, options);
2100
2101 ASSERT_OK(Put(1, "foo", "first"));
2102 const Snapshot* snapshot = db_->GetSnapshot();
2103 ASSERT_OK(SingleDelete(1, "foo"));
2104 ASSERT_OK(Put(1, "foo", "second"));
2105 ASSERT_OK(Flush(1));
2106
2107 ASSERT_EQ("first", Get(1, "foo", snapshot));
2108 ASSERT_EQ("second", Get(1, "foo"));
2109
2110 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1],
2111 nullptr, nullptr));
2112 ASSERT_EQ("[ second, SDEL, first ]", AllEntriesFor("foo", 1));
2113
2114 ASSERT_OK(SingleDelete(1, "foo"));
2115
2116 ASSERT_EQ("first", Get(1, "foo", snapshot));
2117 ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
2118
2119 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1],
2120 nullptr, nullptr));
2121
2122 ASSERT_EQ("first", Get(1, "foo", snapshot));
2123 ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
2124 db_->ReleaseSnapshot(snapshot);
2125 // Skip FIFO and universal compaction because they do not apply to the test
2126 // case. Skip MergePut because single delete does not get removed when it
2127 // encounters a merge.
2128 } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
2129 kSkipMergePut));
2130 }
2131
2132 #ifndef ROCKSDB_LITE
2133 TEST_F(DBTest, DeletionMarkers1) {
2134 Options options = CurrentOptions();
2135 CreateAndReopenWithCF({"pikachu"}, options);
2136 ASSERT_OK(Put(1, "foo", "v1"));
2137 ASSERT_OK(Flush(1));
2138 const int last = 2;
2139 MoveFilesToLevel(last, 1);
2140 // foo => v1 is now in last level
2141 ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
2142
2143 // Place a table at level last-1 to prevent merging with preceding mutation
2144 ASSERT_OK(Put(1, "a", "begin"));
2145 ASSERT_OK(Put(1, "z", "end"));
2146 ASSERT_OK(Flush(1));
2147 MoveFilesToLevel(last - 1, 1);
2148 ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
2149 ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);
2150
2151 ASSERT_OK(Delete(1, "foo"));
2152 ASSERT_OK(Put(1, "foo", "v2"));
2153 ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
2154 ASSERT_OK(Flush(1)); // Moves to level last-2
2155 ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
2156 Slice z("z");
2157 ASSERT_OK(dbfull()->TEST_CompactRange(last - 2, nullptr, &z, handles_[1]));
2158 // DEL eliminated, but v1 remains because we aren't compacting that level
2159 // (DEL can be eliminated because v2 hides v1).
2160 ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
2161 ASSERT_OK(
2162 dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]));
2163 // Merging last-1 w/ last, so we are the base level for "foo", so
2164 // DEL is removed (as is v1).
2165 ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
2166 }
2167
2168 TEST_F(DBTest, DeletionMarkers2) {
2169 Options options = CurrentOptions();
2170 CreateAndReopenWithCF({"pikachu"}, options);
2171 ASSERT_OK(Put(1, "foo", "v1"));
2172 ASSERT_OK(Flush(1));
2173 const int last = 2;
2174 MoveFilesToLevel(last, 1);
2175 // foo => v1 is now in last level
2176 ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
2177
2178 // Place a table at level last-1 to prevent merging with preceding mutation
2179 ASSERT_OK(Put(1, "a", "begin"));
2180 ASSERT_OK(Put(1, "z", "end"));
2181 ASSERT_OK(Flush(1));
2182 MoveFilesToLevel(last - 1, 1);
2183 ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
2184 ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);
2185
2186 ASSERT_OK(Delete(1, "foo"));
2187 ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
2188 ASSERT_OK(Flush(1)); // Moves to level last-2
2189 ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
2190 ASSERT_OK(
2191 dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr, handles_[1]));
2192 // DEL kept: "last" file overlaps
2193 ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
2194 ASSERT_OK(
2195 dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]));
2196 // Merging last-1 w/ last, so we are the base level for "foo", so
2197 // DEL is removed (as is v1).
2198 ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
2199 }
2200
2201 TEST_F(DBTest, OverlapInLevel0) {
2202 do {
2203 Options options = CurrentOptions();
2204 CreateAndReopenWithCF({"pikachu"}, options);
2205
2206 // Fill levels 1 and 2 to disable the pushing of new memtables to
2207 // levels > 0.
2208 ASSERT_OK(Put(1, "100", "v100"));
2209 ASSERT_OK(Put(1, "999", "v999"));
2210 ASSERT_OK(Flush(1));
2211 MoveFilesToLevel(2, 1);
2212 ASSERT_OK(Delete(1, "100"));
2213 ASSERT_OK(Delete(1, "999"));
2214 ASSERT_OK(Flush(1));
2215 MoveFilesToLevel(1, 1);
2216 ASSERT_EQ("0,1,1", FilesPerLevel(1));
2217
2218 // Make files spanning the following ranges in level-0:
2219 // files[0] 200 .. 900
2220 // files[1] 300 .. 500
2221 // Note that files are sorted by smallest key.
2222 ASSERT_OK(Put(1, "300", "v300"));
2223 ASSERT_OK(Put(1, "500", "v500"));
2224 ASSERT_OK(Flush(1));
2225 ASSERT_OK(Put(1, "200", "v200"));
2226 ASSERT_OK(Put(1, "600", "v600"));
2227 ASSERT_OK(Put(1, "900", "v900"));
2228 ASSERT_OK(Flush(1));
2229 ASSERT_EQ("2,1,1", FilesPerLevel(1));
2230
2231 // BEGIN addition to existing test
2232 // Take this opportunity to verify SST unique ids (including Plain table)
2233 TablePropertiesCollection tbc;
2234 ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[1], &tbc));
2235 VerifySstUniqueIds(tbc);
2236 // END addition to existing test
2237
2238 // Compact away the placeholder files we created initially
2239 ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]));
2240 ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr, handles_[1]));
2241 ASSERT_EQ("2", FilesPerLevel(1));
2242
2243 // Do a memtable compaction. Before the bug fix, the compaction would
2244 // not detect the overlap with level-0 files and would incorrectly place
2245 // the deletion in a deeper level.
2246 ASSERT_OK(Delete(1, "600"));
2247 ASSERT_OK(Flush(1));
2248 ASSERT_EQ("3", FilesPerLevel(1));
2249 ASSERT_EQ("NOT_FOUND", Get(1, "600"));
2250 } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
2251 }
2252 #endif // ROCKSDB_LITE
2253
2254 TEST_F(DBTest, ComparatorCheck) {
2255 class NewComparator : public Comparator {
2256 public:
2257 const char* Name() const override { return "rocksdb.NewComparator"; }
2258 int Compare(const Slice& a, const Slice& b) const override {
2259 return BytewiseComparator()->Compare(a, b);
2260 }
2261 void FindShortestSeparator(std::string* s, const Slice& l) const override {
2262 BytewiseComparator()->FindShortestSeparator(s, l);
2263 }
2264 void FindShortSuccessor(std::string* key) const override {
2265 BytewiseComparator()->FindShortSuccessor(key);
2266 }
2267 };
2268 Options new_options, options;
2269 NewComparator cmp;
2270 do {
2271 options = CurrentOptions();
2272 CreateAndReopenWithCF({"pikachu"}, options);
2273 new_options = CurrentOptions();
2274 new_options.comparator = &cmp;
2275 // only the non-default column family has a non-matching comparator
2276 Status s = TryReopenWithColumnFamilies(
2277 {"default", "pikachu"}, std::vector<Options>({options, new_options}));
2278 ASSERT_TRUE(!s.ok());
2279 ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
2280 << s.ToString();
2281 } while (ChangeCompactOptions());
2282 }
2283
2284 TEST_F(DBTest, CustomComparator) {
2285 class NumberComparator : public Comparator {
2286 public:
2287 const char* Name() const override { return "test.NumberComparator"; }
2288 int Compare(const Slice& a, const Slice& b) const override {
2289 return ToNumber(a) - ToNumber(b);
2290 }
2291 void FindShortestSeparator(std::string* s, const Slice& l) const override {
2292 ToNumber(*s); // Check format
2293 ToNumber(l); // Check format
2294 }
2295 void FindShortSuccessor(std::string* key) const override {
2296 ToNumber(*key); // Check format
2297 }
2298
2299 private:
2300 static int ToNumber(const Slice& x) {
2301 // Check that there are no extra characters.
2302 EXPECT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']')
2303 << EscapeString(x);
2304 int val;
2305 char ignored;
2306 EXPECT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
2307 << EscapeString(x);
2308 return val;
2309 }
2310 };
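// Note: sscanf's %i accepts both decimal and hexadecimal input, so under this
// comparator "[20]" and "[0x14]" parse to the same number and act as aliases
// for the same key, which the Get() checks below rely on.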
2311 Options new_options;
2312 NumberComparator cmp;
2313 do {
2314 new_options = CurrentOptions();
2315 new_options.create_if_missing = true;
2316 new_options.comparator = &cmp;
2317 new_options.write_buffer_size = 4096; // Compact more often
2318 new_options.arena_block_size = 4096;
2319 new_options = CurrentOptions(new_options);
2320 DestroyAndReopen(new_options);
2321 CreateAndReopenWithCF({"pikachu"}, new_options);
2322 ASSERT_OK(Put(1, "[10]", "ten"));
2323 ASSERT_OK(Put(1, "[0x14]", "twenty"));
2324 for (int i = 0; i < 2; i++) {
2325 ASSERT_EQ("ten", Get(1, "[10]"));
2326 ASSERT_EQ("ten", Get(1, "[0xa]"));
2327 ASSERT_EQ("twenty", Get(1, "[20]"));
2328 ASSERT_EQ("twenty", Get(1, "[0x14]"));
2329 ASSERT_EQ("NOT_FOUND", Get(1, "[15]"));
2330 ASSERT_EQ("NOT_FOUND", Get(1, "[0xf]"));
2331 Compact(1, "[0]", "[9999]");
2332 }
2333
2334 for (int run = 0; run < 2; run++) {
2335 for (int i = 0; i < 1000; i++) {
2336 char buf[100];
2337 snprintf(buf, sizeof(buf), "[%d]", i * 10);
2338 ASSERT_OK(Put(1, buf, buf));
2339 }
2340 Compact(1, "[0]", "[1000000]");
2341 }
2342 } while (ChangeCompactOptions());
2343 }
2344
2345 TEST_F(DBTest, DBOpen_Options) {
2346 Options options = CurrentOptions();
2347 std::string dbname = test::PerThreadDBPath("db_options_test");
2348 ASSERT_OK(DestroyDB(dbname, options));
2349
2350 // Does not exist, and create_if_missing == false: error
2351 DB* db = nullptr;
2352 options.create_if_missing = false;
2353 Status s = DB::Open(options, dbname, &db);
2354 ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr);
2355 ASSERT_TRUE(db == nullptr);
2356
2357 // Does not exist, and create_if_missing == true: OK
2358 options.create_if_missing = true;
2359 s = DB::Open(options, dbname, &db);
2360 ASSERT_OK(s);
2361 ASSERT_TRUE(db != nullptr);
2362
2363 delete db;
2364 db = nullptr;
2365
2366 // Does exist, and error_if_exists == true: error
2367 options.create_if_missing = false;
2368 options.error_if_exists = true;
2369 s = DB::Open(options, dbname, &db);
2370 ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr);
2371 ASSERT_TRUE(db == nullptr);
2372
2373 // Does exist, and error_if_exists == false: OK
2374 options.create_if_missing = true;
2375 options.error_if_exists = false;
2376 s = DB::Open(options, dbname, &db);
2377 ASSERT_OK(s);
2378 ASSERT_TRUE(db != nullptr);
2379
2380 delete db;
2381 db = nullptr;
2382 }
2383
2384 TEST_F(DBTest, DBOpen_Change_NumLevels) {
2385 Options options = CurrentOptions();
2386 options.create_if_missing = true;
2387 DestroyAndReopen(options);
2388 ASSERT_TRUE(db_ != nullptr);
2389 CreateAndReopenWithCF({"pikachu"}, options);
2390
2391 ASSERT_OK(Put(1, "a", "123"));
2392 ASSERT_OK(Put(1, "b", "234"));
2393 ASSERT_OK(Flush(1));
2394 MoveFilesToLevel(3, 1);
2395 Close();
2396
2397 options.create_if_missing = false;
2398 options.num_levels = 2;
2399 Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
2400 ASSERT_TRUE(strstr(s.ToString().c_str(), "Invalid argument") != nullptr);
2401 ASSERT_TRUE(db_ == nullptr);
2402 }
2403
2404 TEST_F(DBTest, DestroyDBMetaDatabase) {
2405 std::string dbname = test::PerThreadDBPath("db_meta");
2406 ASSERT_OK(env_->CreateDirIfMissing(dbname));
2407 std::string metadbname = MetaDatabaseName(dbname, 0);
2408 ASSERT_OK(env_->CreateDirIfMissing(metadbname));
2409 std::string metametadbname = MetaDatabaseName(metadbname, 0);
2410 ASSERT_OK(env_->CreateDirIfMissing(metametadbname));
2411
2412 // Destroy previous versions if they exist, the long way.
2413 Options options = CurrentOptions();
2414 ASSERT_OK(DestroyDB(metametadbname, options));
2415 ASSERT_OK(DestroyDB(metadbname, options));
2416 ASSERT_OK(DestroyDB(dbname, options));
2417
2418 // Setup databases
2419 DB* db = nullptr;
2420 ASSERT_OK(DB::Open(options, dbname, &db));
2421 delete db;
2422 db = nullptr;
2423 ASSERT_OK(DB::Open(options, metadbname, &db));
2424 delete db;
2425 db = nullptr;
2426 ASSERT_OK(DB::Open(options, metametadbname, &db));
2427 delete db;
2428 db = nullptr;
2429
2430 // Delete databases
2431 ASSERT_OK(DestroyDB(dbname, options));
2432
2433 // Check if deletion worked.
2434 options.create_if_missing = false;
2435 ASSERT_TRUE(!(DB::Open(options, dbname, &db)).ok());
2436 ASSERT_TRUE(!(DB::Open(options, metadbname, &db)).ok());
2437 ASSERT_TRUE(!(DB::Open(options, metametadbname, &db)).ok());
2438 }
2439
2440 #ifndef ROCKSDB_LITE
2441 TEST_F(DBTest, SnapshotFiles) {
2442 do {
2443 Options options = CurrentOptions();
2444 options.write_buffer_size = 100000000; // Large write buffer
2445 CreateAndReopenWithCF({"pikachu"}, options);
2446
2447 Random rnd(301);
2448
2449 // Write 8MB (80 values, each 100K)
2450 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
2451 std::vector<std::string> values;
2452 for (int i = 0; i < 80; i++) {
2453 values.push_back(rnd.RandomString(100000));
2454 ASSERT_OK(Put((i < 40), Key(i), values[i]));
2455 }
2456
2457 // assert that nothing makes it to disk yet.
2458 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
2459
2460 // get a file snapshot
2461 uint64_t manifest_number = 0;
2462 uint64_t manifest_size = 0;
2463 std::vector<std::string> files;
2464 ASSERT_OK(dbfull()->DisableFileDeletions());
2465 ASSERT_OK(dbfull()->GetLiveFiles(files, &manifest_size));
2466
2467 // CURRENT, MANIFEST, OPTIONS, and one *.sst file per CF (2 CFs): 5 files
2468 ASSERT_EQ(files.size(), 5U);
2469
2470 uint64_t number = 0;
2471 FileType type;
2472
2473 // copy these files to a new snapshot directory
2474 std::string snapdir = dbname_ + ".snapdir/";
2475 if (env_->FileExists(snapdir).ok()) {
2476 ASSERT_OK(DestroyDir(env_, snapdir));
2477 }
2478 ASSERT_OK(env_->CreateDir(snapdir));
2479
2480 for (size_t i = 0; i < files.size(); i++) {
2481 // our clients require that GetLiveFiles returns
2482 // files with "/" as first character!
2483 ASSERT_EQ(files[i][0], '/');
2484 std::string src = dbname_ + files[i];
2485 std::string dest = snapdir + files[i];
2486
2487 uint64_t size;
2488 ASSERT_OK(env_->GetFileSize(src, &size));
2489
2490 // record the number and the size of the
2491 // latest manifest file
2492 if (ParseFileName(files[i].substr(1), &number, &type)) {
2493 if (type == kDescriptorFile) {
2494 ASSERT_EQ(manifest_number, 0);
2495 manifest_number = number;
2496 ASSERT_GE(size, manifest_size);
2497 size = manifest_size; // copy only valid MANIFEST data
2498 }
2499 }
2500 CopyFile(src, dest, size);
2501 }
2502
2503 // release file snapshot
2504 ASSERT_OK(dbfull()->EnableFileDeletions(/*force*/ false));
2505 // overwrite one key; this key should not appear in the snapshot
2506 std::vector<std::string> extras;
2507 for (unsigned int i = 0; i < 1; i++) {
2508 extras.push_back(rnd.RandomString(100000));
2509 ASSERT_OK(Put(0, Key(i), extras[i]));
2510 }
2511
2512 // verify that data in the snapshot are correct
2513 std::vector<ColumnFamilyDescriptor> column_families;
2514 column_families.emplace_back("default", ColumnFamilyOptions());
2515 column_families.emplace_back("pikachu", ColumnFamilyOptions());
2516 std::vector<ColumnFamilyHandle*> cf_handles;
2517 DB* snapdb;
2518 DBOptions opts;
2519 opts.env = env_;
2520 opts.create_if_missing = false;
2521 Status stat =
2522 DB::Open(opts, snapdir, column_families, &cf_handles, &snapdb);
2523 ASSERT_OK(stat);
2524
2525 ReadOptions roptions;
2526 std::string val;
2527 for (unsigned int i = 0; i < 80; i++) {
2528 ASSERT_OK(snapdb->Get(roptions, cf_handles[i < 40], Key(i), &val));
2529 ASSERT_EQ(values[i].compare(val), 0);
2530 }
2531 for (auto cfh : cf_handles) {
2532 delete cfh;
2533 }
2534 delete snapdb;
2535
2536 // look at the new live files after we added an 'extra' key
2537 // and after we took the first snapshot.
2538 uint64_t new_manifest_number = 0;
2539 uint64_t new_manifest_size = 0;
2540 std::vector<std::string> newfiles;
2541 ASSERT_OK(dbfull()->DisableFileDeletions());
2542 ASSERT_OK(dbfull()->GetLiveFiles(newfiles, &new_manifest_size));
2543
2544 // Find the new manifest file. Assert that this manifest file is
2545 // the same one as in the previous snapshot, but its size should be
2546 // larger because we added an extra key after taking the
2547 // previous snapshot.
2548 for (size_t i = 0; i < newfiles.size(); i++) {
2549 std::string src = dbname_ + "/" + newfiles[i];
2550 // record the file number and the size of the
2551 // latest manifest file
2552 if (ParseFileName(newfiles[i].substr(1), &number, &type)) {
2553 if (type == kDescriptorFile) {
2554 ASSERT_EQ(new_manifest_number, 0);
2555 uint64_t size;
2556 new_manifest_number = number;
2557 ASSERT_OK(env_->GetFileSize(src, &size));
2558 ASSERT_GE(size, new_manifest_size);
2559 }
2560 }
2561 }
2562 ASSERT_EQ(manifest_number, new_manifest_number);
2563 ASSERT_GT(new_manifest_size, manifest_size);
2564
2565 // Also test GetLiveFilesStorageInfo
2566 std::vector<LiveFileStorageInfo> new_infos;
2567 ASSERT_OK(db_->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(),
2568 &new_infos));
2569
2570 // Close DB (while deletions disabled)
2571 Close();
2572
2573 // Validate
2574 for (auto& info : new_infos) {
2575 std::string path = info.directory + "/" + info.relative_filename;
2576 uint64_t size;
2577 ASSERT_OK(env_->GetFileSize(path, &size));
2578 if (info.trim_to_size) {
2579 ASSERT_LE(info.size, size);
2580 } else if (!info.replacement_contents.empty()) {
2581 ASSERT_EQ(info.size, info.replacement_contents.size());
2582 } else {
2583 ASSERT_EQ(info.size, size);
2584 }
2585 if (info.file_type == kDescriptorFile) {
2586 ASSERT_EQ(info.file_number, manifest_number);
2587 }
2588 }
2589 } while (ChangeCompactOptions());
2590 }
2591
2592 TEST_F(DBTest, ReadonlyDBGetLiveManifestSize) {
2593 do {
2594 Options options = CurrentOptions();
2595 options.level0_file_num_compaction_trigger = 2;
2596 DestroyAndReopen(options);
2597
2598 ASSERT_OK(Put("foo", "bar"));
2599 ASSERT_OK(Flush());
2600 ASSERT_OK(Put("foo", "bar"));
2601 ASSERT_OK(Flush());
2602 ASSERT_OK(dbfull()->TEST_WaitForCompact());
2603
2604 Close();
2605 ASSERT_OK(ReadOnlyReopen(options));
2606
2607 uint64_t manifest_size = 0;
2608 std::vector<std::string> files;
2609 ASSERT_OK(dbfull()->GetLiveFiles(files, &manifest_size));
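// In read-only mode nothing can append to the manifest, so the size reported
// by GetLiveFiles should match the descriptor file's size on disk exactly,
// which the loop below verifies.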
2610
2611 for (const std::string& f : files) {
2612 uint64_t number = 0;
2613 FileType type;
2614 if (ParseFileName(f.substr(1), &number, &type)) {
2615 if (type == kDescriptorFile) {
2616 uint64_t size_on_disk;
2617 ASSERT_OK(env_->GetFileSize(dbname_ + "/" + f, &size_on_disk));
2618 ASSERT_EQ(manifest_size, size_on_disk);
2619 break;
2620 }
2621 }
2622 }
2623 Close();
2624 } while (ChangeCompactOptions());
2625 }
2626
2627 TEST_F(DBTest, GetLiveBlobFiles) {
2628 // Note: the following prevents an otherwise harmless data race between the
2629 // test setup code (AddBlobFile) below and the periodic stat dumping thread.
2630 Options options = CurrentOptions();
2631 options.stats_dump_period_sec = 0;
2632
2633 constexpr uint64_t blob_file_number = 234;
2634 constexpr uint64_t total_blob_count = 555;
2635 constexpr uint64_t total_blob_bytes = 66666;
2636 constexpr char checksum_method[] = "CRC32";
2637 constexpr char checksum_value[] = "\x3d\x87\xff\x57";
2638 constexpr uint64_t garbage_blob_count = 0;
2639 constexpr uint64_t garbage_blob_bytes = 0;
2640
2641 Reopen(options);
2642
2643 AddBlobFile(db_->DefaultColumnFamily(), blob_file_number, total_blob_count,
2644 total_blob_bytes, checksum_method, checksum_value,
2645 garbage_blob_count, garbage_blob_bytes);
2646 // Make sure it appears in the results returned by GetLiveFiles.
2647 uint64_t manifest_size = 0;
2648 std::vector<std::string> files;
2649 ASSERT_OK(dbfull()->GetLiveFiles(files, &manifest_size));
2650
2651 ASSERT_FALSE(files.empty());
2652 ASSERT_EQ(files[0], BlobFileName("", blob_file_number));
2653
2654 ColumnFamilyMetaData cfmd;
2655
2656 db_->GetColumnFamilyMetaData(&cfmd);
2657 ASSERT_EQ(cfmd.blob_files.size(), 1);
2658 const BlobMetaData& bmd = cfmd.blob_files[0];
2659
2660 CheckBlobMetaData(bmd, blob_file_number, total_blob_count, total_blob_bytes,
2661 checksum_method, checksum_value, garbage_blob_count,
2662 garbage_blob_bytes);
2663 ASSERT_EQ(NormalizePath(bmd.blob_file_path), NormalizePath(dbname_));
2664 ASSERT_EQ(cfmd.blob_file_count, 1U);
2665 ASSERT_EQ(cfmd.blob_file_size, bmd.blob_file_size);
2666 }
2667 #endif
2668
2669 TEST_F(DBTest, PurgeInfoLogs) {
2670 Options options = CurrentOptions();
2671 options.keep_log_file_num = 5;
2672 options.create_if_missing = true;
2673 options.env = env_;
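// keep_log_file_num caps how many info log files (LOG plus rotated
// LOG.old.* files) are retained, so after the 8 reopens below only the
// newest 5 should survive in each mode.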
2674 for (int mode = 0; mode <= 1; mode++) {
2675 if (mode == 1) {
2676 options.db_log_dir = dbname_ + "_logs";
2677 ASSERT_OK(env_->CreateDirIfMissing(options.db_log_dir));
2678 } else {
2679 options.db_log_dir = "";
2680 }
2681 for (int i = 0; i < 8; i++) {
2682 Reopen(options);
2683 }
2684
2685 std::vector<std::string> files;
2686 ASSERT_OK(env_->GetChildren(
2687 options.db_log_dir.empty() ? dbname_ : options.db_log_dir, &files));
2688 int info_log_count = 0;
2689 for (std::string file : files) {
2690 if (file.find("LOG") != std::string::npos) {
2691 info_log_count++;
2692 }
2693 }
2694 ASSERT_EQ(5, info_log_count);
2695
2696 Destroy(options);
2697 // For mode 0, DestroyDB() deletes all the info logs under the DB dir;
2698 // for mode 1, no info log file was put under the DB dir to begin with.
2699 // Since dbname_ has no children either way, no need to loop over db_files.
2700 std::vector<std::string> db_files;
2701 ASSERT_TRUE(env_->GetChildren(dbname_, &db_files).IsNotFound());
2702 ASSERT_TRUE(db_files.empty());
2703
2704 if (mode == 1) {
2705 // Cleaning up
2706 ASSERT_OK(env_->GetChildren(options.db_log_dir, &files));
2707 for (std::string file : files) {
2708 ASSERT_OK(env_->DeleteFile(options.db_log_dir + "/" + file));
2709 }
2710 ASSERT_OK(env_->DeleteDir(options.db_log_dir));
2711 }
2712 }
2713 }
2714
2715 #ifndef ROCKSDB_LITE
2716 // Multi-threaded test:
2717 namespace {
2718
2719 static const int kColumnFamilies = 10;
2720 static const int kNumThreads = 10;
2721 static const int kTestSeconds = 10;
2722 static const int kNumKeys = 1000;
2723
2724 struct MTState {
2725 DBTest* test;
2726 std::atomic<int> counter[kNumThreads];
2727 };
2728
2729 struct MTThread {
2730 MTState* state;
2731 int id;
2732 bool multiget_batched;
2733 };
2734
2735 static void MTThreadBody(void* arg) {
2736 MTThread* t = reinterpret_cast<MTThread*>(arg);
2737 int id = t->id;
2738 DB* db = t->state->test->db_;
2739 int counter = 0;
2740 std::shared_ptr<SystemClock> clock = SystemClock::Default();
2741 auto end_micros = clock->NowMicros() + kTestSeconds * 1000000U;
2742
2743 fprintf(stderr, "... starting thread %d\n", id);
2744 Random rnd(1000 + id);
2745 char valbuf[1500];
2746 while (clock->NowMicros() < end_micros) {
2747 t->state->counter[id].store(counter, std::memory_order_release);
2748
2749 int key = rnd.Uniform(kNumKeys);
2750 char keybuf[20];
2751 snprintf(keybuf, sizeof(keybuf), "%016d", key);
2752
2753 if (rnd.OneIn(2)) {
2754 // Write values of the form <key, my id, counter, cf, unique_id>
2755 // into each of the CFs.
2756 // We add some padding to force compactions.
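// For example, thread 3 at counter 17 writing key 42 to CF 5 with
// unique_id 123456 produces "42.3.17.5.123456" followed by spaces, since the
// final field is left-justified in a 1000-character field.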
2757 int unique_id = rnd.Uniform(1000000);
2758
2759 // Half of the time directly use WriteBatch. Half of the time use
2760 // WriteBatchWithIndex.
2761 if (rnd.OneIn(2)) {
2762 WriteBatch batch;
2763 for (int cf = 0; cf < kColumnFamilies; ++cf) {
2764 snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
2765 static_cast<int>(counter), cf, unique_id);
2766 ASSERT_OK(batch.Put(t->state->test->handles_[cf], Slice(keybuf),
2767 Slice(valbuf)));
2768 }
2769 ASSERT_OK(db->Write(WriteOptions(), &batch));
2770 } else {
2771 WriteBatchWithIndex batch(db->GetOptions().comparator);
2772 for (int cf = 0; cf < kColumnFamilies; ++cf) {
2773 snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
2774 static_cast<int>(counter), cf, unique_id);
2775 ASSERT_OK(batch.Put(t->state->test->handles_[cf], Slice(keybuf),
2776 Slice(valbuf)));
2777 }
2778 ASSERT_OK(db->Write(WriteOptions(), batch.GetWriteBatch()));
2779 }
2780 } else {
2781 // Read a value and verify that it matches the pattern written above
2782 // and that writes to all column families were atomic (unique_id is the
2783 // same)
2784 std::vector<Slice> keys(kColumnFamilies, Slice(keybuf));
2785 std::vector<std::string> values;
2786 std::vector<Status> statuses;
2787 if (!t->multiget_batched) {
2788 statuses = db->MultiGet(ReadOptions(), t->state->test->handles_, keys,
2789 &values);
2790 } else {
2791 std::vector<PinnableSlice> pin_values(keys.size());
2792 statuses.resize(keys.size());
2793 const Snapshot* snapshot = db->GetSnapshot();
2794 ReadOptions ro;
2795 ro.snapshot = snapshot;
2796 for (int cf = 0; cf < kColumnFamilies; ++cf) {
2797 db->MultiGet(ro, t->state->test->handles_[cf], 1, &keys[cf],
2798 &pin_values[cf], &statuses[cf]);
2799 }
2800 db->ReleaseSnapshot(snapshot);
2801 values.resize(keys.size());
2802 for (int cf = 0; cf < kColumnFamilies; ++cf) {
2803 if (statuses[cf].ok()) {
2804 values[cf].assign(pin_values[cf].data(), pin_values[cf].size());
2805 }
2806 }
2807 }
2808 Status s = statuses[0];
2809 // all statuses have to be the same
2810 for (size_t i = 1; i < statuses.size(); ++i) {
2811 // they are either both ok or both not-found
2812 ASSERT_TRUE((s.ok() && statuses[i].ok()) ||
2813 (s.IsNotFound() && statuses[i].IsNotFound()));
2814 }
2815 if (s.IsNotFound()) {
2816 // Key has not yet been written
2817 } else {
2818 // Check that the writer thread counter is >= the counter in the value
2819 ASSERT_OK(s);
2820 int unique_id = -1;
2821 for (int i = 0; i < kColumnFamilies; ++i) {
2822 int k, w, c, cf, u;
2823 ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w, &c,
2824 &cf, &u))
2825 << values[i];
2826 ASSERT_EQ(k, key);
2827 ASSERT_GE(w, 0);
2828 ASSERT_LT(w, kNumThreads);
2829 ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire));
2830 ASSERT_EQ(cf, i);
2831 if (i == 0) {
2832 unique_id = u;
2833 } else {
2834 // this checks that updates across column families happened
2835 // atomically -- all unique ids are the same
2836 ASSERT_EQ(u, unique_id);
2837 }
2838 }
2839 }
2840 }
2841 counter++;
2842 }
2843 fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter));
2844 }
2845
2846 } // anonymous namespace
2847
2848 class MultiThreadedDBTest
2849 : public DBTest,
2850 public ::testing::WithParamInterface<std::tuple<int, bool>> {
2851 public:
2852 void SetUp() override {
2853 std::tie(option_config_, multiget_batched_) = GetParam();
2854 }
2855
2856 static std::vector<int> GenerateOptionConfigs() {
2857 std::vector<int> optionConfigs;
2858 for (int optionConfig = kDefault; optionConfig < kEnd; ++optionConfig) {
2859 optionConfigs.push_back(optionConfig);
2860 }
2861 return optionConfigs;
2862 }
2863
2864 bool multiget_batched_;
2865 };
2866
2867 TEST_P(MultiThreadedDBTest, MultiThreaded) {
2868 if (option_config_ == kPipelinedWrite) return;
2869 anon::OptionsOverride options_override;
2870 options_override.skip_policy = kSkipNoSnapshot;
2871 Options options = CurrentOptions(options_override);
2872 std::vector<std::string> cfs;
2873 for (int i = 1; i < kColumnFamilies; ++i) {
2874 cfs.push_back(std::to_string(i));
2875 }
2876 Reopen(options);
2877 CreateAndReopenWithCF(cfs, options);
2878 // Initialize state
2879 MTState mt;
2880 mt.test = this;
2881 for (int id = 0; id < kNumThreads; id++) {
2882 mt.counter[id].store(0, std::memory_order_release);
2883 }
2884
2885 // Start threads
2886 MTThread thread[kNumThreads];
2887 for (int id = 0; id < kNumThreads; id++) {
2888 thread[id].state = &mt;
2889 thread[id].id = id;
2890 thread[id].multiget_batched = multiget_batched_;
2891 env_->StartThread(MTThreadBody, &thread[id]);
2892 }
2893
2894 env_->WaitForJoin();
2895 }
2896
2897 INSTANTIATE_TEST_CASE_P(
2898 MultiThreaded, MultiThreadedDBTest,
2899 ::testing::Combine(
2900 ::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs()),
2901 ::testing::Bool()));
2902 #endif // ROCKSDB_LITE
2903
2904 // Group commit test:
2905 #if !defined(OS_WIN)
2906 // Disable this test temporarily on Travis and AppVeyor as it fails
2907 // intermittently. GitHub issue: #4151
2908 namespace {
2909
2910 static const int kGCNumThreads = 4;
2911 static const int kGCNumKeys = 1000;
2912
2913 struct GCThread {
2914 DB* db;
2915 int id;
2916 std::atomic<bool> done;
2917 };
2918
2919 static void GCThreadBody(void* arg) {
2920 GCThread* t = reinterpret_cast<GCThread*>(arg);
2921 int id = t->id;
2922 DB* db = t->db;
2923 WriteOptions wo;
2924
2925 for (int i = 0; i < kGCNumKeys; ++i) {
2926 std::string kv(std::to_string(i + id * kGCNumKeys));
2927 ASSERT_OK(db->Put(wo, kv, kv));
2928 }
2929 t->done = true;
2930 }
2931
2932 } // anonymous namespace
2933
2934 TEST_F(DBTest, GroupCommitTest) {
2935 do {
2936 Options options = CurrentOptions();
2937 options.env = env_;
2938 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2939 Reopen(options);
2940
2941 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
2942 {{"WriteThread::JoinBatchGroup:BeganWaiting",
2943 "DBImpl::WriteImpl:BeforeLeaderEnters"},
2944 {"WriteThread::AwaitState:BlockingWaiting",
2945 "WriteThread::EnterAsBatchGroupLeader:End"}});
2946 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
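// Each dependency above forces its first sync point to fire before its
// second may proceed: a writer must have begun waiting before a leader
// enters the write path, and a waiter must be blocking before the leader
// finishes forming its batch group. This reliably produces group commits,
// which is what makes the WRITE_DONE_BY_OTHER assertion below meaningful.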
2947
2948 // Start threads
2949 GCThread thread[kGCNumThreads];
2950 for (int id = 0; id < kGCNumThreads; id++) {
2951 thread[id].id = id;
2952 thread[id].db = db_;
2953 thread[id].done = false;
2954 env_->StartThread(GCThreadBody, &thread[id]);
2955 }
2956 env_->WaitForJoin();
2957
2958 ASSERT_GT(TestGetTickerCount(options, WRITE_DONE_BY_OTHER), 0);
2959
2960 std::vector<std::string> expected_db;
2961 for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) {
2962 expected_db.push_back(std::to_string(i));
2963 }
2964 std::sort(expected_db.begin(), expected_db.end());
2965
2966 Iterator* itr = db_->NewIterator(ReadOptions());
2967 itr->SeekToFirst();
2968 for (auto x : expected_db) {
2969 ASSERT_TRUE(itr->Valid());
2970 ASSERT_EQ(itr->key().ToString(), x);
2971 ASSERT_EQ(itr->value().ToString(), x);
2972 itr->Next();
2973 }
2974 ASSERT_TRUE(!itr->Valid());
2975 delete itr;
2976
2977 HistogramData hist_data;
2978 options.statistics->histogramData(DB_WRITE, &hist_data);
2979 ASSERT_GT(hist_data.average, 0.0);
2980 } while (ChangeOptions(kSkipNoSeekToLast));
2981 }
2982 #endif // !defined(OS_WIN)
2983
2984 namespace {
2985 using KVMap = std::map<std::string, std::string>;
2986 } // anonymous namespace
2987
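// ModelDB is a minimal reference implementation of the DB interface backed
// by an in-memory std::map. Randomized tests run the same operations against
// a real DB and a ModelDB and compare results; everything not needed for
// that purpose is stubbed out as NotSupported.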
2988 class ModelDB : public DB {
2989 public:
2990 class ModelSnapshot : public Snapshot {
2991 public:
2992 KVMap map_;
2993
2994 SequenceNumber GetSequenceNumber() const override {
2995 // no need to call this
2996 assert(false);
2997 return 0;
2998 }
2999
3000 int64_t GetUnixTime() const override {
3001 // no need to call this
3002 assert(false);
3003 return 0;
3004 }
3005
3006 uint64_t GetTimestamp() const override {
3007 // no need to call this
3008 assert(false);
3009 return 0;
3010 }
3011 };
3012
3013 explicit ModelDB(const Options& options) : options_(options) {}
3014 using DB::Put;
3015 Status Put(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& k,
3016 const Slice& v) override {
3017 WriteBatch batch;
3018 Status s = batch.Put(cf, k, v);
3019 if (!s.ok()) {
3020 return s;
3021 }
3022 return Write(o, &batch);
3023 }
3024 Status Put(const WriteOptions& /*o*/, ColumnFamilyHandle* /*cf*/,
3025 const Slice& /*k*/, const Slice& /*ts*/,
3026 const Slice& /*v*/) override {
3027 return Status::NotSupported();
3028 }
3029
3030 using DB::PutEntity;
3031 Status PutEntity(const WriteOptions& /* options */,
3032 ColumnFamilyHandle* /* column_family */,
3033 const Slice& /* key */,
3034 const WideColumns& /* columns */) override {
3035 return Status::NotSupported();
3036 }
3037
3038 using DB::Close;
3039 Status Close() override { return Status::OK(); }
3040 using DB::Delete;
3041 Status Delete(const WriteOptions& o, ColumnFamilyHandle* cf,
3042 const Slice& key) override {
3043 WriteBatch batch;
3044 Status s = batch.Delete(cf, key);
3045 if (!s.ok()) {
3046 return s;
3047 }
3048 return Write(o, &batch);
3049 }
3050 Status Delete(const WriteOptions& /*o*/, ColumnFamilyHandle* /*cf*/,
3051 const Slice& /*key*/, const Slice& /*ts*/) override {
3052 return Status::NotSupported();
3053 }
3054 using DB::SingleDelete;
3055 Status SingleDelete(const WriteOptions& o, ColumnFamilyHandle* cf,
3056 const Slice& key) override {
3057 WriteBatch batch;
3058 Status s = batch.SingleDelete(cf, key);
3059 if (!s.ok()) {
3060 return s;
3061 }
3062 return Write(o, &batch);
3063 }
3064 Status SingleDelete(const WriteOptions& /*o*/, ColumnFamilyHandle* /*cf*/,
3065 const Slice& /*key*/, const Slice& /*ts*/) override {
3066 return Status::NotSupported();
3067 }
3068 using DB::Merge;
3069 Status Merge(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& k,
3070 const Slice& v) override {
3071 WriteBatch batch;
3072 Status s = batch.Merge(cf, k, v);
3073 if (!s.ok()) {
3074 return s;
3075 }
3076 return Write(o, &batch);
3077 }
3078 Status Merge(const WriteOptions& /*o*/, ColumnFamilyHandle* /*cf*/,
3079 const Slice& /*k*/, const Slice& /*ts*/,
3080 const Slice& /*value*/) override {
3081 return Status::NotSupported();
3082 }
3083 using DB::Get;
3084 Status Get(const ReadOptions& /*options*/, ColumnFamilyHandle* /*cf*/,
3085 const Slice& key, PinnableSlice* /*value*/) override {
3086 return Status::NotSupported(key);
3087 }
3088
3089 using DB::GetMergeOperands;
3090 virtual Status GetMergeOperands(
3091 const ReadOptions& /*options*/, ColumnFamilyHandle* /*column_family*/,
3092 const Slice& key, PinnableSlice* /*slice*/,
3093 GetMergeOperandsOptions* /*merge_operands_options*/,
3094 int* /*number_of_operands*/) override {
3095 return Status::NotSupported(key);
3096 }
3097
3098 using DB::MultiGet;
3099 std::vector<Status> MultiGet(
3100 const ReadOptions& /*options*/,
3101 const std::vector<ColumnFamilyHandle*>& /*column_family*/,
3102 const std::vector<Slice>& keys,
3103 std::vector<std::string>* /*values*/) override {
3104 std::vector<Status> s(keys.size(),
3105 Status::NotSupported("Not implemented."));
3106 return s;
3107 }
3108
3109 #ifndef ROCKSDB_LITE
3110 using DB::IngestExternalFile;
3111 Status IngestExternalFile(
3112 ColumnFamilyHandle* /*column_family*/,
3113 const std::vector<std::string>& /*external_files*/,
3114 const IngestExternalFileOptions& /*options*/) override {
3115 return Status::NotSupported("Not implemented.");
3116 }
3117
3118 using DB::IngestExternalFiles;
3119 Status IngestExternalFiles(
3120 const std::vector<IngestExternalFileArg>& /*args*/) override {
3121 return Status::NotSupported("Not implemented");
3122 }
3123
3124 using DB::CreateColumnFamilyWithImport;
3125 virtual Status CreateColumnFamilyWithImport(
3126 const ColumnFamilyOptions& /*options*/,
3127 const std::string& /*column_family_name*/,
3128 const ImportColumnFamilyOptions& /*import_options*/,
3129 const ExportImportFilesMetaData& /*metadata*/,
3130 ColumnFamilyHandle** /*handle*/) override {
3131 return Status::NotSupported("Not implemented.");
3132 }
3133
3134 using DB::VerifyChecksum;
3135 Status VerifyChecksum(const ReadOptions&) override {
3136 return Status::NotSupported("Not implemented.");
3137 }
3138
3139 using DB::GetPropertiesOfAllTables;
3140 Status GetPropertiesOfAllTables(
3141 ColumnFamilyHandle* /*column_family*/,
3142 TablePropertiesCollection* /*props*/) override {
3143 return Status();
3144 }
3145
3146 Status GetPropertiesOfTablesInRange(
3147 ColumnFamilyHandle* /*column_family*/, const Range* /*range*/,
3148 std::size_t /*n*/, TablePropertiesCollection* /*props*/) override {
3149 return Status();
3150 }
3151 #endif // ROCKSDB_LITE
3152
3153 using DB::KeyMayExist;
3154 bool KeyMayExist(const ReadOptions& /*options*/,
3155 ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/,
3156 std::string* /*value*/,
3157 bool* value_found = nullptr) override {
3158 if (value_found != nullptr) {
3159 *value_found = false;
3160 }
3161 return true; // Not supported directly
3162 }
3163 using DB::NewIterator;
3164 Iterator* NewIterator(const ReadOptions& options,
3165 ColumnFamilyHandle* /*column_family*/) override {
3166 if (options.snapshot == nullptr) {
3167 KVMap* saved = new KVMap;
3168 *saved = map_;
3169 return new ModelIter(saved, true);
3170 } else {
3171 const KVMap* snapshot_state =
3172 &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
3173 return new ModelIter(snapshot_state, false);
3174 }
3175 }
3176 Status NewIterators(const ReadOptions& /*options*/,
3177 const std::vector<ColumnFamilyHandle*>& /*column_family*/,
3178 std::vector<Iterator*>* /*iterators*/) override {
3179 return Status::NotSupported("Not supported yet");
3180 }
3181 const Snapshot* GetSnapshot() override {
3182 ModelSnapshot* snapshot = new ModelSnapshot;
3183 snapshot->map_ = map_;
3184 return snapshot;
3185 }
3186
3187 void ReleaseSnapshot(const Snapshot* snapshot) override {
3188 delete reinterpret_cast<const ModelSnapshot*>(snapshot);
3189 }
3190
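// Write is the one real mutation path: a WriteBatch::Handler replays the
// batch's Put and Delete operations onto the in-memory map (Merge is
// deliberately ignored, see below).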
3191 Status Write(const WriteOptions& /*options*/, WriteBatch* batch) override {
3192 class Handler : public WriteBatch::Handler {
3193 public:
3194 KVMap* map_;
3195 void Put(const Slice& key, const Slice& value) override {
3196 (*map_)[key.ToString()] = value.ToString();
3197 }
3198 void Merge(const Slice& /*key*/, const Slice& /*value*/) override {
3199 // ignore merge for now
3200 // (*map_)[key.ToString()] = value.ToString();
3201 }
3202 void Delete(const Slice& key) override { map_->erase(key.ToString()); }
3203 };
3204 Handler handler;
3205 handler.map_ = &map_;
3206 return batch->Iterate(&handler);
3207 }
3208
3209 using DB::GetProperty;
3210 bool GetProperty(ColumnFamilyHandle* /*column_family*/,
3211 const Slice& /*property*/, std::string* /*value*/) override {
3212 return false;
3213 }
3214 using DB::GetIntProperty;
3215 bool GetIntProperty(ColumnFamilyHandle* /*column_family*/,
3216 const Slice& /*property*/, uint64_t* /*value*/) override {
3217 return false;
3218 }
3219 using DB::GetMapProperty;
3220 bool GetMapProperty(ColumnFamilyHandle* /*column_family*/,
3221 const Slice& /*property*/,
3222 std::map<std::string, std::string>* /*value*/) override {
3223 return false;
3224 }
3225 using DB::GetAggregatedIntProperty;
3226 bool GetAggregatedIntProperty(const Slice& /*property*/,
3227 uint64_t* /*value*/) override {
3228 return false;
3229 }
3230 using DB::GetApproximateSizes;
3231 Status GetApproximateSizes(const SizeApproximationOptions& /*options*/,
3232 ColumnFamilyHandle* /*column_family*/,
3233 const Range* /*range*/, int n,
3234 uint64_t* sizes) override {
3235 for (int i = 0; i < n; i++) {
3236 sizes[i] = 0;
3237 }
3238 return Status::OK();
3239 }
3240 using DB::GetApproximateMemTableStats;
3241 void GetApproximateMemTableStats(ColumnFamilyHandle* /*column_family*/,
3242 const Range& /*range*/,
3243 uint64_t* const count,
3244 uint64_t* const size) override {
3245 *count = 0;
3246 *size = 0;
3247 }
3248 using DB::CompactRange;
3249 Status CompactRange(const CompactRangeOptions& /*options*/,
3250 ColumnFamilyHandle* /*column_family*/,
3251 const Slice* /*start*/, const Slice* /*end*/) override {
3252 return Status::NotSupported("Not supported operation.");
3253 }
3254
3255 Status SetDBOptions(
3256 const std::unordered_map<std::string, std::string>& /*new_options*/)
3257 override {
3258 return Status::NotSupported("Not supported operation.");
3259 }
3260
3261 using DB::CompactFiles;
3262 Status CompactFiles(
3263 const CompactionOptions& /*compact_options*/,
3264 ColumnFamilyHandle* /*column_family*/,
3265 const std::vector<std::string>& /*input_file_names*/,
3266 const int /*output_level*/, const int /*output_path_id*/ = -1,
3267 std::vector<std::string>* const /*output_file_names*/ = nullptr,
3268 CompactionJobInfo* /*compaction_job_info*/ = nullptr) override {
3269 return Status::NotSupported("Not supported operation.");
3270 }
3271
3272 Status PauseBackgroundWork() override {
3273 return Status::NotSupported("Not supported operation.");
3274 }
3275
3276 Status ContinueBackgroundWork() override {
3277 return Status::NotSupported("Not supported operation.");
3278 }
3279
3280 Status EnableAutoCompaction(
3281 const std::vector<ColumnFamilyHandle*>& /*column_family_handles*/)
3282 override {
3283 return Status::NotSupported("Not supported operation.");
3284 }
3285
3286 void EnableManualCompaction() override {}
3287
3288 void DisableManualCompaction() override {}
3289
3290 using DB::NumberLevels;
3291 int NumberLevels(ColumnFamilyHandle* /*column_family*/) override { return 1; }
3292
3293 using DB::MaxMemCompactionLevel;
3294 int MaxMemCompactionLevel(ColumnFamilyHandle* /*column_family*/) override {
3295 return 1;
3296 }
3297
3298 using DB::Level0StopWriteTrigger;
3299 int Level0StopWriteTrigger(ColumnFamilyHandle* /*column_family*/) override {
3300 return -1;
3301 }
3302
3303 const std::string& GetName() const override { return name_; }
3304
3305 Env* GetEnv() const override { return nullptr; }
3306
3307 using DB::GetOptions;
3308 Options GetOptions(ColumnFamilyHandle* /*column_family*/) const override {
3309 return options_;
3310 }
3311
3312 using DB::GetDBOptions;
3313 DBOptions GetDBOptions() const override { return options_; }
3314
3315 using DB::Flush;
3316 Status Flush(const ROCKSDB_NAMESPACE::FlushOptions& /*options*/,
3317 ColumnFamilyHandle* /*column_family*/) override {
3318 Status ret;
3319 return ret;
3320 }
3321 Status Flush(
3322 const ROCKSDB_NAMESPACE::FlushOptions& /*options*/,
3323 const std::vector<ColumnFamilyHandle*>& /*column_families*/) override {
3324 return Status::OK();
3325 }
3326
3327 Status SyncWAL() override { return Status::OK(); }
3328
3329 Status DisableFileDeletions() override { return Status::OK(); }
3330
3331 Status EnableFileDeletions(bool /*force*/) override { return Status::OK(); }
3332 #ifndef ROCKSDB_LITE
3333
3334 Status GetLiveFiles(std::vector<std::string>&, uint64_t* /*size*/,
3335 bool /*flush_memtable*/ = true) override {
3336 return Status::OK();
3337 }
3338
3339 Status GetLiveFilesChecksumInfo(
3340 FileChecksumList* /*checksum_list*/) override {
3341 return Status::OK();
3342 }
3343
3344 Status GetLiveFilesStorageInfo(
3345 const LiveFilesStorageInfoOptions& /*opts*/,
3346 std::vector<LiveFileStorageInfo>* /*files*/) override {
3347 return Status::OK();
3348 }
3349
3350 Status GetSortedWalFiles(VectorLogPtr& /*files*/) override {
3351 return Status::OK();
3352 }
3353
3354 Status GetCurrentWalFile(
3355 std::unique_ptr<LogFile>* /*current_log_file*/) override {
3356 return Status::OK();
3357 }
3358
3359 Status GetCreationTimeOfOldestFile(
3360 uint64_t* /*creation_time*/) override {
3361 return Status::NotSupported();
3362 }
3363
3364 Status DeleteFile(std::string /*name*/) override { return Status::OK(); }
3365
3366 Status GetUpdatesSince(
3367 ROCKSDB_NAMESPACE::SequenceNumber,
3368 std::unique_ptr<ROCKSDB_NAMESPACE::TransactionLogIterator>*,
3369 const TransactionLogIterator::ReadOptions& /*read_options*/ =
3370 TransactionLogIterator::ReadOptions()) override {
3371 return Status::NotSupported("Not supported in Model DB");
3372 }
3373
3374 void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
3375 ColumnFamilyMetaData* /*metadata*/) override {}
3376 #endif // ROCKSDB_LITE
3377
3378 Status GetDbIdentity(std::string& /*identity*/) const override {
3379 return Status::OK();
3380 }
3381
3382 Status GetDbSessionId(std::string& /*session_id*/) const override {
3383 return Status::OK();
3384 }
3385
3386 SequenceNumber GetLatestSequenceNumber() const override { return 0; }
3387
3388 Status IncreaseFullHistoryTsLow(ColumnFamilyHandle* /*cf*/,
3389 std::string /*ts_low*/) override {
3390 return Status::OK();
3391 }
3392
3393 Status GetFullHistoryTsLow(ColumnFamilyHandle* /*cf*/,
3394 std::string* /*ts_low*/) override {
3395 return Status::OK();
3396 }
3397
3398 ColumnFamilyHandle* DefaultColumnFamily() const override { return nullptr; }
3399
3400 private:
3401 class ModelIter : public Iterator {
3402 public:
3403 ModelIter(const KVMap* map, bool owned)
3404 : map_(map), owned_(owned), iter_(map_->end()) {}
3405 ~ModelIter() override {
3406 if (owned_) delete map_;
3407 }
3408 bool Valid() const override { return iter_ != map_->end(); }
3409 void SeekToFirst() override { iter_ = map_->begin(); }
3410 void SeekToLast() override {
3411 if (map_->empty()) {
3412 iter_ = map_->end();
3413 } else {
3414 iter_ = map_->find(map_->rbegin()->first);
3415 }
3416 }
3417 void Seek(const Slice& k) override {
3418 iter_ = map_->lower_bound(k.ToString());
3419 }
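    // SeekForPrev() positions the iterator at the last key <= k:
    // upper_bound() lands on the first key greater than k, and stepping
    // back once via Prev() yields the desired entry, or invalidates the
    // iterator if no such key exists.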
3420 void SeekForPrev(const Slice& k) override {
3421 iter_ = map_->upper_bound(k.ToString());
3422 Prev();
3423 }
3424 void Next() override { ++iter_; }
3425 void Prev() override {
3426 if (iter_ == map_->begin()) {
3427 iter_ = map_->end();
3428 return;
3429 }
3430 --iter_;
3431 }
3432
3433 Slice key() const override { return iter_->first; }
3434 Slice value() const override { return iter_->second; }
3435 Status status() const override { return Status::OK(); }
3436
3437 private:
3438 const KVMap* const map_;
3439 const bool owned_; // Do we own map_
3440 KVMap::const_iterator iter_;
3441 };
3442 const Options options_;
3443 KVMap map_;
3444 std::string name_;
3445 };
3446
3447 #if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
3448 static std::string RandomKey(Random* rnd, int minimum = 0) {
3449 int len;
3450 do {
3451 len = (rnd->OneIn(3)
3452 ? 1 // Short sometimes to encourage collisions
3453 : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
3454 } while (len < minimum);
3455 return test::RandomKey(rnd, len);
3456 }
3457
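// Walks the model DB and the real DB in lockstep under the given snapshots
// and reports the first key or value divergence to stderr; returns true iff
// both iterators agree on every entry and reach the end together.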
3458 static bool CompareIterators(int step, DB* model, DB* db,
3459 const Snapshot* model_snap,
3460 const Snapshot* db_snap) {
3461 ReadOptions options;
3462 options.snapshot = model_snap;
3463 Iterator* miter = model->NewIterator(options);
3464 options.snapshot = db_snap;
3465 Iterator* dbiter = db->NewIterator(options);
3466 bool ok = true;
3467 int count = 0;
3468 for (miter->SeekToFirst(), dbiter->SeekToFirst();
3469 ok && miter->Valid() && dbiter->Valid(); miter->Next(), dbiter->Next()) {
3470 count++;
3471 if (miter->key().compare(dbiter->key()) != 0) {
3472 fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", step,
3473 EscapeString(miter->key()).c_str(),
3474 EscapeString(dbiter->key()).c_str());
3475 ok = false;
3476 break;
3477 }
3478
3479 if (miter->value().compare(dbiter->value()) != 0) {
3480 fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
3481 step, EscapeString(miter->key()).c_str(),
3482 EscapeString(miter->value()).c_str(),
3483 EscapeString(dbiter->value()).c_str());
3484 ok = false;
3485 }
3486 }
3487
3488 if (ok) {
3489 if (miter->Valid() != dbiter->Valid()) {
3490 fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
3491 step, miter->Valid(), dbiter->Valid());
3492 ok = false;
3493 }
3494 }
3495 delete miter;
3496 delete dbiter;
3497 return ok;
3498 }
3499
3500 class DBTestRandomized : public DBTest,
3501 public ::testing::WithParamInterface<int> {
3502 public:
3503 void SetUp() override { option_config_ = GetParam(); }
3504
3505 static std::vector<int> GenerateOptionConfigs() {
3506 std::vector<int> option_configs;
3507 // skip cuckoo hash as it does not support snapshot.
3508 for (int option_config = kDefault; option_config < kEnd; ++option_config) {
3509 if (!ShouldSkipOptions(option_config,
3510 kSkipDeletesFilterFirst | kSkipNoSeekToLast)) {
3511 option_configs.push_back(option_config);
3512 }
3513 }
3514 option_configs.push_back(kBlockBasedTableWithIndexRestartInterval);
3515 return option_configs;
3516 }
3517 };
3518
3519 INSTANTIATE_TEST_CASE_P(
3520 DBTestRandomized, DBTestRandomized,
3521 ::testing::ValuesIn(DBTestRandomized::GenerateOptionConfigs()));
3522
3523 TEST_P(DBTestRandomized, Randomized) {
3524 anon::OptionsOverride options_override;
3525 options_override.skip_policy = kSkipNoSnapshot;
3526 Options options = CurrentOptions(options_override);
3527 DestroyAndReopen(options);
3528
3529 Random rnd(test::RandomSeed() + GetParam());
3530 ModelDB model(options);
3531 const int N = 10000;
3532 const Snapshot* model_snap = nullptr;
3533 const Snapshot* db_snap = nullptr;
3534 std::string k, v;
3535 for (int step = 0; step < N; step++) {
3536 // TODO(sanjay): Test Get() works
3537 int p = rnd.Uniform(100);
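    // p selects the operation type: [0, 45) single Put, [45, 90) single
    // Delete, [90, 100) multi-key WriteBatch.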
3538 int minimum = 0;
3539 if (option_config_ == kHashSkipList || option_config_ == kHashLinkList ||
3540 option_config_ == kPlainTableFirstBytePrefix ||
3541 option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
3542 option_config_ == kBlockBasedTableWithPrefixHashIndex) {
3543 minimum = 1;
3544 }
3545 if (p < 45) { // Put
3546 k = RandomKey(&rnd, minimum);
3547 v = rnd.RandomString(rnd.OneIn(20) ? 100 + rnd.Uniform(100)
3548 : rnd.Uniform(8));
3549 ASSERT_OK(model.Put(WriteOptions(), k, v));
3550 ASSERT_OK(db_->Put(WriteOptions(), k, v));
3551 } else if (p < 90) { // Delete
3552 k = RandomKey(&rnd, minimum);
3553 ASSERT_OK(model.Delete(WriteOptions(), k));
3554 ASSERT_OK(db_->Delete(WriteOptions(), k));
3555 } else { // Multi-element batch
3556 WriteBatch b;
3557 const int num = rnd.Uniform(8);
3558 for (int i = 0; i < num; i++) {
3559 if (i == 0 || !rnd.OneIn(10)) {
3560 k = RandomKey(&rnd, minimum);
3561 } else {
3562 // Periodically re-use the key from the previous iteration, so the
3563 // write batch contains multiple entries for the same key
3564 }
3565 if (rnd.OneIn(2)) {
3566 v = rnd.RandomString(rnd.Uniform(10));
3567 ASSERT_OK(b.Put(k, v));
3568 } else {
3569 ASSERT_OK(b.Delete(k));
3570 }
3571 }
3572 ASSERT_OK(model.Write(WriteOptions(), &b));
3573 ASSERT_OK(db_->Write(WriteOptions(), &b));
3574 }
3575
3576 if ((step % 100) == 0) {
3577 // For DB instances that use the hash index + block-based table, the
3578 // iterator becomes invalid when seeking a non-existent key, rather
3579 // than returning a key that is close to it.
3580 if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex &&
3581 option_config_ != kBlockBasedTableWithPrefixHashIndex) {
3582 ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
3583 ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
3584 }
3585
3586 // Save a snapshot from each DB that we'll use the next time we
3587 // compare things, to make sure the current state is preserved
3588 // by the snapshot.
3589 if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
3590 if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
3591
3592 Reopen(options);
3593 ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
3594
3595 model_snap = model.GetSnapshot();
3596 db_snap = db_->GetSnapshot();
3597 }
3598 }
3599 if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
3600 if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
3601 }
3602 #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
3603
3604 TEST_F(DBTest, BlockBasedTablePrefixIndexTest) {
3605 // create a DB with a block-based hash (prefix) index
3606 BlockBasedTableOptions table_options;
3607 Options options = CurrentOptions();
3608 table_options.index_type = BlockBasedTableOptions::kHashSearch;
3609 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3610 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
3611
3612 Reopen(options);
3613 ASSERT_OK(Put("k1", "v1"));
3614 ASSERT_OK(Flush());
3615 ASSERT_OK(Put("k2", "v2"));
3616
3617 // Reopen with a different prefix extractor; make sure everything still
3618 // works. RocksDB should just fall back to the binary index.
3619 options.prefix_extractor.reset(NewFixedPrefixTransform(2));
3620
3621 Reopen(options);
3622 ASSERT_EQ("v1", Get("k1"));
3623 ASSERT_EQ("v2", Get("k2"));
3624
3625 #ifndef ROCKSDB_LITE
3626 // Back to original
3627 ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
3628 ASSERT_EQ("v1", Get("k1"));
3629 ASSERT_EQ("v2", Get("k2"));
3630 #endif // !ROCKSDB_LITE
3631
3632 // Same if there's a problem initially loading the prefix transform
3633 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
3634 SyncPoint::GetInstance()->SetCallBack(
3635 "BlockBasedTable::Open::ForceNullTablePrefixExtractor",
3636 [&](void* arg) { *static_cast<bool*>(arg) = true; });
3637 SyncPoint::GetInstance()->EnableProcessing();
3638 Reopen(options);
3639 ASSERT_EQ("v1", Get("k1"));
3640 ASSERT_EQ("v2", Get("k2"));
3641
3642 #ifndef ROCKSDB_LITE
3643 // Change again
3644 ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}}));
3645 ASSERT_EQ("v1", Get("k1"));
3646 ASSERT_EQ("v2", Get("k2"));
3647 #endif // !ROCKSDB_LITE
3648 SyncPoint::GetInstance()->DisableProcessing();
3649
3650 // Reopen with no prefix extractor; make sure everything still works.
3651 // RocksDB should just fall back to the binary index.
3652 table_options.index_type = BlockBasedTableOptions::kBinarySearch;
3653 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3654 options.prefix_extractor.reset();
3655
3656 Reopen(options);
3657 ASSERT_EQ("v1", Get("k1"));
3658 ASSERT_EQ("v2", Get("k2"));
3659 }
3660
3661 TEST_F(DBTest, BlockBasedTablePrefixHashIndexTest) {
3662 // create a DB with a block-based hash (prefix) index
3663 BlockBasedTableOptions table_options;
3664 Options options = CurrentOptions();
3665 table_options.index_type = BlockBasedTableOptions::kHashSearch;
3666 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3667 options.prefix_extractor.reset(NewCappedPrefixTransform(2));
3668
3669 Reopen(options);
3670 ASSERT_OK(Put("kk1", "v1"));
3671 ASSERT_OK(Put("kk2", "v2"));
3672 ASSERT_OK(Put("kk", "v3"));
3673 ASSERT_OK(Put("k", "v4"));
3674 ASSERT_OK(Flush());
3675
3676 ASSERT_EQ("v1", Get("kk1"));
3677 ASSERT_EQ("v2", Get("kk2"));
3678
3679 ASSERT_EQ("v3", Get("kk"));
3680 ASSERT_EQ("v4", Get("k"));
3681 }
3682
3683 TEST_F(DBTest, BlockBasedTablePrefixIndexTotalOrderSeek) {
3684 // create a DB with a block-based hash (prefix) index
3685 BlockBasedTableOptions table_options;
3686 Options options = CurrentOptions();
3687 options.max_open_files = 10;
3688 table_options.index_type = BlockBasedTableOptions::kHashSearch;
3689 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3690 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
3691
3692 // RocksDB sanitizes max_open_files to at least 20. Modify it back.
3693 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
3694 "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
3695 int* max_open_files = static_cast<int*>(arg);
3696 *max_open_files = 11;
3697 });
3698 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
3699
3700 Reopen(options);
3701 ASSERT_OK(Put("k1", "v1"));
3702 ASSERT_OK(Flush());
3703
3704 CompactRangeOptions cro;
3705 cro.change_level = true;
3706 cro.target_level = 1;
3707 ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
3708
3709 // Force-evict all tables from the table cache.
3710 dbfull()->TEST_table_cache()->SetCapacity(0);
3711 // Then let the table cache keep a single entry.
3712 dbfull()->TEST_table_cache()->SetCapacity(1);
3713
3714 ReadOptions read_options;
3715 read_options.total_order_seek = true;
3716 {
3717 std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
3718 iter->Seek("k1");
3719 ASSERT_TRUE(iter->Valid());
3720 ASSERT_EQ("k1", iter->key().ToString());
3721 }
3722
3723 // After a total-order seek, the prefix index should still be usable.
3724 read_options.total_order_seek = false;
3725 {
3726 std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
3727 iter->Seek("k1");
3728 ASSERT_TRUE(iter->Valid());
3729 ASSERT_EQ("k1", iter->key().ToString());
3730 }
3731 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
3732 }
3733
3734 TEST_F(DBTest, ChecksumTest) {
3735 BlockBasedTableOptions table_options;
3736 Options options = CurrentOptions();
3737
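  // Each SST file records the checksum type it was written with in its
  // footer, so files created under kCRC32c and kxxHash can coexist in one
  // DB and remain readable regardless of the currently configured checksum.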
3738 table_options.checksum = kCRC32c;
3739 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3740 Reopen(options);
3741 ASSERT_OK(Put("a", "b"));
3742 ASSERT_OK(Put("c", "d"));
3743 ASSERT_OK(Flush()); // table with crc checksum
3744
3745 table_options.checksum = kxxHash;
3746 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3747 Reopen(options);
3748 ASSERT_OK(Put("e", "f"));
3749 ASSERT_OK(Put("g", "h"));
3750 ASSERT_OK(Flush()); // table with xxhash checksum
3751
3752 table_options.checksum = kCRC32c;
3753 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3754 Reopen(options);
3755 ASSERT_EQ("b", Get("a"));
3756 ASSERT_EQ("d", Get("c"));
3757 ASSERT_EQ("f", Get("e"));
3758 ASSERT_EQ("h", Get("g"));
3759
3760 table_options.checksum = kCRC32c;
3761 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3762 Reopen(options);
3763 ASSERT_EQ("b", Get("a"));
3764 ASSERT_EQ("d", Get("c"));
3765 ASSERT_EQ("f", Get("e"));
3766 ASSERT_EQ("h", Get("g"));
3767 }
3768
3769 #ifndef ROCKSDB_LITE
3770 TEST_P(DBTestWithParam, FIFOCompactionTest) {
3771 for (int iter = 0; iter < 2; ++iter) {
3772 // first iteration -- auto compaction
3773 // second iteration -- manual compaction
3774 Options options;
3775 options.compaction_style = kCompactionStyleFIFO;
3776 options.write_buffer_size = 100 << 10; // 100KB
3777 options.arena_block_size = 4096;
3778 options.compaction_options_fifo.max_table_files_size = 500 << 10; // 500KB
3779 options.compression = kNoCompression;
3780 options.create_if_missing = true;
3781 options.max_subcompactions = max_subcompactions_;
3782 if (iter == 1) {
3783 options.disable_auto_compactions = true;
3784 }
3785 options = CurrentOptions(options);
3786 DestroyAndReopen(options);
3787
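    // Each loop iteration fills the ~100KB write buffer, producing one
    // ~100KB L0 file; with max_table_files_size = 500KB, FIFO compaction
    // should drop the oldest files until roughly 5 remain.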
3788 Random rnd(301);
3789 for (int i = 0; i < 6; ++i) {
3790 for (int j = 0; j < 110; ++j) {
3791 ASSERT_OK(Put(std::to_string(i * 100 + j), rnd.RandomString(980)));
3792 }
3793 // flush should happen here
3794 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
3795 }
3796 if (iter == 0) {
3797 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3798 } else {
3799 CompactRangeOptions cro;
3800 cro.exclusive_manual_compaction = exclusive_manual_compaction_;
3801 ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
3802 }
3803 // only 5 files should survive
3804 ASSERT_EQ(NumTableFilesAtLevel(0), 5);
3805 for (int i = 0; i < 50; ++i) {
3806 // these keys should have been deleted by the previous compaction
3807 ASSERT_EQ("NOT_FOUND", Get(std::to_string(i)));
3808 }
3809 }
3810 }
3811
3812 TEST_F(DBTest, FIFOCompactionTestWithCompaction) {
3813 Options options;
3814 options.compaction_style = kCompactionStyleFIFO;
3815 options.write_buffer_size = 20 << 10; // 20K
3816 options.arena_block_size = 4096;
3817 options.compaction_options_fifo.max_table_files_size = 1500 << 10; // 1.5MB
3818 options.compaction_options_fifo.allow_compaction = true;
3819 options.level0_file_num_compaction_trigger = 6;
3820 options.compression = kNoCompression;
3821 options.create_if_missing = true;
3822 options = CurrentOptions(options);
3823 DestroyAndReopen(options);
3824
3825 Random rnd(301);
3826 for (int i = 0; i < 60; i++) {
3827 // Generate and flush a file about 20KB.
3828 for (int j = 0; j < 20; j++) {
3829 ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
3830 }
3831 ASSERT_OK(Flush());
3832 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3833 }
3834 // It should be compacted to 10 files.
3835 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3836
3837 for (int i = 0; i < 60; i++) {
3838 // Generate and flush a file about 20KB.
3839 for (int j = 0; j < 20; j++) {
3840 ASSERT_OK(Put(std::to_string(i * 20 + j + 2000), rnd.RandomString(980)));
3841 }
3842 ASSERT_OK(Flush());
3843 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3844 }
3845
3846 // It should be compacted to more than 10 but fewer than 18 files.
3847 ASSERT_GT(NumTableFilesAtLevel(0), 10);
3848 ASSERT_LT(NumTableFilesAtLevel(0), 18);
3849 // Size limit is still guaranteed.
3850 ASSERT_LE(SizeAtLevel(0),
3851 options.compaction_options_fifo.max_table_files_size);
3852 }
3853
3854 TEST_F(DBTest, FIFOCompactionStyleWithCompactionAndDelete) {
3855 Options options;
3856 options.compaction_style = kCompactionStyleFIFO;
3857 options.write_buffer_size = 20 << 10; // 20K
3858 options.arena_block_size = 4096;
3859 options.compaction_options_fifo.max_table_files_size = 1500 << 10; // 1.5MB
3860 options.compaction_options_fifo.allow_compaction = true;
3861 options.level0_file_num_compaction_trigger = 3;
3862 options.compression = kNoCompression;
3863 options.create_if_missing = true;
3864 options = CurrentOptions(options);
3865 DestroyAndReopen(options);
3866
3867 Random rnd(301);
3868 for (int i = 0; i < 3; i++) {
3869 // Each file contains a different key which will be dropped later.
3870 ASSERT_OK(Put("a" + std::to_string(i), rnd.RandomString(500)));
3871 ASSERT_OK(Put("key" + std::to_string(i), ""));
3872 ASSERT_OK(Put("z" + std::to_string(i), rnd.RandomString(500)));
3873 ASSERT_OK(Flush());
3874 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3875 }
3876 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
3877 for (int i = 0; i < 3; i++) {
3878 ASSERT_EQ("", Get("key" + std::to_string(i)));
3879 }
3880 for (int i = 0; i < 3; i++) {
3881 // Each file contains a different key which will be dropped later.
3882 ASSERT_OK(Put("a" + std::to_string(i), rnd.RandomString(500)));
3883 ASSERT_OK(Delete("key" + std::to_string(i)));
3884 ASSERT_OK(Put("z" + std::to_string(i), rnd.RandomString(500)));
3885 ASSERT_OK(Flush());
3886 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3887 }
3888 ASSERT_EQ(NumTableFilesAtLevel(0), 2);
3889 for (int i = 0; i < 3; i++) {
3890 ASSERT_EQ("NOT_FOUND", Get("key" + std::to_string(i)));
3891 }
3892 }
3893
3894 // Check that FIFO-with-TTL is not supported with max_open_files != -1.
3895 // Github issue #8014
3896 TEST_F(DBTest, FIFOCompactionWithTTLAndMaxOpenFilesTest) {
3897 Options options = CurrentOptions();
3898 options.compaction_style = kCompactionStyleFIFO;
3899 options.create_if_missing = true;
3900 options.ttl = 600; // seconds
3901
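  // FIFO-with-TTL needs each file's creation time, which is read from table
  // properties; requiring max_open_files == -1 (all tables kept open) is,
  // presumably, what keeps that metadata cheaply available.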
3902 // TTL is not supported with max_open_files != -1.
3903 options.max_open_files = 0;
3904 ASSERT_TRUE(TryReopen(options).IsNotSupported());
3905
3906 options.max_open_files = 100;
3907 ASSERT_TRUE(TryReopen(options).IsNotSupported());
3908
3909 // TTL is supported with unlimited max_open_files
3910 options.max_open_files = -1;
3911 ASSERT_OK(TryReopen(options));
3912 }
3913
3914 // Check that FIFO-with-TTL is supported only with BlockBasedTableFactory.
3915 TEST_F(DBTest, FIFOCompactionWithTTLAndVariousTableFormatsTest) {
3916 Options options;
3917 options.compaction_style = kCompactionStyleFIFO;
3918 options.create_if_missing = true;
3919 options.ttl = 600; // seconds
3920
3921 options = CurrentOptions(options);
3922 options.table_factory.reset(NewBlockBasedTableFactory());
3923 ASSERT_OK(TryReopen(options));
3924
3925 Destroy(options);
3926 options.table_factory.reset(NewPlainTableFactory());
3927 ASSERT_TRUE(TryReopen(options).IsNotSupported());
3928
3929 Destroy(options);
3930 options.table_factory.reset(NewAdaptiveTableFactory());
3931 ASSERT_TRUE(TryReopen(options).IsNotSupported());
3932 }
3933
3934 TEST_F(DBTest, FIFOCompactionWithTTLTest) {
3935 Options options;
3936 options.compaction_style = kCompactionStyleFIFO;
3937 options.write_buffer_size = 10 << 10; // 10KB
3938 options.arena_block_size = 4096;
3939 options.compression = kNoCompression;
3940 options.create_if_missing = true;
3941 env_->SetMockSleep();
3942 options.env = env_;
3943
3944 // Test to make sure that all files with expired ttl are deleted on next
3945 // manual compaction.
3946 {
3947 // NOTE: Presumed unnecessary and removed: resetting mock time in env
3948
3949 options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB
3950 options.compaction_options_fifo.allow_compaction = false;
3951 options.ttl = 1 * 60 * 60; // 1 hour
3952 options = CurrentOptions(options);
3953 DestroyAndReopen(options);
3954
3955 Random rnd(301);
3956 for (int i = 0; i < 10; i++) {
3957 // Generate and flush a file about 10KB.
3958 for (int j = 0; j < 10; j++) {
3959 ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
3960 }
3961 ASSERT_OK(Flush());
3962 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3963 }
3964 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3965
3966 // Sleep for 2 hours -- which is much greater than TTL.
3967 env_->MockSleepForSeconds(2 * 60 * 60);
3968
3969 // Since no flushes and compactions have run, the db should still be in
3970 // the same state even after considerable time has passed.
3971 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3972 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3973
3974 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
3975 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
3976 }
3977
3978 // Test to make sure that all files with expired ttl are deleted on next
3979 // automatic compaction.
3980 {
3981 options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB
3982 options.compaction_options_fifo.allow_compaction = false;
3983 options.ttl = 1 * 60 * 60; // 1 hour
3984 options = CurrentOptions(options);
3985 DestroyAndReopen(options);
3986
3987 Random rnd(301);
3988 for (int i = 0; i < 10; i++) {
3989 // Generate and flush a file about 10KB.
3990 for (int j = 0; j < 10; j++) {
3991 ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
3992 }
3993 ASSERT_OK(Flush());
3994 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3995 }
3996 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3997
3998 // Sleep for 2 hours -- which is much greater than TTL.
3999 env_->MockSleepForSeconds(2 * 60 * 60);
4000 // Just to make sure that we are in the same state even after sleeping.
4001 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4002 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
4003
4004 // Create 1 more file to trigger TTL compaction. The old files are dropped.
4005 for (int i = 0; i < 1; i++) {
4006 for (int j = 0; j < 10; j++) {
4007 ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
4008 }
4009 ASSERT_OK(Flush());
4010 }
4011
4012 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4013 // Only the newly created file remains.
4014 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
4015 ASSERT_LE(SizeAtLevel(0),
4016 options.compaction_options_fifo.max_table_files_size);
4017 }
4018
4019 // Test that shows the fallback to size-based FIFO compaction if TTL-based
4020 // deletion doesn't bring the total size below max_table_files_size.
4021 {
4022 options.write_buffer_size = 10 << 10; // 10KB
4023 options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB
4024 options.compaction_options_fifo.allow_compaction = false;
4025 options.ttl = 1 * 60 * 60; // 1 hour
4026 options = CurrentOptions(options);
4027 DestroyAndReopen(options);
4028
4029 Random rnd(301);
4030 for (int i = 0; i < 3; i++) {
4031 // Generate and flush a file about 10KB.
4032 for (int j = 0; j < 10; j++) {
4033 ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
4034 }
4035 ASSERT_OK(Flush());
4036 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4037 }
4038 ASSERT_EQ(NumTableFilesAtLevel(0), 3);
4039
4040 // Sleep for 2 hours -- which is much greater than TTL.
4041 env_->MockSleepForSeconds(2 * 60 * 60);
4042 // Just to make sure that we are in the same state even after sleeping.
4043 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4044 ASSERT_EQ(NumTableFilesAtLevel(0), 3);
4045
4046 for (int i = 0; i < 5; i++) {
4047 for (int j = 0; j < 140; j++) {
4048 ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
4049 }
4050 ASSERT_OK(Flush());
4051 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4052 }
4053 // Size limit is still guaranteed.
4054 ASSERT_LE(SizeAtLevel(0),
4055 options.compaction_options_fifo.max_table_files_size);
4056 }
4057
4058 // Test with TTL + Intra-L0 compactions.
4059 {
4060 options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB
4061 options.compaction_options_fifo.allow_compaction = true;
4062 options.ttl = 1 * 60 * 60; // 1 hour
4063 options.level0_file_num_compaction_trigger = 6;
4064 options = CurrentOptions(options);
4065 DestroyAndReopen(options);
4066
4067 Random rnd(301);
4068 for (int i = 0; i < 10; i++) {
4069 // Generate and flush a file about 10KB.
4070 for (int j = 0; j < 10; j++) {
4071 ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
4072 }
4073 ASSERT_OK(Flush());
4074 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4075 }
4076 // With Intra-L0 compaction, out of 10 files, 6 files will be compacted to 1
4077 // (due to level0_file_num_compaction_trigger = 6).
4078 // So total files = 1 + remaining 4 = 5.
4079 ASSERT_EQ(NumTableFilesAtLevel(0), 5);
4080
4081 // Sleep for 2 hours -- which is much greater than TTL.
4082 env_->MockSleepForSeconds(2 * 60 * 60);
4083 // Just to make sure that we are in the same state even after sleeping.
4084 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4085 ASSERT_EQ(NumTableFilesAtLevel(0), 5);
4086
4087 // Create 10 more files. The old 5 files are dropped as their ttl expired.
4088 for (int i = 0; i < 10; i++) {
4089 for (int j = 0; j < 10; j++) {
4090 ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
4091 }
4092 ASSERT_OK(Flush());
4093 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4094 }
4095 ASSERT_EQ(NumTableFilesAtLevel(0), 5);
4096 ASSERT_LE(SizeAtLevel(0),
4097 options.compaction_options_fifo.max_table_files_size);
4098 }
4099
4100 // Test with large TTL + Intra-L0 compactions.
4101 // Files are dropped based on size, as the TTL doesn't kick in.
4102 {
4103 options.write_buffer_size = 20 << 10; // 20K
4104 options.compaction_options_fifo.max_table_files_size = 1500 << 10; // 1.5MB
4105 options.compaction_options_fifo.allow_compaction = true;
4106 options.ttl = 1 * 60 * 60; // 1 hour
4107 options.level0_file_num_compaction_trigger = 6;
4108 options = CurrentOptions(options);
4109 DestroyAndReopen(options);
4110
4111 Random rnd(301);
4112 for (int i = 0; i < 60; i++) {
4113 // Generate and flush a file about 20KB.
4114 for (int j = 0; j < 20; j++) {
4115 ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
4116 }
4117 ASSERT_OK(Flush());
4118 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4119 }
4120 // It should be compacted to 10 files.
4121 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
4122
4123 for (int i = 0; i < 60; i++) {
4124 // Generate and flush a file about 20KB.
4125 for (int j = 0; j < 20; j++) {
4126 ASSERT_OK(
4127 Put(std::to_string(i * 20 + j + 2000), rnd.RandomString(980)));
4128 }
4129 ASSERT_OK(Flush());
4130 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4131 }
4132
4133 // It should be compacted to more than 10 but fewer than 18 files.
4134 ASSERT_GT(NumTableFilesAtLevel(0), 10);
4135 ASSERT_LT(NumTableFilesAtLevel(0), 18);
4136 // Size limit is still guaranteed.
4137 ASSERT_LE(SizeAtLevel(0),
4138 options.compaction_options_fifo.max_table_files_size);
4139 }
4140 }
4141 #endif // ROCKSDB_LITE
4142
4143 #ifndef ROCKSDB_LITE
4144 /*
4145 * This test is not reliable enough as it heavily depends on disk behavior.
4146 * Disable as it is flaky.
4147 */
4148 TEST_F(DBTest, DISABLED_RateLimitingTest) {
4149 Options options = CurrentOptions();
4150 options.write_buffer_size = 1 << 20; // 1MB
4151 options.level0_file_num_compaction_trigger = 2;
4152 options.target_file_size_base = 1 << 20; // 1MB
4153 options.max_bytes_for_level_base = 4 << 20; // 4MB
4154 options.max_bytes_for_level_multiplier = 4;
4155 options.compression = kNoCompression;
4156 options.create_if_missing = true;
4157 options.env = env_;
4158 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
4159 options.IncreaseParallelism(4);
4160 DestroyAndReopen(options);
4161
4162 WriteOptions wo;
4163 wo.disableWAL = true;
4164
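  // Methodology: first measure the raw (unthrottled) write rate, then repeat
  // the same workload with a rate limiter set to a fraction of that rate and
  // check that the achieved throughput ratio lands near the fraction.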
4165 // # no rate limiting
4166 Random rnd(301);
4167 uint64_t start = env_->NowMicros();
4168 // Write ~96M data
4169 for (int64_t i = 0; i < (96 << 10); ++i) {
4170 ASSERT_OK(Put(rnd.RandomString(32), rnd.RandomString((1 << 10) + 1), wo));
4171 }
4172 uint64_t elapsed = env_->NowMicros() - start;
4173 double raw_rate = env_->bytes_written_ * 1000000.0 / elapsed;
4174 uint64_t rate_limiter_drains =
4175 TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS);
4176 ASSERT_EQ(0, rate_limiter_drains);
4177 Close();
4178
4179 // # rate limiting with 0.7 x threshold
4180 options.rate_limiter.reset(
4181 NewGenericRateLimiter(static_cast<int64_t>(0.7 * raw_rate)));
4182 env_->bytes_written_ = 0;
4183 DestroyAndReopen(options);
4184
4185 start = env_->NowMicros();
4186 // Write ~96M data
4187 for (int64_t i = 0; i < (96 << 10); ++i) {
4188 ASSERT_OK(Put(rnd.RandomString(32), rnd.RandomString((1 << 10) + 1), wo));
4189 }
4190 rate_limiter_drains =
4191 TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS) -
4192 rate_limiter_drains;
4193 elapsed = env_->NowMicros() - start;
4194 Close();
4195 ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
4196 // Most intervals should've been drained (interval time is 100ms, elapsed is
4197 // micros)
4198 ASSERT_GT(rate_limiter_drains, 0);
4199 ASSERT_LE(rate_limiter_drains, elapsed / 100000 + 1);
4200 double ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
4201 fprintf(stderr, "write rate ratio = %.2lf, expected 0.7\n", ratio);
4202 ASSERT_LT(ratio, 0.8);
4203
4204 // # rate limiting with half of the raw_rate
4205 options.rate_limiter.reset(
4206 NewGenericRateLimiter(static_cast<int64_t>(raw_rate / 2)));
4207 env_->bytes_written_ = 0;
4208 DestroyAndReopen(options);
4209
4210 start = env_->NowMicros();
4211 // Write ~96M data
4212 for (int64_t i = 0; i < (96 << 10); ++i) {
4213 ASSERT_OK(Put(rnd.RandomString(32), rnd.RandomString((1 << 10) + 1), wo));
4214 }
4215 elapsed = env_->NowMicros() - start;
4216 rate_limiter_drains =
4217 TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS) -
4218 rate_limiter_drains;
4219 Close();
4220 ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
4221 // Most intervals should've been drained (interval time is 100ms, elapsed is
4222 // micros)
4223 ASSERT_GT(rate_limiter_drains, elapsed / 100000 / 2);
4224 ASSERT_LE(rate_limiter_drains, elapsed / 100000 + 1);
4225 ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
4226 fprintf(stderr, "write rate ratio = %.2lf, expected 0.5\n", ratio);
4227 ASSERT_LT(ratio, 0.6);
4228 }
4229
4230 // A mocked custom rate limiter that does not implement the optional APIs
4231 // (e.g., RateLimiter::GetTotalPendingRequests()).
4232 class MockedRateLimiterWithNoOptionalAPIImpl : public RateLimiter {
4233 public:
4234 MockedRateLimiterWithNoOptionalAPIImpl() {}
4235
4236 ~MockedRateLimiterWithNoOptionalAPIImpl() override {}
4237
4238 void SetBytesPerSecond(int64_t bytes_per_second) override {
4239 (void)bytes_per_second;
4240 }
4241
4242 using RateLimiter::Request;
4243 void Request(const int64_t bytes, const Env::IOPriority pri,
4244 Statistics* stats) override {
4245 (void)bytes;
4246 (void)pri;
4247 (void)stats;
4248 }
4249
4250 int64_t GetSingleBurstBytes() const override { return 200; }
4251
4252 int64_t GetTotalBytesThrough(
4253 const Env::IOPriority pri = Env::IO_TOTAL) const override {
4254 (void)pri;
4255 return 0;
4256 }
4257
4258 int64_t GetTotalRequests(
4259 const Env::IOPriority pri = Env::IO_TOTAL) const override {
4260 (void)pri;
4261 return 0;
4262 }
4263
4264 int64_t GetBytesPerSecond() const override { return 0; }
4265 };
4266
4267 // Tests that a custom rate limiter that does not implement the optional
4268 // APIs (e.g., RateLimiter::GetTotalPendingRequests()) works fine with basic
4269 // RocksDB operations (e.g., Put, Get, Flush).
4270 TEST_F(DBTest, CustomedRateLimiterWithNoOptionalAPIImplTest) {
4271 Options options = CurrentOptions();
4272 options.rate_limiter.reset(new MockedRateLimiterWithNoOptionalAPIImpl());
4273 DestroyAndReopen(options);
4274 ASSERT_OK(Put("abc", "def"));
4275 ASSERT_EQ(Get("abc"), "def");
4276 ASSERT_OK(Flush());
4277 ASSERT_EQ(Get("abc"), "def");
4278 }
4279
4280 TEST_F(DBTest, TableOptionsSanitizeTest) {
4281 Options options = CurrentOptions();
4282 options.create_if_missing = true;
4283 DestroyAndReopen(options);
4284 ASSERT_EQ(db_->GetOptions().allow_mmap_reads, false);
4285
4286 options.table_factory.reset(NewPlainTableFactory());
4287 options.prefix_extractor.reset(NewNoopTransform());
4288 Destroy(options);
4289 ASSERT_TRUE(!TryReopen(options).IsNotSupported());
4290
4291 // Test the prefix_extractor check when the hash index is used for a
4292 // block-based table.
4293 BlockBasedTableOptions to;
4294 to.index_type = BlockBasedTableOptions::kHashSearch;
4295 options = CurrentOptions();
4296 options.create_if_missing = true;
4297 options.table_factory.reset(NewBlockBasedTableFactory(to));
4298 ASSERT_TRUE(TryReopen(options).IsInvalidArgument());
4299 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
4300 ASSERT_OK(TryReopen(options));
4301 }
4302
4303 TEST_F(DBTest, ConcurrentMemtableNotSupported) {
4304 Options options = CurrentOptions();
4305 options.allow_concurrent_memtable_write = true;
4306 options.soft_pending_compaction_bytes_limit = 0;
4307 options.hard_pending_compaction_bytes_limit = 100;
4308 options.create_if_missing = true;
4309
4310 DestroyDB(dbname_, options);
4311 options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4));
4312 ASSERT_NOK(TryReopen(options));
4313
4314 options.memtable_factory.reset(new SkipListFactory);
4315 ASSERT_OK(TryReopen(options));
4316
4317 ColumnFamilyOptions cf_options(options);
4318 cf_options.memtable_factory.reset(
4319 NewHashLinkListRepFactory(4, 0, 3, true, 4));
4320 ColumnFamilyHandle* handle;
4321 ASSERT_NOK(db_->CreateColumnFamily(cf_options, "name", &handle));
4322 }
4323
4324 #endif // ROCKSDB_LITE
4325
4326 TEST_F(DBTest, SanitizeNumThreads) {
4327 for (int attempt = 0; attempt < 2; attempt++) {
4328 const size_t kTotalTasks = 8;
4329 test::SleepingBackgroundTask sleeping_tasks[kTotalTasks];
4330
4331 Options options = CurrentOptions();
4332 if (attempt == 0) {
4333 options.max_background_compactions = 3;
4334 options.max_background_flushes = 2;
4335 }
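    // Attempt 0 grows the env thread pools (3 LOW / 2 HIGH) through the
    // options; attempt 1 leaves the options at their defaults and relies on
    // the pools keeping those sizes, since option sanitization only ever
    // grows a pool, never shrinks it.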
4336 options.create_if_missing = true;
4337 DestroyAndReopen(options);
4338
4339 for (size_t i = 0; i < kTotalTasks; i++) {
4340 // Insert 4 tasks into the low-priority queue and 4 into the high-priority queue
4341 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
4342 &sleeping_tasks[i],
4343 (i < 4) ? Env::Priority::LOW : Env::Priority::HIGH);
4344 }
4345
4346 // Wait up to 10s for them to be scheduled.
4347 for (int i = 0; i < 10000; i++) {
4348 if (options.env->GetThreadPoolQueueLen(Env::Priority::LOW) <= 1 &&
4349 options.env->GetThreadPoolQueueLen(Env::Priority::HIGH) <= 2) {
4350 break;
4351 }
4352 env_->SleepForMicroseconds(1000);
4353 }
4354
4355 // pool size 3, total tasks 4. Queue size should be 1.
4356 ASSERT_EQ(1U, options.env->GetThreadPoolQueueLen(Env::Priority::LOW));
4357 // pool size 2, total tasks 4. Queue size should be 2.
4358 ASSERT_EQ(2U, options.env->GetThreadPoolQueueLen(Env::Priority::HIGH));
4359
4360 for (size_t i = 0; i < kTotalTasks; i++) {
4361 sleeping_tasks[i].WakeUp();
4362 sleeping_tasks[i].WaitUntilDone();
4363 }
4364
4365 ASSERT_OK(Put("abc", "def"));
4366 ASSERT_EQ("def", Get("abc"));
4367 ASSERT_OK(Flush());
4368 ASSERT_EQ("def", Get("abc"));
4369 }
4370 }
4371
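// Holds the DB mutex and an open write slot so that the concurrent Put and
// Flush threads must queue behind it; ending the write should let both
// threads complete without deadlocking.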
4372 TEST_F(DBTest, WriteSingleThreadEntry) {
4373 std::vector<port::Thread> threads;
4374 dbfull()->TEST_LockMutex();
4375 auto w = dbfull()->TEST_BeginWrite();
4376 threads.emplace_back([&] { ASSERT_OK(Put("a", "b")); });
4377 env_->SleepForMicroseconds(10000);
4378 threads.emplace_back([&] { ASSERT_OK(Flush()); });
4379 env_->SleepForMicroseconds(10000);
4380 dbfull()->TEST_UnlockMutex();
4381 dbfull()->TEST_LockMutex();
4382 dbfull()->TEST_EndWrite(w);
4383 dbfull()->TEST_UnlockMutex();
4384
4385 for (auto& t : threads) {
4386 t.join();
4387 }
4388 }
4389
4390 TEST_F(DBTest, ConcurrentFlushWAL) {
4391 const size_t cnt = 100;
4392 Options options;
4393 options.env = env_;
4394 WriteOptions wopt;
4395 ReadOptions ropt;
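  // Exercise FlushWAL() concurrently with writers under all four
  // combinations of two_write_queues and manual_wal_flush, then reopen to
  // verify the WAL replays cleanly.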
4396 for (bool two_write_queues : {false, true}) {
4397 for (bool manual_wal_flush : {false, true}) {
4398 options.two_write_queues = two_write_queues;
4399 options.manual_wal_flush = manual_wal_flush;
4400 options.create_if_missing = true;
4401 DestroyAndReopen(options);
4402 std::vector<port::Thread> threads;
4403 threads.emplace_back([&] {
4404 for (size_t i = 0; i < cnt; i++) {
4405 auto istr = std::to_string(i);
4406 ASSERT_OK(db_->Put(wopt, db_->DefaultColumnFamily(), "a" + istr,
4407 "b" + istr));
4408 }
4409 });
4410 if (two_write_queues) {
4411 threads.emplace_back([&] {
4412 for (size_t i = cnt; i < 2 * cnt; i++) {
4413 auto istr = std::to_string(i);
4414 WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
4415 wopt.protection_bytes_per_key,
4416 0 /* default_cf_ts_sz */);
4417 ASSERT_OK(batch.Put("a" + istr, "b" + istr));
4418 ASSERT_OK(
4419 dbfull()->WriteImpl(wopt, &batch, nullptr, nullptr, 0, true));
4420 }
4421 });
4422 }
4423 threads.emplace_back([&] {
4424 for (size_t i = 0; i < cnt * 100; i++) { // FlushWAL is faster than Put
4425 ASSERT_OK(db_->FlushWAL(false));
4426 }
4427 });
4428 for (auto& t : threads) {
4429 t.join();
4430 }
4431 options.create_if_missing = false;
4432 // Recover from the WAL and make sure that it is not corrupted.
4433 Reopen(options);
4434 for (size_t i = 0; i < cnt; i++) {
4435 PinnableSlice pval;
4436 auto istr = std::to_string(i);
4437 ASSERT_OK(
4438 db_->Get(ropt, db_->DefaultColumnFamily(), "a" + istr, &pval));
4439 ASSERT_TRUE(pval == ("b" + istr));
4440 }
4441 }
4442 }
4443 }
4444
4445 // A failure in this test is only caught probabilistically.
4446 TEST_F(DBTest, ManualFlushWalAndWriteRace) {
4447 Options options;
4448 options.env = env_;
4449 options.manual_wal_flush = true;
4450 options.create_if_missing = true;
4451
4452 DestroyAndReopen(options);
4453
4454 WriteOptions wopts;
4455 wopts.sync = true;
4456
4457 port::Thread writeThread([&]() {
4458 for (int i = 0; i < 100; i++) {
4459 auto istr = std::to_string(i);
4460 ASSERT_OK(dbfull()->Put(wopts, "key_" + istr, "value_" + istr));
4461 }
4462 });
4463 port::Thread flushThread([&]() {
4464 for (int i = 0; i < 100; i++) {
4465 ASSERT_OK(dbfull()->FlushWAL(false));
4466 }
4467 });
4468
4469 writeThread.join();
4470 flushThread.join();
4471 ASSERT_OK(dbfull()->Put(wopts, "foo1", "value1"));
4472 ASSERT_OK(dbfull()->Put(wopts, "foo2", "value2"));
4473 Reopen(options);
4474 ASSERT_EQ("value1", Get("foo1"));
4475 ASSERT_EQ("value2", Get("foo2"));
4476 }
4477
4478 #ifndef ROCKSDB_LITE
4479 TEST_F(DBTest, DynamicMemtableOptions) {
4480 const uint64_t k64KB = 1 << 16;
4481 const uint64_t k128KB = 1 << 17;
4482 const uint64_t k5KB = 5 * 1024;
4483 Options options;
4484 options.env = env_;
4485 options.create_if_missing = true;
4486 options.compression = kNoCompression;
4487 options.max_background_compactions = 1;
4488 options.write_buffer_size = k64KB;
4489 options.arena_block_size = 16 * 1024;
4490 options.max_write_buffer_number = 2;
4491 // Don't trigger compact/slowdown/stop
4492 options.level0_file_num_compaction_trigger = 1024;
4493 options.level0_slowdown_writes_trigger = 1024;
4494 options.level0_stop_writes_trigger = 1024;
4495 DestroyAndReopen(options);
4496
4497 auto gen_l0_kb = [this](int size) {
4498 const int kNumPutsBeforeWaitForFlush = 64;
4499 Random rnd(301);
4500 for (int i = 0; i < size; i++) {
4501 ASSERT_OK(Put(Key(i), rnd.RandomString(1024)));
4502
4503 // The following condition prevents a race condition between flush jobs
4504 // acquiring work and this thread filling up multiple memtables. Without
4505 // this, the flush might produce less files than expected because
4506 // multiple memtables are flushed into a single L0 file. This race
4507 // condition affects assertion (A).
4508 if (i % kNumPutsBeforeWaitForFlush == kNumPutsBeforeWaitForFlush - 1) {
4509 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
4510 }
4511 }
4512 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
4513 };
4514
4515 // Test write_buffer_size
4516 gen_l0_kb(64);
4517 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
4518 ASSERT_LT(SizeAtLevel(0), k64KB + k5KB);
4519 ASSERT_GT(SizeAtLevel(0), k64KB - k5KB * 2);
4520
4521 // Clean up L0
4522 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
4523 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
4524
4525 // Increase buffer size
4526 ASSERT_OK(dbfull()->SetOptions({
4527 {"write_buffer_size", "131072"},
4528 }));
4529
4530 // The existing memtable inflated 64KB->128KB when we invoked SetOptions().
4531 // Write 192KB, we should have a 128KB L0 file and a memtable with 64KB data.
4532 gen_l0_kb(192);
4533 ASSERT_EQ(NumTableFilesAtLevel(0), 1); // (A)
4534 ASSERT_LT(SizeAtLevel(0), k128KB + 2 * k5KB);
4535 ASSERT_GT(SizeAtLevel(0), k128KB - 4 * k5KB);
4536
4537 // Decrease buffer size below current usage
4538 ASSERT_OK(dbfull()->SetOptions({
4539 {"write_buffer_size", "65536"},
4540 }));
4541 // The existing memtable became eligible for flush when we reduced its
4542 // capacity to 64KB. Two keys need to be added to trigger flush: first causes
4543 // memtable to be marked full, second schedules the flush. Then we should have
4544 // a 128KB L0 file, a 64KB L0 file, and a memtable with just one key.
4545 gen_l0_kb(2);
4546 ASSERT_EQ(NumTableFilesAtLevel(0), 2);
4547 ASSERT_LT(SizeAtLevel(0), k128KB + k64KB + 2 * k5KB);
4548 ASSERT_GT(SizeAtLevel(0), k128KB + k64KB - 4 * k5KB);
4549
4550 // Test max_write_buffer_number
4551 // Block compaction thread, which will also block the flushes because
4552 // max_background_flushes == 0, so flushes are getting executed by the
4553 // compaction thread
4554 env_->SetBackgroundThreads(1, Env::LOW);
4555 test::SleepingBackgroundTask sleeping_task_low;
4556 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
4557 Env::Priority::LOW);
4558 // Start from scratch and disable compaction/flush. Flush can only happen
4559 // during compaction but trigger is pretty high
4560 options.disable_auto_compactions = true;
4561 DestroyAndReopen(options);
4562 env_->SetBackgroundThreads(0, Env::HIGH);
4563
4564 // Put until writes are stopped, bounded by 256 puts. We should see stop at
4565 // ~128KB
4566 int count = 0;
4567 Random rnd(301);
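  // With write_buffer_size = 64KB and max_write_buffer_number = 2, writes
  // stall after roughly 2 * 64 = 128 puts of ~1KB each, hence the
  // 128 +/- 20% bounds below.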
4568
4569 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
4570 "DBImpl::DelayWrite:Wait",
4571 [&](void* /*arg*/) { sleeping_task_low.WakeUp(); });
4572 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
4573
4574 while (!sleeping_task_low.WokenUp() && count < 256) {
4575 ASSERT_OK(Put(Key(count), rnd.RandomString(1024), WriteOptions()));
4576 count++;
4577 }
4578 ASSERT_GT(static_cast<double>(count), 128 * 0.8);
4579 ASSERT_LT(static_cast<double>(count), 128 * 1.2);
4580
4581 sleeping_task_low.WaitUntilDone();
4582
4583 // Increase
4584 ASSERT_OK(dbfull()->SetOptions({
4585 {"max_write_buffer_number", "8"},
4586 }));
4587 // Clean up memtable and L0
4588 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
4589
4590 sleeping_task_low.Reset();
4591 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
4592 Env::Priority::LOW);
4593 count = 0;
4594 while (!sleeping_task_low.WokenUp() && count < 1024) {
4595 ASSERT_OK(Put(Key(count), rnd.RandomString(1024), WriteOptions()));
4596 count++;
4597 }
4598 // Windows fails this test. Will tune in the future and figure out an
4599 // appropriate number.
4600 #ifndef OS_WIN
4601 ASSERT_GT(static_cast<double>(count), 512 * 0.8);
4602 ASSERT_LT(static_cast<double>(count), 512 * 1.2);
4603 #endif
4604 sleeping_task_low.WaitUntilDone();
4605
4606 // Decrease
4607 ASSERT_OK(dbfull()->SetOptions({
4608 {"max_write_buffer_number", "4"},
4609 }));
4610 // Clean up memtable and L0
4611 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
4612
4613 sleeping_task_low.Reset();
4614 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
4615 Env::Priority::LOW);
4616
4617 count = 0;
4618 while (!sleeping_task_low.WokenUp() && count < 1024) {
4619 ASSERT_OK(Put(Key(count), rnd.RandomString(1024), WriteOptions()));
4620 count++;
4621 }
4622 // Windows fails this test. Will tune in the future and figure out an
4623 // appropriate number.
4624 #ifndef OS_WIN
4625 ASSERT_GT(static_cast<double>(count), 256 * 0.8);
4626 ASSERT_LT(static_cast<double>(count), 256 * 1.2);
4627 #endif
4628 sleeping_task_low.WaitUntilDone();
4629
4630 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
4631 }
4632 #endif // ROCKSDB_LITE
4633
4634 #ifdef ROCKSDB_USING_THREAD_STATUS
4635 namespace {
4636 void VerifyOperationCount(Env* env, ThreadStatus::OperationType op_type,
4637 int expected_count) {
4638 int op_count = 0;
4639 std::vector<ThreadStatus> thread_list;
4640 ASSERT_OK(env->GetThreadList(&thread_list));
4641 for (auto thread : thread_list) {
4642 if (thread.operation_type == op_type) {
4643 op_count++;
4644 }
4645 }
4646 ASSERT_EQ(op_count, expected_count);
4647 }
4648 } // anonymous namespace
4649
4650 TEST_F(DBTest, GetThreadStatus) {
4651 Options options;
4652 options.env = env_;
4653 options.enable_thread_tracking = true;
4654 ASSERT_OK(TryReopen(options));
4655
4656 std::vector<ThreadStatus> thread_list;
4657 Status s = env_->GetThreadList(&thread_list);
4658
4659 for (int i = 0; i < 2; ++i) {
4660 // repeat the test with different numbers of high / low priority threads
4661 const int kTestCount = 3;
4662 const unsigned int kHighPriCounts[kTestCount] = {3, 2, 5};
4663 const unsigned int kLowPriCounts[kTestCount] = {10, 15, 3};
4664 const unsigned int kBottomPriCounts[kTestCount] = {2, 1, 4};
4665 for (int test = 0; test < kTestCount; ++test) {
4666 // Change the number of threads in high / low priority pool.
4667 env_->SetBackgroundThreads(kHighPriCounts[test], Env::HIGH);
4668 env_->SetBackgroundThreads(kLowPriCounts[test], Env::LOW);
4669 env_->SetBackgroundThreads(kBottomPriCounts[test], Env::BOTTOM);
4670 // Wait to ensure that all threads have been registered
4671 unsigned int thread_type_counts[ThreadStatus::NUM_THREAD_TYPES];
4672 // TODO(ajkr): it'd be better if SetBackgroundThreads returned only after
4673 // all threads have been registered.
4674 // Try up to 60 seconds.
4675 for (int num_try = 0; num_try < 60000; num_try++) {
4676 env_->SleepForMicroseconds(1000);
4677 thread_list.clear();
4678 s = env_->GetThreadList(&thread_list);
4679 ASSERT_OK(s);
4680 memset(thread_type_counts, 0, sizeof(thread_type_counts));
4681 for (auto thread : thread_list) {
4682 ASSERT_LT(thread.thread_type, ThreadStatus::NUM_THREAD_TYPES);
4683 thread_type_counts[thread.thread_type]++;
4684 }
4685 if (thread_type_counts[ThreadStatus::HIGH_PRIORITY] ==
4686 kHighPriCounts[test] &&
4687 thread_type_counts[ThreadStatus::LOW_PRIORITY] ==
4688 kLowPriCounts[test] &&
4689 thread_type_counts[ThreadStatus::BOTTOM_PRIORITY] ==
4690 kBottomPriCounts[test]) {
4691 break;
4692 }
4693 }
4694 // Verify the number of high-priority threads
4695 ASSERT_EQ(thread_type_counts[ThreadStatus::HIGH_PRIORITY],
4696 kHighPriCounts[test]);
4697 // Verify the number of low-priority threads
4698 ASSERT_EQ(thread_type_counts[ThreadStatus::LOW_PRIORITY],
4699 kLowPriCounts[test]);
4700 // Verify the number of bottom-priority threads
4701 ASSERT_EQ(thread_type_counts[ThreadStatus::BOTTOM_PRIORITY],
4702 kBottomPriCounts[test]);
4703 }
4704 if (i == 0) {
4705 // repeat the test with multiple column families
4706 CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
4707 env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
4708 true);
4709 }
4710 }
4711 ASSERT_OK(db_->DropColumnFamily(handles_[2]));
4712 delete handles_[2];
4713 handles_.erase(handles_.begin() + 2);
4714 env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
4715 true);
4716 Close();
4717 env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
4718 true);
4719 }
4720
4721 TEST_F(DBTest, DisableThreadStatus) {
4722 Options options;
4723 options.env = env_;
4724 options.enable_thread_tracking = false;
4725 ASSERT_OK(TryReopen(options));
4726 CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
4727 // Verify that none of the column family info exists
4728 env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
4729 false);
4730 }
4731
4732 TEST_F(DBTest, ThreadStatusFlush) {
4733 Options options;
4734 options.env = env_;
4735 options.write_buffer_size = 100000; // Small write buffer
4736 options.enable_thread_tracking = true;
4737 options = CurrentOptions(options);
4738
4739 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
4740 {"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"},
4741 {"DBTest::ThreadStatusFlush:2", "FlushJob::WriteLevel0Table"},
4742 });
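  // Each LoadDependency pair is {predecessor, successor}: the successor sync
  // point blocks until the predecessor has been reached. This guarantees a
  // flush job exists before :1 fires and that the flush cannot write its L0
  // table until :2 has passed.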
4743 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
4744
4745 CreateAndReopenWithCF({"pikachu"}, options);
4746 VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
4747
4748 ASSERT_OK(Put(1, "foo", "v1"));
4749 ASSERT_EQ("v1", Get(1, "foo"));
4750 VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
4751
4752 uint64_t num_running_flushes = 0;
4753 ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningFlushes,
4754 &num_running_flushes));
4755 ASSERT_EQ(num_running_flushes, 0);
4756
4757 ASSERT_OK(Put(1, "k1", std::string(100000, 'x'))); // Fill memtable
4758 ASSERT_OK(Put(1, "k2", std::string(100000, 'y'))); // Trigger flush
4759
4760 // The first sync point is to make sure there's one flush job
4761 // running when we perform VerifyOperationCount().
4762 TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1");
4763 VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1);
4764 ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningFlushes,
4765 &num_running_flushes));
4766 ASSERT_EQ(num_running_flushes, 1);
4767 // This second sync point ensures the flush job does not complete
4768 // until we have performed VerifyOperationCount().
4769 TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2");
4770 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
4771 }
4772
4773 TEST_P(DBTestWithParam, ThreadStatusSingleCompaction) {
4774 const int kTestKeySize = 16;
4775 const int kTestValueSize = 984;
4776 const int kEntrySize = kTestKeySize + kTestValueSize;
4777 const int kEntriesPerBuffer = 100;
4778 Options options;
4779 options.create_if_missing = true;
4780 options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
4781 options.compaction_style = kCompactionStyleLevel;
4782 options.target_file_size_base = options.write_buffer_size;
4783 options.max_bytes_for_level_base = options.target_file_size_base * 2;
4784 options.max_bytes_for_level_multiplier = 2;
4785 options.compression = kNoCompression;
4786 options = CurrentOptions(options);
4787 options.env = env_;
4788 options.enable_thread_tracking = true;
4789 const int kNumL0Files = 4;
4790 options.level0_file_num_compaction_trigger = kNumL0Files;
4791 options.max_subcompactions = max_subcompactions_;
4792
4793 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
4794 {"DBTest::ThreadStatusSingleCompaction:0", "DBImpl::BGWorkCompaction"},
4795 {"CompactionJob::Run():Start", "DBTest::ThreadStatusSingleCompaction:1"},
4796 {"DBTest::ThreadStatusSingleCompaction:2", "CompactionJob::Run():End"},
4797 });
4798 for (int tests = 0; tests < 2; ++tests) {
4799 DestroyAndReopen(options);
4800 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
4801 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
4802
4803 Random rnd(301);
4804 // The Put Phase.
4805 for (int file = 0; file < kNumL0Files; ++file) {
4806 for (int key = 0; key < kEntriesPerBuffer; ++key) {
4807 ASSERT_OK(Put(std::to_string(key + file * kEntriesPerBuffer),
4808 rnd.RandomString(kTestValueSize)));
4809 }
4810 ASSERT_OK(Flush());
4811 }
4812 // This makes sure a compaction won't be scheduled until
4813 // we are done with the above Put Phase.
4814 uint64_t num_running_compactions = 0;
4815 ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
4816 &num_running_compactions));
4817 ASSERT_EQ(num_running_compactions, 0);
4818 TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:0");
4819 ASSERT_GE(NumTableFilesAtLevel(0),
4820 options.level0_file_num_compaction_trigger);
4821
4822 // This makes sure at least one compaction is running.
4823 TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:1");
4824
4825 if (options.enable_thread_tracking) {
4826 // Expect a single L0 to L1 compaction.
4827 VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 1);
4828 } else {
4829 // If thread tracking is not enabled, compaction count should be 0.
4830 VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 0);
4831 }
4832 ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
4833 &num_running_compactions));
4834 ASSERT_EQ(num_running_compactions, 1);
4835 // TODO(yhchiang): adding assert to verify each compaction stage.
4836 TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:2");
4837
4838 // Repeat the test with thread tracking disabled.
4839 options.enable_thread_tracking = false;
4840 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
4841 }
4842 }
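
// Note on the loop above: the first iteration runs with thread tracking
// enabled (expecting one tracked compaction) and the second with it disabled
// (expecting a tracked count of zero), while the kNumRunningCompactions
// property reports 1 in both cases since it does not depend on tracking.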
4843
4844 TEST_P(DBTestWithParam, PreShutdownManualCompaction) {
4845 Options options = CurrentOptions();
4846 options.max_subcompactions = max_subcompactions_;
4847 CreateAndReopenWithCF({"pikachu"}, options);
4848
4849 // iter - 0 with 7 levels
4850 // iter - 1 with 3 levels
4851 for (int iter = 0; iter < 2; ++iter) {
4852 MakeTables(3, "p", "q", 1);
4853 ASSERT_EQ("1,1,1", FilesPerLevel(1));
4854
4855 // Compaction range falls before files
4856 Compact(1, "", "c");
4857 ASSERT_EQ("1,1,1", FilesPerLevel(1));
4858
4859 // Compaction range falls after files
4860 Compact(1, "r", "z");
4861 ASSERT_EQ("1,1,1", FilesPerLevel(1));
4862
4863 // Compaction range overlaps files
4864 Compact(1, "p", "q");
4865 ASSERT_EQ("0,0,1", FilesPerLevel(1));
4866
4867 // Populate a different range
4868 MakeTables(3, "c", "e", 1);
4869 ASSERT_EQ("1,1,2", FilesPerLevel(1));
4870
4871 // Compact just the new range
4872 Compact(1, "b", "f");
4873 ASSERT_EQ("0,0,2", FilesPerLevel(1));
4874
4875 // Compact all
4876 MakeTables(1, "a", "z", 1);
4877 ASSERT_EQ("1,0,2", FilesPerLevel(1));
4878 CancelAllBackgroundWork(db_);
4879 ASSERT_TRUE(
4880 db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr)
4881 .IsShutdownInProgress());
4882 ASSERT_EQ("1,0,2", FilesPerLevel(1));
4883
4884 if (iter == 0) {
4885 options = CurrentOptions();
4886 options.num_levels = 3;
4887 options.create_if_missing = true;
4888 DestroyAndReopen(options);
4889 CreateAndReopenWithCF({"pikachu"}, options);
4890 }
4891 }
4892 }
4893
4894 TEST_F(DBTest, PreShutdownFlush) {
4895 Options options = CurrentOptions();
4896 CreateAndReopenWithCF({"pikachu"}, options);
4897 ASSERT_OK(Put(1, "key", "value"));
4898 CancelAllBackgroundWork(db_);
4899 Status s =
4900 db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr);
4901 ASSERT_TRUE(s.IsShutdownInProgress());
4902 }
4903
4904 TEST_P(DBTestWithParam, PreShutdownMultipleCompaction) {
4905 const int kTestKeySize = 16;
4906 const int kTestValueSize = 984;
4907 const int kEntrySize = kTestKeySize + kTestValueSize;
4908 const int kEntriesPerBuffer = 40;
4909 const int kNumL0Files = 4;
4910
4911 const int kHighPriCount = 3;
4912 const int kLowPriCount = 5;
4913 env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
4914 env_->SetBackgroundThreads(kLowPriCount, Env::LOW);
4915
4916 Options options;
4917 options.create_if_missing = true;
4918 options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
4919 options.compaction_style = kCompactionStyleLevel;
4920 options.target_file_size_base = options.write_buffer_size;
4921 options.max_bytes_for_level_base =
4922 options.target_file_size_base * kNumL0Files;
4923 options.compression = kNoCompression;
4924 options = CurrentOptions(options);
4925 options.env = env_;
4926 options.enable_thread_tracking = true;
4927 options.level0_file_num_compaction_trigger = kNumL0Files;
4928 options.max_bytes_for_level_multiplier = 2;
4929 options.max_background_compactions = kLowPriCount;
4930 options.level0_stop_writes_trigger = 1 << 10;
4931 options.level0_slowdown_writes_trigger = 1 << 10;
4932 options.max_subcompactions = max_subcompactions_;
4933
4934 TryReopen(options);
4935 Random rnd(301);
4936
4937 std::vector<ThreadStatus> thread_list;
4938 // Delay both flush and compaction
4939 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
4940 {{"FlushJob::FlushJob()", "CompactionJob::Run():Start"},
4941 {"CompactionJob::Run():Start",
4942 "DBTest::PreShutdownMultipleCompaction:Preshutdown"},
4943 {"CompactionJob::Run():Start",
4944 "DBTest::PreShutdownMultipleCompaction:VerifyCompaction"},
4945 {"DBTest::PreShutdownMultipleCompaction:Preshutdown",
4946 "CompactionJob::Run():End"},
4947 {"CompactionJob::Run():End",
4948 "DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown"}});
4949
4950 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
4951
4952 // Make rocksdb busy
4953 int key = 0;
4954 // check how many threads are doing compaction using GetThreadList
4955 int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
4956 for (int file = 0; file < 16 * kNumL0Files; ++file) {
4957 for (int k = 0; k < kEntriesPerBuffer; ++k) {
4958 ASSERT_OK(Put(std::to_string(key++), rnd.RandomString(kTestValueSize)));
4959 }
4960
4961 ASSERT_OK(env_->GetThreadList(&thread_list));
4962 for (auto thread : thread_list) {
4963 operation_count[thread.operation_type]++;
4964 }
4965
4966 // Speed up the test
4967 if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
4968 operation_count[ThreadStatus::OP_COMPACTION] >
4969 0.6 * options.max_background_compactions) {
4970 break;
4971 }
4972 if (file == 15 * kNumL0Files) {
4973 TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
4974 }
4975 }
4976
4977 TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
4978 ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
4979 CancelAllBackgroundWork(db_);
4980 TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown");
4981 ASSERT_OK(dbfull()->TEST_WaitForCompact());
4982 // Re-count operations to verify that no compaction is running after shutdown.
4983 for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
4984 operation_count[i] = 0;
4985 }
4986 ASSERT_OK(env_->GetThreadList(&thread_list));
4987 for (auto thread : thread_list) {
4988 operation_count[thread.operation_type]++;
4989 }
4990 ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
4991 }
4992
4993 TEST_P(DBTestWithParam, PreShutdownCompactionMiddle) {
4994 const int kTestKeySize = 16;
4995 const int kTestValueSize = 984;
4996 const int kEntrySize = kTestKeySize + kTestValueSize;
4997 const int kEntriesPerBuffer = 40;
4998 const int kNumL0Files = 4;
4999
5000 const int kHighPriCount = 3;
5001 const int kLowPriCount = 5;
5002 env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
5003 env_->SetBackgroundThreads(kLowPriCount, Env::LOW);
5004
5005 Options options;
5006 options.create_if_missing = true;
5007 options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
5008 options.compaction_style = kCompactionStyleLevel;
5009 options.target_file_size_base = options.write_buffer_size;
5010 options.max_bytes_for_level_base =
5011 options.target_file_size_base * kNumL0Files;
5012 options.compression = kNoCompression;
5013 options = CurrentOptions(options);
5014 options.env = env_;
5015 options.enable_thread_tracking = true;
5016 options.level0_file_num_compaction_trigger = kNumL0Files;
5017 options.max_bytes_for_level_multiplier = 2;
5018 options.max_background_compactions = kLowPriCount;
5019 options.level0_stop_writes_trigger = 1 << 10;
5020 options.level0_slowdown_writes_trigger = 1 << 10;
5021 options.max_subcompactions = max_subcompactions_;
5022
5023 TryReopen(options);
5024 Random rnd(301);
5025
5026 std::vector<ThreadStatus> thread_list;
5027 // Delay both flush and compaction
5028 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
5029 {{"DBTest::PreShutdownCompactionMiddle:Preshutdown",
5030 "CompactionJob::Run():Inprogress"},
5031 {"CompactionJob::Run():Start",
5032 "DBTest::PreShutdownCompactionMiddle:VerifyCompaction"},
5033 {"CompactionJob::Run():Inprogress", "CompactionJob::Run():End"},
5034 {"CompactionJob::Run():End",
5035 "DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown"}});
5036
5037 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
5038
5039 // Make rocksdb busy
5040 int key = 0;
5041 // check how many threads are doing compaction using GetThreadList
5042 int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
5043 for (int file = 0; file < 16 * kNumL0Files; ++file) {
5044 for (int k = 0; k < kEntriesPerBuffer; ++k) {
5045 ASSERT_OK(Put(std::to_string(key++), rnd.RandomString(kTestValueSize)));
5046 }
5047
5048 ASSERT_OK(env_->GetThreadList(&thread_list));
5049 for (auto thread : thread_list) {
5050 operation_count[thread.operation_type]++;
5051 }
5052
5053 // Speed up the test
5054 if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
5055 operation_count[ThreadStatus::OP_COMPACTION] >
5056 0.6 * options.max_background_compactions) {
5057 break;
5058 }
5059 if (file == 15 * kNumL0Files) {
5060 TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyCompaction");
5061 }
5062 }
5063
5064 ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
5065 CancelAllBackgroundWork(db_);
5066 TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:Preshutdown");
5067 TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown");
5068 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5069 // Re-count operations to verify that no compaction is running after shutdown.
5070 for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
5071 operation_count[i] = 0;
5072 }
5073 ASSERT_OK(env_->GetThreadList(&thread_list));
5074 for (auto thread : thread_list) {
5075 operation_count[thread.operation_type]++;
5076 }
5077 ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
5078 }
5079
5080 #endif // ROCKSDB_USING_THREAD_STATUS
5081
5082 #ifndef ROCKSDB_LITE
5083 TEST_F(DBTest, FlushOnDestroy) {
5084 WriteOptions wo;
5085 wo.disableWAL = true;
5086 ASSERT_OK(Put("foo", "v1", wo));
5087 CancelAllBackgroundWork(db_);
5088 }
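
// The point of FlushOnDestroy above: with the WAL disabled, the memtable
// holds the only copy of "foo", so correctness depends on the DB flushing it
// during destruction even after background work has been cancelled.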
5089
5090 TEST_F(DBTest, DynamicLevelCompressionPerLevel) {
5091 if (!Snappy_Supported()) {
5092 return;
5093 }
5094 const int kNKeys = 120;
5095 int keys[kNKeys];
5096 for (int i = 0; i < kNKeys; i++) {
5097 keys[i] = i;
5098 }
5099 RandomShuffle(std::begin(keys), std::end(keys));
5100
5101 Random rnd(301);
5102 Options options;
5103 options.env = env_;
5104 options.create_if_missing = true;
5105 options.db_write_buffer_size = 20480;
5106 options.write_buffer_size = 20480;
5107 options.max_write_buffer_number = 2;
5108 options.level0_file_num_compaction_trigger = 2;
5109 options.level0_slowdown_writes_trigger = 2;
5110 options.level0_stop_writes_trigger = 2;
5111 options.target_file_size_base = 20480;
5112 options.level_compaction_dynamic_level_bytes = true;
5113 options.max_bytes_for_level_base = 102400;
5114 options.max_bytes_for_level_multiplier = 4;
5115 options.max_background_compactions = 1;
5116 options.num_levels = 5;
5117
5118 options.compression_per_level.resize(3);
5119 options.compression_per_level[0] = kNoCompression;
5120 options.compression_per_level[1] = kNoCompression;
5121 options.compression_per_level[2] = kSnappyCompression;
5122
5123 OnFileDeletionListener* listener = new OnFileDeletionListener();
5124 options.listeners.emplace_back(listener);
5125
5126 DestroyAndReopen(options);
5127
5128 // Insert more than 80K. L4 should be base level. Neither L0 nor L4 should
5129 // be compressed, so total data size should be more than 80K.
5130 for (int i = 0; i < 20; i++) {
5131 ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
5132 }
5133 ASSERT_OK(Flush());
5134 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5135
5136 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
5137 ASSERT_EQ(NumTableFilesAtLevel(2), 0);
5138 ASSERT_EQ(NumTableFilesAtLevel(3), 0);
5139 // Assuming each file's metadata is at least 50 bytes.
5140 ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(4), 20U * 4000U + 50U * 4);
5141
5142 // Insert 400KB. Some data will be compressed
5143 for (int i = 21; i < 120; i++) {
5144 ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
5145 }
5146 ASSERT_OK(Flush());
5147 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5148 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
5149 ASSERT_EQ(NumTableFilesAtLevel(2), 0);
5150
5151 ASSERT_LT(SizeAtLevel(0) + SizeAtLevel(3) + SizeAtLevel(4),
5152 120U * 4000U + 50U * 24);
5153 // Make sure the data in L3 files is not compacted away: remove all files
5154 // in L4, then count the number of rows.
5155 ASSERT_OK(dbfull()->SetOptions({
5156 {"disable_auto_compactions", "true"},
5157 }));
5158 ColumnFamilyMetaData cf_meta;
5159 db_->GetColumnFamilyMetaData(&cf_meta);
5160 for (auto file : cf_meta.levels[4].files) {
5161 listener->SetExpectedFileName(dbname_ + file.name);
5162 ASSERT_OK(dbfull()->DeleteFile(file.name));
5163 }
5164 listener->VerifyMatchedCount(cf_meta.levels[4].files.size());
5165
5166 int num_keys = 0;
5167 std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
5168 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
5169 num_keys++;
5170 }
5171 ASSERT_OK(iter->status());
5172 ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(3), num_keys * 4000U + num_keys * 10U);
5173 }
5174
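// A note on the mapping exercised by the test above and the one below: with
// level_compaction_dynamic_level_bytes, compression_per_level appears to be
// applied relative to the base level rather than by absolute level number.
// With num_levels = 5 and three entries, L0 takes entry 0, the base level
// takes entry 1, and the level below it takes entry 2, which is why the
// codec assignments shift when the base level moves from L4 to L3.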
5175 TEST_F(DBTest, DynamicLevelCompressionPerLevel2) {
5176 if (!Snappy_Supported() || !LZ4_Supported() || !Zlib_Supported()) {
5177 return;
5178 }
5179 const int kNKeys = 500;
5180 int keys[kNKeys];
5181 for (int i = 0; i < kNKeys; i++) {
5182 keys[i] = i;
5183 }
5184 RandomShuffle(std::begin(keys), std::end(keys));
5185
5186 Random rnd(301);
5187 Options options;
5188 options.create_if_missing = true;
5189 options.db_write_buffer_size = 6000000;
5190 options.write_buffer_size = 600000;
5191 options.max_write_buffer_number = 2;
5192 options.level0_file_num_compaction_trigger = 2;
5193 options.level0_slowdown_writes_trigger = 2;
5194 options.level0_stop_writes_trigger = 2;
5195 options.soft_pending_compaction_bytes_limit = 1024 * 1024;
5196 options.target_file_size_base = 20;
5197 options.env = env_;
5198 options.level_compaction_dynamic_level_bytes = true;
5199 options.max_bytes_for_level_base = 200;
5200 options.max_bytes_for_level_multiplier = 8;
5201 options.max_background_compactions = 1;
5202 options.num_levels = 5;
5203 std::shared_ptr<mock::MockTableFactory> mtf(new mock::MockTableFactory);
5204 options.table_factory = mtf;
5205
5206 options.compression_per_level.resize(3);
5207 options.compression_per_level[0] = kNoCompression;
5208 options.compression_per_level[1] = kLZ4Compression;
5209 options.compression_per_level[2] = kZlibCompression;
5210
5211 DestroyAndReopen(options);
5212 // When base level is L4, L4 is LZ4.
5213 std::atomic<int> num_zlib(0);
5214 std::atomic<int> num_lz4(0);
5215 std::atomic<int> num_no(0);
5216 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
5217 "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
5218 Compaction* compaction = reinterpret_cast<Compaction*>(arg);
5219 if (compaction->output_level() == 4) {
5220 ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
5221 num_lz4.fetch_add(1);
5222 }
5223 });
5224 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
5225 "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
5226 auto* compression = reinterpret_cast<CompressionType*>(arg);
5227 ASSERT_TRUE(*compression == kNoCompression);
5228 num_no.fetch_add(1);
5229 });
5230 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
5231
5232 for (int i = 0; i < 100; i++) {
5233 std::string value = rnd.RandomString(200);
5234 ASSERT_OK(Put(Key(keys[i]), value));
5235 if (i % 25 == 24) {
5236 ASSERT_OK(Flush());
5237 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5238 }
5239 }
5240
5241 ASSERT_OK(Flush());
5242 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
5243 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5244 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
5245 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
5246
5247 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
5248 ASSERT_EQ(NumTableFilesAtLevel(2), 0);
5249 ASSERT_EQ(NumTableFilesAtLevel(3), 0);
5250 ASSERT_GT(NumTableFilesAtLevel(4), 0);
5251 ASSERT_GT(num_no.load(), 2);
5252 ASSERT_GT(num_lz4.load(), 0);
5253 int prev_num_files_l4 = NumTableFilesAtLevel(4);
5254
5255 // After the base level turns from L4 to L3, L3 becomes LZ4 and L4 becomes Zlib.
5256 num_lz4.store(0);
5257 num_no.store(0);
5258 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
5259 "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
5260 Compaction* compaction = reinterpret_cast<Compaction*>(arg);
5261 if (compaction->output_level() == 4 && compaction->start_level() == 3) {
5262 ASSERT_TRUE(compaction->output_compression() == kZlibCompression);
5263 num_zlib.fetch_add(1);
5264 } else {
5265 ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
5266 num_lz4.fetch_add(1);
5267 }
5268 });
5269 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
5270 "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
5271 auto* compression = reinterpret_cast<CompressionType*>(arg);
5272 ASSERT_TRUE(*compression == kNoCompression);
5273 num_no.fetch_add(1);
5274 });
5275 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
5276
5277 for (int i = 101; i < 500; i++) {
5278 std::string value = rnd.RandomString(200);
5279 ASSERT_OK(Put(Key(keys[i]), value));
5280 if (i % 100 == 99) {
5281 ASSERT_OK(Flush());
5282 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5283 }
5284 }
5285
5286 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
5287 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
5288 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
5289 ASSERT_EQ(NumTableFilesAtLevel(2), 0);
5290 ASSERT_GT(NumTableFilesAtLevel(3), 0);
5291 ASSERT_GT(NumTableFilesAtLevel(4), prev_num_files_l4);
5292 ASSERT_GT(num_no.load(), 2);
5293 ASSERT_GT(num_lz4.load(), 0);
5294 ASSERT_GT(num_zlib.load(), 0);
5295 }
5296
5297 TEST_F(DBTest, DynamicCompactionOptions) {
5298 // minimum write buffer size is enforced at 64KB
5299 const uint64_t k32KB = 1 << 15;
5300 const uint64_t k64KB = 1 << 16;
5301 const uint64_t k128KB = 1 << 17;
5302 const uint64_t k1MB = 1 << 20;
5303 const uint64_t k4KB = 1 << 12;
5304 Options options;
5305 options.env = env_;
5306 options.create_if_missing = true;
5307 options.compression = kNoCompression;
5308 options.soft_pending_compaction_bytes_limit = 1024 * 1024;
5309 options.write_buffer_size = k64KB;
5310 options.arena_block_size = 4 * k4KB;
5311 options.max_write_buffer_number = 2;
5312 // Compaction related options
5313 options.level0_file_num_compaction_trigger = 3;
5314 options.level0_slowdown_writes_trigger = 4;
5315 options.level0_stop_writes_trigger = 8;
5316 options.target_file_size_base = k64KB;
5317 options.max_compaction_bytes = options.target_file_size_base * 10;
5318 options.target_file_size_multiplier = 1;
5319 options.max_bytes_for_level_base = k128KB;
5320 options.max_bytes_for_level_multiplier = 4;
5321
5322 // Block flush thread and disable compaction thread
5323 env_->SetBackgroundThreads(1, Env::LOW);
5324 env_->SetBackgroundThreads(1, Env::HIGH);
5325 DestroyAndReopen(options);
5326
5327 auto gen_l0_kb = [this](int start, int size, int stride) {
5328 Random rnd(301);
5329 for (int i = 0; i < size; i++) {
5330 ASSERT_OK(Put(Key(start + stride * i), rnd.RandomString(1024)));
5331 }
5332 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
5333 };
5334
5335 // Write 3 files that have the same key range.
5336 // Since level0_file_num_compaction_trigger is 3, compaction should be
5337 // triggered. The compaction should result in one L1 file
5338 gen_l0_kb(0, 64, 1);
5339 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
5340 gen_l0_kb(0, 64, 1);
5341 ASSERT_EQ(NumTableFilesAtLevel(0), 2);
5342 gen_l0_kb(0, 64, 1);
5343 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5344 ASSERT_EQ("0,1", FilesPerLevel());
5345 std::vector<LiveFileMetaData> metadata;
5346 db_->GetLiveFilesMetaData(&metadata);
5347 ASSERT_EQ(1U, metadata.size());
5348 ASSERT_LE(metadata[0].size, k64KB + k4KB);
5349 ASSERT_GE(metadata[0].size, k64KB - k4KB);
5350
5351 // Test compaction trigger and target_file_size_base
5352 // Reduce compaction trigger to 2, and reduce L1 file size to 32KB.
5353 // Writing two 64KB L0 files should trigger a compaction. Since these
5354 // two L0 files have the same key range, the compaction merges them and
5355 // should result in two 32KB L1 files.
5356 ASSERT_OK(
5357 dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"},
5358 {"target_file_size_base", std::to_string(k32KB)}}));
5359
5360 gen_l0_kb(0, 64, 1);
5361 ASSERT_EQ("1,1", FilesPerLevel());
5362 gen_l0_kb(0, 64, 1);
5363 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5364 ASSERT_EQ("0,2", FilesPerLevel());
5365 metadata.clear();
5366 db_->GetLiveFilesMetaData(&metadata);
5367 ASSERT_EQ(2U, metadata.size());
5368 ASSERT_LE(metadata[0].size, k32KB + k4KB);
5369 ASSERT_GE(metadata[0].size, k32KB - k4KB);
5370 ASSERT_LE(metadata[1].size, k32KB + k4KB);
5371 ASSERT_GE(metadata[1].size, k32KB - k4KB);
5372
5373 // Test max_bytes_for_level_base
5374 // Increase the level base size to 1MB and write enough data to
5375 // fill L1 and L2. L1 size should be around 1MB while L2 size should be
5376 // around 1MB x 4, matching the assertions below.
5377 ASSERT_OK(dbfull()->SetOptions(
5378 {{"max_bytes_for_level_base", std::to_string(k1MB)}}));
5379
5380 // writing 96 x 64KB => 6 * 1024KB
5381 // (L1 + L2) = (1 + 4) * 1024KB
5382 for (int i = 0; i < 96; ++i) {
5383 gen_l0_kb(i, 64, 96);
5384 }
5385 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5386 ASSERT_GT(SizeAtLevel(1), k1MB / 2);
5387 ASSERT_LT(SizeAtLevel(1), k1MB + k1MB / 2);
5388
5389 // Within (0.5, 1.5) of 4MB.
5390 ASSERT_GT(SizeAtLevel(2), 2 * k1MB);
5391 ASSERT_LT(SizeAtLevel(2), 6 * k1MB);
5392
5393 // Test max_bytes_for_level_multiplier and
5394 // max_bytes_for_level_base. Now reduce both the multiplier and the level
5395 // base. After filling enough data to fit in L1 - L3, we should see the L1
5396 // size shrink to around 128KB from the ~1MB asserted previously. Same for L2.
5397 ASSERT_OK(dbfull()->SetOptions(
5398 {{"max_bytes_for_level_multiplier", "2"},
5399 {"max_bytes_for_level_base", std::to_string(k128KB)}}));
5400
5401 // writing 20 x 64KB = 10 x 128KB
5402 // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB
5403 for (int i = 0; i < 20; ++i) {
5404 gen_l0_kb(i, 64, 32);
5405 }
5406 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5407 uint64_t total_size = SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3);
5408 ASSERT_TRUE(total_size < k128KB * 7 * 1.5);
5409
5410 // Test level0_stop_writes_trigger.
5411 // Clean up the memtable and L0, then block the compaction threads. If we
5412 // continue to write and flush memtables, we should see puts stop after 8
5413 // memtable flushes since level0_stop_writes_trigger = 8.
5414 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
5415 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
5416 // Block compaction
5417 test::SleepingBackgroundTask sleeping_task_low;
5418 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
5419 Env::Priority::LOW);
5420 sleeping_task_low.WaitUntilSleeping();
5421 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
5422 int count = 0;
5423 Random rnd(301);
5424 WriteOptions wo;
5425 while (count < 64) {
5426 ASSERT_OK(Put(Key(count), rnd.RandomString(1024), wo));
5427 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
5428 count++;
5429 if (dbfull()->TEST_write_controler().IsStopped()) {
5430 sleeping_task_low.WakeUp();
5431 break;
5432 }
5433 }
5434 // Stop trigger = 8
5435 ASSERT_EQ(count, 8);
5436 // Unblock
5437 sleeping_task_low.WaitUntilDone();
5438
5439 // Now reduce level0_stop_writes_trigger to 6. Clean up memtables and L0.
5440 // Block the compaction thread again. Perform puts and memtable flushes
5441 // until we see the stop after 6 memtable flushes.
5442 ASSERT_OK(dbfull()->SetOptions({{"level0_stop_writes_trigger", "6"}}));
5443 ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
5444 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
5445 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
5446
5447 // Block compaction again
5448 sleeping_task_low.Reset();
5449 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
5450 Env::Priority::LOW);
5451 sleeping_task_low.WaitUntilSleeping();
5452 count = 0;
5453 while (count < 64) {
5454 ASSERT_OK(Put(Key(count), rnd.RandomString(1024), wo));
5455 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
5456 count++;
5457 if (dbfull()->TEST_write_controler().IsStopped()) {
5458 sleeping_task_low.WakeUp();
5459 break;
5460 }
5461 }
5462 ASSERT_EQ(count, 6);
5463 // Unblock
5464 sleeping_task_low.WaitUntilDone();
5465
5466 // Test disable_auto_compactions
5467 // The compaction thread is unblocked but auto compaction is disabled. Write
5468 // 4 L0 files, which would normally trigger a compaction. Since auto
5469 // compaction is disabled, TEST_WaitForCompact waits for nothing, and the
5470 // number of L0 files does not change after the call.
5471 ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "true"}}));
5472 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
5473 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
5474
5475 for (int i = 0; i < 4; ++i) {
5476 ASSERT_OK(Put(Key(i), rnd.RandomString(1024)));
5477 // Wait for compaction so that put won't stop
5478 ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
5479 }
5480 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5481 ASSERT_EQ(NumTableFilesAtLevel(0), 4);
5482
5483 // Enable auto compaction and perform the same test, # of L0 files should be
5484 // reduced after compaction.
5485 ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}}));
5486 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
5487 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
5488
5489 for (int i = 0; i < 4; ++i) {
5490 ASSERT_OK(Put(Key(i), rnd.RandomString(1024)));
5491 // Wait for compaction so that put won't stop
5492 ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
5493 }
5494 ASSERT_OK(dbfull()->TEST_WaitForCompact());
5495 ASSERT_LT(NumTableFilesAtLevel(0), 4);
5496 }
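
// A compact distillation (hypothetical test name, illustrative option values)
// of the SetOptions() round trip the test above relies on: mutable options
// take effect without reopening the DB and can be read back via GetOptions().
TEST_F(DBTest, SetOptionsRoundTripSketch) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  DestroyAndReopen(options);
  ASSERT_OK(dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"},
                                  {"disable_auto_compactions", "true"}}));
  ASSERT_EQ(dbfull()->GetOptions().level0_file_num_compaction_trigger, 2);
  ASSERT_TRUE(dbfull()->GetOptions().disable_auto_compactions);
}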
5497
5498 // Test dynamic FIFO compaction options.
5499 // This test covers option parsing only and makes sure that the options are
5500 // correctly assigned. See also DBOptionsTest.SetFIFOCompactionOptions,
5501 // which makes sure that the FIFO compaction functionality works as
5502 // expected when the options are changed dynamically.
5503 // More FIFO compaction tests live at DBTest.FIFOCompaction*.
5504 TEST_F(DBTest, DynamicFIFOCompactionOptions) {
5505 Options options;
5506 options.ttl = 0;
5507 options.create_if_missing = true;
5508 options.env = env_;
5509 DestroyAndReopen(options);
5510
5511 // Initial defaults
5512 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5513 1024 * 1024 * 1024);
5514 ASSERT_EQ(dbfull()->GetOptions().ttl, 0);
5515 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5516 false);
5517
5518 ASSERT_OK(dbfull()->SetOptions(
5519 {{"compaction_options_fifo", "{max_table_files_size=23;}"}}));
5520 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5521 23);
5522 ASSERT_EQ(dbfull()->GetOptions().ttl, 0);
5523 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5524 false);
5525
5526 ASSERT_OK(dbfull()->SetOptions({{"ttl", "97"}}));
5527 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5528 23);
5529 ASSERT_EQ(dbfull()->GetOptions().ttl, 97);
5530 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5531 false);
5532
5533 ASSERT_OK(dbfull()->SetOptions({{"ttl", "203"}}));
5534 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5535 23);
5536 ASSERT_EQ(dbfull()->GetOptions().ttl, 203);
5537 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5538 false);
5539
5540 ASSERT_OK(dbfull()->SetOptions(
5541 {{"compaction_options_fifo", "{allow_compaction=true;}"}}));
5542 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5543 23);
5544 ASSERT_EQ(dbfull()->GetOptions().ttl, 203);
5545 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5546 true);
5547
5548 ASSERT_OK(dbfull()->SetOptions(
5549 {{"compaction_options_fifo", "{max_table_files_size=31;}"}}));
5550 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5551 31);
5552 ASSERT_EQ(dbfull()->GetOptions().ttl, 203);
5553 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5554 true);
5555
5556 ASSERT_OK(dbfull()->SetOptions(
5557 {{"compaction_options_fifo",
5558 "{max_table_files_size=51;allow_compaction=true;}"}}));
5559 ASSERT_OK(dbfull()->SetOptions({{"ttl", "49"}}));
5560 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5561 51);
5562 ASSERT_EQ(dbfull()->GetOptions().ttl, 49);
5563 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5564 true);
5565 }
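
// Note on the nested option syntax above ("{name=value;name=value;}"): as the
// assertions show, fields not mentioned inside the braces keep their previous
// values, which is why max_table_files_size survives the update that sets
// only allow_compaction.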
5566
5567 TEST_F(DBTest, DynamicUniversalCompactionOptions) {
5568 Options options;
5569 options.create_if_missing = true;
5570 options.env = env_;
5571 DestroyAndReopen(options);
5572
5573 // Initial defaults
5574 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 1U);
5575 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
5576 2u);
5577 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width,
5578 UINT_MAX);
5579 ASSERT_EQ(dbfull()
5580 ->GetOptions()
5581 .compaction_options_universal.max_size_amplification_percent,
5582 200u);
5583 ASSERT_EQ(dbfull()
5584 ->GetOptions()
5585 .compaction_options_universal.compression_size_percent,
5586 -1);
5587 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style,
5588 kCompactionStopStyleTotalSize);
5589 ASSERT_EQ(
5590 dbfull()->GetOptions().compaction_options_universal.allow_trivial_move,
5591 false);
5592
5593 ASSERT_OK(dbfull()->SetOptions(
5594 {{"compaction_options_universal", "{size_ratio=7;}"}}));
5595 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 7u);
5596 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
5597 2u);
5598 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width,
5599 UINT_MAX);
5600 ASSERT_EQ(dbfull()
5601 ->GetOptions()
5602 .compaction_options_universal.max_size_amplification_percent,
5603 200u);
5604 ASSERT_EQ(dbfull()
5605 ->GetOptions()
5606 .compaction_options_universal.compression_size_percent,
5607 -1);
5608 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style,
5609 kCompactionStopStyleTotalSize);
5610 ASSERT_EQ(
5611 dbfull()->GetOptions().compaction_options_universal.allow_trivial_move,
5612 false);
5613
5614 ASSERT_OK(dbfull()->SetOptions(
5615 {{"compaction_options_universal", "{min_merge_width=11;}"}}));
5616 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 7u);
5617 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
5618 11u);
5619 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width,
5620 UINT_MAX);
5621 ASSERT_EQ(dbfull()
5622 ->GetOptions()
5623 .compaction_options_universal.max_size_amplification_percent,
5624 200u);
5625 ASSERT_EQ(dbfull()
5626 ->GetOptions()
5627 .compaction_options_universal.compression_size_percent,
5628 -1);
5629 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style,
5630 kCompactionStopStyleTotalSize);
5631 ASSERT_EQ(
5632 dbfull()->GetOptions().compaction_options_universal.allow_trivial_move,
5633 false);
5634 }
5635 #endif // ROCKSDB_LITE
5636
5637 TEST_F(DBTest, FileCreationRandomFailure) {
5638 Options options;
5639 options.env = env_;
5640 options.create_if_missing = true;
5641 options.write_buffer_size = 100000; // Small write buffer
5642 options.target_file_size_base = 200000;
5643 options.max_bytes_for_level_base = 1000000;
5644 options.max_bytes_for_level_multiplier = 2;
5645
5646 DestroyAndReopen(options);
5647 Random rnd(301);
5648
5649 constexpr int kCDTKeysPerBuffer = 4;
5650 constexpr int kTestSize = kCDTKeysPerBuffer * 4096;
5651 constexpr int kTotalIteration = 20;
5652 // The second half of the test involves random failures
5653 // of file creation.
5654 constexpr int kRandomFailureTest = kTotalIteration / 2;
5655
5656 std::vector<std::string> values;
5657 for (int i = 0; i < kTestSize; ++i) {
5658 values.push_back("NOT_FOUND");
5659 }
5660 for (int j = 0; j < kTotalIteration; ++j) {
5661 if (j == kRandomFailureTest) {
5662 env_->non_writeable_rate_.store(90);
5663 }
5664 for (int k = 0; k < kTestSize; ++k) {
5665 // Here we expect some of the Puts to fail.
5666 std::string value = rnd.RandomString(100);
5667 Status s = Put(Key(k), Slice(value));
5668 if (s.ok()) {
5669 // update the latest successful put
5670 values[k] = value;
5671 }
5672 // But everything before the simulated failures begin should succeed.
5673 if (j < kRandomFailureTest) {
5674 ASSERT_OK(s);
5675 }
5676 }
5677 }
5678
5679 // If rocksdb does not do its job correctly, an internal assert will fail here.
5680 ASSERT_TRUE(dbfull()->TEST_WaitForFlushMemTable().IsIOError());
5681 ASSERT_TRUE(dbfull()->TEST_WaitForCompact().IsIOError());
5682
5683 // verify we have the latest successful update
5684 for (int k = 0; k < kTestSize; ++k) {
5685 auto v = Get(Key(k));
5686 ASSERT_EQ(v, values[k]);
5687 }
5688
5689 // reopen and reverify we have the latest successful update
5690 env_->non_writeable_rate_.store(0);
5691 Reopen(options);
5692 for (int k = 0; k < kTestSize; ++k) {
5693 auto v = Get(Key(k));
5694 ASSERT_EQ(v, values[k]);
5695 }
5696 }
5697
5698 #ifndef ROCKSDB_LITE
5699
5700 TEST_F(DBTest, DynamicMiscOptions) {
5701 // Test max_sequential_skip_in_iterations
5702 Options options;
5703 options.env = env_;
5704 options.create_if_missing = true;
5705 options.max_sequential_skip_in_iterations = 16;
5706 options.compression = kNoCompression;
5707 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
5708 DestroyAndReopen(options);
5709
5710 auto assert_reseek_count = [this, &options](int key_start, int num_reseek) {
5711 int key0 = key_start;
5712 int key1 = key_start + 1;
5713 int key2 = key_start + 2;
5714 Random rnd(301);
5715 ASSERT_OK(Put(Key(key0), rnd.RandomString(8)));
5716 for (int i = 0; i < 10; ++i) {
5717 ASSERT_OK(Put(Key(key1), rnd.RandomString(8)));
5718 }
5719 ASSERT_OK(Put(Key(key2), rnd.RandomString(8)));
5720 std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
5721 iter->Seek(Key(key1));
5722 ASSERT_TRUE(iter->Valid());
5723 ASSERT_EQ(iter->key().compare(Key(key1)), 0);
5724 iter->Next();
5725 ASSERT_TRUE(iter->Valid());
5726 ASSERT_EQ(iter->key().compare(Key(key2)), 0);
5727 ASSERT_EQ(num_reseek,
5728 TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
5729 };
5730 // No reseek
5731 assert_reseek_count(100, 0);
5732
5733 ASSERT_OK(dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "4"}}));
5734 // Clear memtable and make new option effective
5735 ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
5736 // Trigger reseek
5737 assert_reseek_count(200, 1);
5738
5739 ASSERT_OK(
5740 dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "16"}}));
5741 // Clear memtable and make new option effective
5742 ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
5743 // No new reseek (the ticker is cumulative, so it should stay at 1)
5744 assert_reseek_count(300, 1);
5745
5746 MutableCFOptions mutable_cf_options;
5747 CreateAndReopenWithCF({"pikachu"}, options);
5748 // Test soft_pending_compaction_bytes_limit,
5749 // hard_pending_compaction_bytes_limit
5750 ASSERT_OK(dbfull()->SetOptions(
5751 handles_[1], {{"soft_pending_compaction_bytes_limit", "200"},
5752 {"hard_pending_compaction_bytes_limit", "300"}}));
5753 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
5754 &mutable_cf_options));
5755 ASSERT_EQ(200, mutable_cf_options.soft_pending_compaction_bytes_limit);
5756 ASSERT_EQ(300, mutable_cf_options.hard_pending_compaction_bytes_limit);
5757 // Test report_bg_io_stats
5758 ASSERT_OK(
5759 dbfull()->SetOptions(handles_[1], {{"report_bg_io_stats", "true"}}));
5760 // sanity check
5761 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
5762 &mutable_cf_options));
5763 ASSERT_TRUE(mutable_cf_options.report_bg_io_stats);
5764 // Test compression
5765 // sanity check
5766 ASSERT_OK(dbfull()->SetOptions({{"compression", "kNoCompression"}}));
5767 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[0],
5768 &mutable_cf_options));
5769 ASSERT_EQ(CompressionType::kNoCompression, mutable_cf_options.compression);
5770
5771 if (Snappy_Supported()) {
5772 ASSERT_OK(dbfull()->SetOptions({{"compression", "kSnappyCompression"}}));
5773 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[0],
5774 &mutable_cf_options));
5775 ASSERT_EQ(CompressionType::kSnappyCompression,
5776 mutable_cf_options.compression);
5777 }
5778
5779 // paranoid_file_checks is already tested in db_block_cache_test
5780 ASSERT_OK(
5781 dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "true"}}));
5782 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
5783 &mutable_cf_options));
5784 ASSERT_TRUE(mutable_cf_options.report_bg_io_stats);
5785 ASSERT_TRUE(mutable_cf_options.check_flush_compaction_key_order);
5786
5787 ASSERT_OK(dbfull()->SetOptions(
5788 handles_[1], {{"check_flush_compaction_key_order", "false"}}));
5789 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
5790 &mutable_cf_options));
5791 ASSERT_FALSE(mutable_cf_options.check_flush_compaction_key_order);
5792 }
5793 #endif // ROCKSDB_LITE
5794
5795 TEST_F(DBTest, L0L1L2AndUpHitCounter) {
5796 const int kNumLevels = 3;
5797 const int kNumKeysPerLevel = 10000;
5798 const int kNumKeysPerDb = kNumLevels * kNumKeysPerLevel;
5799
5800 Options options = CurrentOptions();
5801 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
5802 Reopen(options);
5803
5804 // After the below loop there will be one file on each of L0, L1, and L2.
5805 int key = 0;
5806 for (int output_level = kNumLevels - 1; output_level >= 0; --output_level) {
5807 for (int i = 0; i < kNumKeysPerLevel; ++i) {
5808 ASSERT_OK(Put(Key(key), "val"));
5809 key++;
5810 }
5811 ASSERT_OK(Flush());
5812 for (int input_level = 0; input_level < output_level; ++input_level) {
5813 // `TEST_CompactRange(input_level, ...)` compacts from `input_level` to
5814 // `input_level + 1`.
5815 ASSERT_OK(dbfull()->TEST_CompactRange(input_level, nullptr, nullptr));
5816 }
5817 }
5818 assert(key == kNumKeysPerDb);
5819
5820 ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
5821 ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
5822 ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
5823
5824 for (int i = 0; i < kNumKeysPerDb; i++) {
5825 ASSERT_EQ(Get(Key(i)), "val");
5826 }
5827
5828 ASSERT_EQ(kNumKeysPerLevel, TestGetTickerCount(options, GET_HIT_L0));
5829 ASSERT_EQ(kNumKeysPerLevel, TestGetTickerCount(options, GET_HIT_L1));
5830 ASSERT_EQ(kNumKeysPerLevel, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
5831
5832 ASSERT_EQ(kNumKeysPerDb, TestGetTickerCount(options, GET_HIT_L0) +
5833 TestGetTickerCount(options, GET_HIT_L1) +
5834 TestGetTickerCount(options, GET_HIT_L2_AND_UP));
5835 }
5836
5837 TEST_F(DBTest, EncodeDecompressedBlockSizeTest) {
5838 // iter 0 -- zlib
5839 // iter 1 -- bzip2
5840 // iter 2 -- lz4
5841 // iter 3 -- lz4HC
5842 // iter 4 -- xpress
5843 CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
5844 kLZ4Compression, kLZ4HCCompression,
5845 kXpressCompression};
5846 for (auto comp : compressions) {
5847 if (!CompressionTypeSupported(comp)) {
5848 continue;
5849 }
5850 // first_table_version 1 -- generate with table_version == 1, read with
5851 // table_version == 2
5852 // first_table_version 2 -- generate with table_version == 2, read with
5853 // table_version == 1
5854 for (int first_table_version = 1; first_table_version <= 2;
5855 ++first_table_version) {
5856 BlockBasedTableOptions table_options;
5857 table_options.format_version = first_table_version;
5858 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
5859 Options options = CurrentOptions();
5860 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
5861 options.create_if_missing = true;
5862 options.compression = comp;
5863 DestroyAndReopen(options);
5864
5865 int kNumKeysWritten = 1000;
5866
5867 Random rnd(301);
5868 for (int i = 0; i < kNumKeysWritten; ++i) {
5869 // compressible string
5870 ASSERT_OK(Put(Key(i), rnd.RandomString(128) + std::string(128, 'a')));
5871 }
5872
5873 table_options.format_version = first_table_version == 1 ? 2 : 1;
5874 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
5875 Reopen(options);
5876 for (int i = 0; i < kNumKeysWritten; ++i) {
5877 auto r = Get(Key(i));
5878 ASSERT_EQ(r.substr(128), std::string(128, 'a'));
5879 }
5880 }
5881 }
5882 }
5883
5884 TEST_F(DBTest, CloseSpeedup) {
5885 Options options = CurrentOptions();
5886 options.compaction_style = kCompactionStyleLevel;
5887 options.write_buffer_size = 110 << 10; // 110KB
5888 options.arena_block_size = 4 << 10;
5889 options.level0_file_num_compaction_trigger = 2;
5890 options.num_levels = 4;
5891 options.max_bytes_for_level_base = 400 * 1024;
5892 options.max_write_buffer_number = 16;
5893
5894 // Block background threads
5895 env_->SetBackgroundThreads(1, Env::LOW);
5896 env_->SetBackgroundThreads(1, Env::HIGH);
5897 test::SleepingBackgroundTask sleeping_task_low;
5898 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
5899 Env::Priority::LOW);
5900 test::SleepingBackgroundTask sleeping_task_high;
5901 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
5902 &sleeping_task_high, Env::Priority::HIGH);
5903
5904 std::vector<std::string> filenames;
5905 ASSERT_OK(env_->GetChildren(dbname_, &filenames));
5906 // On Windows, the LOCK file cannot be deleted because it is locked by
5907 // db_test. After closing db_test, the LOCK file is unlocked and can be
5908 // deleted. Delete archival files.
5909 bool deleteDir = true;
5910 for (size_t i = 0; i < filenames.size(); ++i) {
5911 Status s = env_->DeleteFile(dbname_ + "/" + filenames[i]);
5912 if (!s.ok()) {
5913 deleteDir = false;
5914 }
5915 }
5916 if (deleteDir) {
5917 ASSERT_OK(env_->DeleteDir(dbname_));
5918 }
5919 DestroyAndReopen(options);
5920
5921 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
5922 env_->SetBackgroundThreads(1, Env::LOW);
5923 env_->SetBackgroundThreads(1, Env::HIGH);
5924 Random rnd(301);
5925 int key_idx = 0;
5926
5927 // First three 110KB files are not going to level 2
5928 // After that, (100K, 200K)
5929 for (int num = 0; num < 5; num++) {
5930 GenerateNewFile(&rnd, &key_idx, true);
5931 }
5932
5933 ASSERT_EQ(0, GetSstFileCount(dbname_));
5934
5935 Close();
5936 ASSERT_EQ(0, GetSstFileCount(dbname_));
5937
5938 // Unblock background threads
5939 sleeping_task_high.WakeUp();
5940 sleeping_task_high.WaitUntilDone();
5941 sleeping_task_low.WakeUp();
5942 sleeping_task_low.WaitUntilDone();
5943
5944 Destroy(options);
5945 }
5946
5947 class DelayedMergeOperator : public MergeOperator {
5948 private:
5949 DBTest* db_test_;
5950
5951 public:
5952 explicit DelayedMergeOperator(DBTest* d) : db_test_(d) {}
5953
5954 bool FullMergeV2(const MergeOperationInput& merge_in,
5955 MergeOperationOutput* merge_out) const override {
5956 db_test_->env_->MockSleepForMicroseconds(1000 *
5957 merge_in.operand_list.size());
5958 merge_out->new_value = "";
5959 return true;
5960 }
5961
5962 const char* Name() const override { return "DelayedMergeOperator"; }
5963 };
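
// DelayedMergeOperator charges 1000 simulated microseconds per merge operand
// via the mock clock, so the two tests below can assert exact values for
// MERGE_OPERATION_TOTAL_TIME instead of sampling a real timer.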
5964
5965 TEST_F(DBTest, MergeTestTime) {
5966 std::string one, two, three;
5967 PutFixed64(&one, 1);
5968 PutFixed64(&two, 2);
5969 PutFixed64(&three, 3);
5970
5971 // Enable time profiling
5972 SetPerfLevel(kEnableTime);
5973 Options options = CurrentOptions();
5974 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
5975 options.merge_operator.reset(new DelayedMergeOperator(this));
5976 SetTimeElapseOnlySleepOnReopen(&options);
5977 DestroyAndReopen(options);
5978
5979 // NOTE: Presumed unnecessary and removed: resetting mock time in env
5980
5981 ASSERT_EQ(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
5982 ASSERT_OK(db_->Put(WriteOptions(), "foo", one));
5983 ASSERT_OK(Flush());
5984 ASSERT_OK(db_->Merge(WriteOptions(), "foo", two));
5985 ASSERT_OK(Flush());
5986 ASSERT_OK(db_->Merge(WriteOptions(), "foo", three));
5987 ASSERT_OK(Flush());
5988
5989 ReadOptions opt;
5990 opt.verify_checksums = true;
5991 opt.snapshot = nullptr;
5992 std::string result;
5993 ASSERT_OK(db_->Get(opt, "foo", &result));
5994
5995 ASSERT_EQ(2000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
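// Arithmetic behind the value above: Get() merged the two operands at
// 1000 simulated microseconds each, and the ticker appears to be recorded
// in nanoseconds: 2 * 1000us = 2,000,000ns. The iteration below repeats
// the merge, doubling the cumulative total to 4,000,000.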
5996
5997 ReadOptions read_options;
5998 std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
5999 int count = 0;
6000 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
6001 ASSERT_OK(iter->status());
6002 ++count;
6003 }
6004
6005 ASSERT_EQ(1, count);
6006 ASSERT_EQ(4000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
6007 #ifdef ROCKSDB_USING_THREAD_STATUS
6008 ASSERT_GT(TestGetTickerCount(options, FLUSH_WRITE_BYTES), 0);
6009 #endif // ROCKSDB_USING_THREAD_STATUS
6010 }
6011
6012 #ifndef ROCKSDB_LITE
6013 TEST_P(DBTestWithParam, MergeCompactionTimeTest) {
6014 SetPerfLevel(kEnableTime);
6015 Options options = CurrentOptions();
6016 options.compaction_filter_factory = std::make_shared<KeepFilterFactory>();
6017 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
6018 options.merge_operator.reset(new DelayedMergeOperator(this));
6019 options.disable_auto_compactions = true;
6020 options.max_subcompactions = max_subcompactions_;
6021 SetTimeElapseOnlySleepOnReopen(&options);
6022 DestroyAndReopen(options);
6023
6024 constexpr unsigned n = 1000;
6025 for (unsigned i = 0; i < n; i++) {
6026 ASSERT_OK(db_->Merge(WriteOptions(), "foo", "TEST"));
6027 ASSERT_OK(Flush());
6028 }
6029 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
6030
6031 CompactRangeOptions cro;
6032 cro.exclusive_manual_compaction = exclusive_manual_compaction_;
6033 ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
6034
6035 ASSERT_EQ(uint64_t{n} * 1000000U,
6036 TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
6037 }
6038
6039 TEST_P(DBTestWithParam, FilterCompactionTimeTest) {
6040 Options options = CurrentOptions();
6041 options.compaction_filter_factory =
6042 std::make_shared<DelayFilterFactory>(this);
6043 options.disable_auto_compactions = true;
6044 options.create_if_missing = true;
6045 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
6046 options.statistics->set_stats_level(kExceptTimeForMutex);
6047 options.max_subcompactions = max_subcompactions_;
6048 SetTimeElapseOnlySleepOnReopen(&options);
6049 DestroyAndReopen(options);
6050
6051 unsigned n = 0;
6052 // put some data
6053 for (int table = 0; table < 4; ++table) {
6054 for (int i = 0; i < 10 + table; ++i) {
6055 ASSERT_OK(Put(std::to_string(table * 100 + i), "val"));
6056 ++n;
6057 }
6058 ASSERT_OK(Flush());
6059 }
6060
6061 CompactRangeOptions cro;
6062 cro.exclusive_manual_compaction = exclusive_manual_compaction_;
6063 ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
6064 ASSERT_EQ(0U, CountLiveFiles());
6065
6066 Reopen(options);
6067
6068 Iterator* itr = db_->NewIterator(ReadOptions());
6069 itr->SeekToFirst();
6070 ASSERT_OK(itr->status());
6071 ASSERT_EQ(uint64_t{n} * 1000000U,
6072 TestGetTickerCount(options, FILTER_OPERATION_TOTAL_TIME));
6073 delete itr;
6074 }
6075 #endif // ROCKSDB_LITE
6076
6077 TEST_F(DBTest, TestLogCleanup) {
6078 Options options = CurrentOptions();
6079 options.write_buffer_size = 64 * 1024; // very small
6080 // only two memtables allowed ==> only two log files
6081 options.max_write_buffer_number = 2;
6082 Reopen(options);
6083
6084 for (int i = 0; i < 100000; ++i) {
6085 ASSERT_OK(Put(Key(i), "val"));
6086 // Only 2 memtables will be alive at a time, so logs_to_free should always
6087 // stay at 2 or below (hence the < 3 assertion)
6088 ASSERT_LT(dbfull()->TEST_LogsToFreeSize(), static_cast<size_t>(3));
6089 }
6090 }
6091
6092 #ifndef ROCKSDB_LITE
6093 TEST_F(DBTest, EmptyCompactedDB) {
6094 Options options = CurrentOptions();
6095 options.max_open_files = -1;
6096 Close();
6097 ASSERT_OK(ReadOnlyReopen(options));
6098 Status s = Put("new", "value");
6099 ASSERT_TRUE(s.IsNotSupported());
6100 Close();
6101 }
6102 #endif // ROCKSDB_LITE
6103
6104 #ifndef ROCKSDB_LITE
6105 TEST_F(DBTest, SuggestCompactRangeTest) {
6106 class CompactionFilterFactoryGetContext : public CompactionFilterFactory {
6107 public:
6108 std::unique_ptr<CompactionFilter> CreateCompactionFilter(
6109 const CompactionFilter::Context& context) override {
6110 saved_context = context;
6111 std::unique_ptr<CompactionFilter> empty_filter;
6112 return empty_filter;
6113 }
6114 const char* Name() const override {
6115 return "CompactionFilterFactoryGetContext";
6116 }
6117 static bool IsManual(CompactionFilterFactory* compaction_filter_factory) {
6118 return reinterpret_cast<CompactionFilterFactoryGetContext*>(
6119 compaction_filter_factory)
6120 ->saved_context.is_manual_compaction;
6121 }
6122 CompactionFilter::Context saved_context;
6123 };
6124
6125 Options options = CurrentOptions();
6126 options.memtable_factory.reset(test::NewSpecialSkipListFactory(
6127 DBTestBase::kNumKeysByGenerateNewRandomFile));
6128 options.compaction_style = kCompactionStyleLevel;
6129 options.compaction_filter_factory.reset(
6130 new CompactionFilterFactoryGetContext());
6131 options.write_buffer_size = 200 << 10;
6132 options.arena_block_size = 4 << 10;
6133 options.level0_file_num_compaction_trigger = 4;
6134 options.num_levels = 4;
6135 options.compression = kNoCompression;
6136 options.max_bytes_for_level_base = 450 << 10;
6137 options.target_file_size_base = 98 << 10;
6138 options.max_compaction_bytes = static_cast<uint64_t>(1) << 60; // inf
6139
6140 Reopen(options);
6141
6142 Random rnd(301);
6143
6144 for (int num = 0; num < 10; num++) {
6145 GenerateNewRandomFile(&rnd);
6146 }
6147
6148 ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual(
6149 options.compaction_filter_factory.get()));
6150
6151 // Make sure either L0 or L1 has a file
6152 while (NumTableFilesAtLevel(0) == 0 && NumTableFilesAtLevel(1) == 0) {
6153 GenerateNewRandomFile(&rnd);
6154 }
6155
6156 // compact it three times
6157 for (int i = 0; i < 3; ++i) {
6158 ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr));
6159 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6160 }
6161
6162 // All files are compacted
6163 ASSERT_EQ(0, NumTableFilesAtLevel(0));
6164 ASSERT_EQ(0, NumTableFilesAtLevel(1));
6165
6166 GenerateNewRandomFile(&rnd);
6167 ASSERT_EQ(1, NumTableFilesAtLevel(0));
6168
6169 // nonoverlapping with the file on level 0
6170 Slice start("a"), end("b");
6171 ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
6172 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6173
6174 // should not compact the level 0 file
6175 ASSERT_EQ(1, NumTableFilesAtLevel(0));
6176
6177 start = Slice("j");
6178 end = Slice("m");
6179 ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
6180 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6181 // SuggestCompactRange() is not going to be reported as manual compaction
6182 ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual(
6183 options.compaction_filter_factory.get()));
6184
6185 // Now it should compact the level 0 file. Since that is a trivial move to
6186 // L1, it triggers another compaction down to L2.
6187 ASSERT_EQ(0, NumTableFilesAtLevel(0));
6188 ASSERT_EQ(0, NumTableFilesAtLevel(1));
6189 }
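
// SuggestCompactRange() only marks the overlapping files as candidates for
// compaction; the actual work is picked up by the background compaction
// scheduler, which is presumably why it is not reported as a manual
// compaction to the compaction filter above.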
6190
6191 TEST_F(DBTest, SuggestCompactRangeUniversal) {
6192 Options options = CurrentOptions();
6193 options.memtable_factory.reset(test::NewSpecialSkipListFactory(
6194 DBTestBase::kNumKeysByGenerateNewRandomFile));
6195 options.compaction_style = kCompactionStyleUniversal;
6196 options.write_buffer_size = 200 << 10;
6197 options.arena_block_size = 4 << 10;
6198 options.level0_file_num_compaction_trigger = 4;
6199 options.num_levels = 4;
6200 options.compression = kNoCompression;
6201 options.max_bytes_for_level_base = 450 << 10;
6202 options.target_file_size_base = 98 << 10;
6203 options.max_compaction_bytes = static_cast<uint64_t>(1) << 60; // inf
6204
6205 Reopen(options);
6206
6207 Random rnd(301);
6208
6209 for (int num = 0; num < 10; num++) {
6210 GenerateNewRandomFile(&rnd);
6211 }
6212
6213 ASSERT_EQ("1,2,3,4", FilesPerLevel());
6214 for (int i = 0; i < 3; i++) {
6215 ASSERT_OK(
6216 db_->SuggestCompactRange(db_->DefaultColumnFamily(), nullptr, nullptr));
6217 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6218 }
6219
6220 // All files are compacted
6221 ASSERT_EQ(0, NumTableFilesAtLevel(0));
6222 ASSERT_EQ(0, NumTableFilesAtLevel(1));
6223 ASSERT_EQ(0, NumTableFilesAtLevel(2));
6224
6225 GenerateNewRandomFile(&rnd);
6226 ASSERT_EQ(1, NumTableFilesAtLevel(0));
6227
6228 // nonoverlapping with the file on level 0
6229 Slice start("a"), end("b");
6230 ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
6231 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6232
6233 // should not compact the level 0 file
6234 ASSERT_EQ(1, NumTableFilesAtLevel(0));
6235
6236 start = Slice("j");
6237 end = Slice("m");
6238 ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
6239 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6240
6241 // now it should compact the level 0 file to the last level
6242 ASSERT_EQ(0, NumTableFilesAtLevel(0));
6243 ASSERT_EQ(0, NumTableFilesAtLevel(1));
6244 }
6245
6246 TEST_F(DBTest, PromoteL0) {
6247 Options options = CurrentOptions();
6248 options.disable_auto_compactions = true;
6249 options.write_buffer_size = 10 * 1024 * 1024;
6250 DestroyAndReopen(options);
6251
6252 // non overlapping ranges
6253 std::vector<std::pair<int32_t, int32_t>> ranges = {
6254 {81, 160}, {0, 80}, {161, 240}, {241, 320}};
6255
6256 int32_t value_size = 10 * 1024; // 10 KB
6257
6258 Random rnd(301);
6259 std::map<int32_t, std::string> values;
6260 for (const auto& range : ranges) {
6261 for (int32_t j = range.first; j < range.second; j++) {
6262 values[j] = rnd.RandomString(value_size);
6263 ASSERT_OK(Put(Key(j), values[j]));
6264 }
6265 ASSERT_OK(Flush());
6266 }
6267
6268 int32_t level0_files = NumTableFilesAtLevel(0, 0);
6269 ASSERT_EQ(level0_files, ranges.size());
6270 ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // No files in L1
6271
6272 // Promote L0 level to L2.
6273 ASSERT_OK(experimental::PromoteL0(db_, db_->DefaultColumnFamily(), 2));
6274 // We expect that all the files were trivially moved from L0 to L2
6275 ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
6276 ASSERT_EQ(NumTableFilesAtLevel(2, 0), level0_files);
6277
6278 for (const auto& kv : values) {
6279 ASSERT_EQ(Get(Key(kv.first)), kv.second);
6280 }
6281 }
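
// PromoteL0 is an experimental API that trivially moves all L0 files to the
// requested target level. As PromoteL0Failure below demonstrates, it refuses
// to run when L0 files overlap one another or when the lower levels involved
// are non-empty.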
6282
6283 TEST_F(DBTest, PromoteL0Failure) {
6284 Options options = CurrentOptions();
6285 options.disable_auto_compactions = true;
6286 options.write_buffer_size = 10 * 1024 * 1024;
6287 DestroyAndReopen(options);
6288
6289 // Produce two L0 files with overlapping ranges.
6290 ASSERT_OK(Put(Key(0), ""));
6291 ASSERT_OK(Put(Key(3), ""));
6292 ASSERT_OK(Flush());
6293 ASSERT_OK(Put(Key(1), ""));
6294 ASSERT_OK(Flush());
6295
6296 Status status;
6297 // Fails because L0 has overlapping files.
6298 status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
6299 ASSERT_TRUE(status.IsInvalidArgument());
6300
6301 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
6302 // Now there is a file in L1.
6303 ASSERT_GE(NumTableFilesAtLevel(1, 0), 1);
6304
6305 ASSERT_OK(Put(Key(5), ""));
6306 ASSERT_OK(Flush());
6307 // Fails because L1 is non-empty.
6308 status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
6309 ASSERT_TRUE(status.IsInvalidArgument());
6310 }
6311
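// Editorial sketch (assumes an open DB and a valid handle): PromoteL0()
// trivially moves every L0 file to target_level. As the two tests above show,
// it returns InvalidArgument when L0 files overlap each other or when the
// levels being promoted into are not empty.
[[maybe_unused]] static Status PromoteL0Sketch(DB* db, ColumnFamilyHandle* cf) {
  return experimental::PromoteL0(db, cf, /*target_level=*/2);
}
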
6312 // Github issue #596
6313 TEST_F(DBTest, CompactRangeWithEmptyBottomLevel) {
6314 const int kNumLevels = 2;
6315 const int kNumL0Files = 2;
6316 Options options = CurrentOptions();
6317 options.disable_auto_compactions = true;
6318 options.num_levels = kNumLevels;
6319 DestroyAndReopen(options);
6320
6321 Random rnd(301);
6322 for (int i = 0; i < kNumL0Files; ++i) {
6323 ASSERT_OK(Put(Key(0), rnd.RandomString(1024)));
6324 ASSERT_OK(Flush());
6325 }
6326 ASSERT_EQ(NumTableFilesAtLevel(0), kNumL0Files);
6327 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
6328
6329 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
6330 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
6331 ASSERT_EQ(NumTableFilesAtLevel(1), kNumL0Files);
6332 }
6333 #endif // ROCKSDB_LITE
6334
6335 TEST_F(DBTest, AutomaticConflictsWithManualCompaction) {
6336 const int kNumL0Files = 50;
6337 Options options = CurrentOptions();
6338 options.level0_file_num_compaction_trigger = 4;
6339 // never slow down / stop
6340 options.level0_slowdown_writes_trigger = 999999;
6341 options.level0_stop_writes_trigger = 999999;
6342 options.max_background_compactions = 10;
6343 DestroyAndReopen(options);
6344
6345 // Schedule automatic compactions after the manual one starts, but before
6346 // it finishes, to ensure a conflict.
6347 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
6348 {{"DBImpl::BackgroundCompaction:Start",
6349 "DBTest::AutomaticConflictsWithManualCompaction:PrePuts"},
6350 {"DBTest::AutomaticConflictsWithManualCompaction:PostPuts",
6351 "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}});
6352 std::atomic<int> callback_count(0);
6353 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6354 "DBImpl::MaybeScheduleFlushOrCompaction:Conflict",
6355 [&](void* /*arg*/) { callback_count.fetch_add(1); });
6356 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
6357
6358 Random rnd(301);
6359 for (int i = 0; i < 2; ++i) {
6360 // put two keys to ensure no trivial move
6361 for (int j = 0; j < 2; ++j) {
6362 ASSERT_OK(Put(Key(j), rnd.RandomString(1024)));
6363 }
6364 ASSERT_OK(Flush());
6365 }
6366 port::Thread manual_compaction_thread([this]() {
6367 CompactRangeOptions croptions;
6368 croptions.exclusive_manual_compaction = true;
6369 ASSERT_OK(db_->CompactRange(croptions, nullptr, nullptr));
6370 });
6371
6372 TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PrePuts");
6373 for (int i = 0; i < kNumL0Files; ++i) {
6374 // put two keys to ensure no trivial move
6375 for (int j = 0; j < 2; ++j) {
6376 ASSERT_OK(Put(Key(j), rnd.RandomString(1024)));
6377 }
6378 ASSERT_OK(Flush());
6379 }
6380 TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PostPuts");
6381
6382 ASSERT_GE(callback_count.load(), 1);
6383 for (int i = 0; i < 2; ++i) {
6384 ASSERT_NE("NOT_FOUND", Get(Key(i)));
6385 }
6386 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6387 manual_compaction_thread.join();
6388 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6389 }
6390
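// Editorial sketch: the knob under test above. With exclusive_manual_compaction
// set, CompactRange() conflicts with automatic compactions scheduled while it
// runs; each rejected schedule fires the Conflict callback counted above.
[[maybe_unused]] static Status ExclusiveManualCompactionSketch(DB* db) {
  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = true;
  return db->CompactRange(cro, nullptr, nullptr);
}
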
6391 #ifndef ROCKSDB_LITE
6392 TEST_F(DBTest, CompactFilesShouldTriggerAutoCompaction) {
6393 Options options = CurrentOptions();
6394 options.max_background_compactions = 1;
6395 options.level0_file_num_compaction_trigger = 4;
6396 options.level0_slowdown_writes_trigger = 36;
6397 options.level0_stop_writes_trigger = 36;
6398 DestroyAndReopen(options);
6399
6400 // generate files for manual compaction
6401 Random rnd(301);
6402 for (int i = 0; i < 2; ++i) {
6403 // put two keys to ensure no trivial move
6404 for (int j = 0; j < 2; ++j) {
6405 ASSERT_OK(Put(Key(j), rnd.RandomString(1024)));
6406 }
6407 ASSERT_OK(Flush());
6408 }
6409
6410 ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_meta_data;
6411 db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data);
6412
6413 std::vector<std::string> input_files;
6414 input_files.push_back(cf_meta_data.levels[0].files[0].name);
6415
6416 SyncPoint::GetInstance()->LoadDependency({
6417 {"CompactFilesImpl:0",
6418 "DBTest::CompactFilesShouldTriggerAutoCompaction:Begin"},
6419 {"DBTest::CompactFilesShouldTriggerAutoCompaction:End",
6420 "CompactFilesImpl:1"},
6421 });
6422
6423 SyncPoint::GetInstance()->EnableProcessing();
6424
6425 port::Thread manual_compaction_thread([&]() {
6426 auto s = db_->CompactFiles(CompactionOptions(), db_->DefaultColumnFamily(),
6427 input_files, 0);
6428 ASSERT_OK(s);
6429 });
6430
6431 TEST_SYNC_POINT("DBTest::CompactFilesShouldTriggerAutoCompaction:Begin");
6432 // generate enough files to trigger compaction
6433 for (int i = 0; i < 20; ++i) {
6434 for (int j = 0; j < 2; ++j) {
6435 ASSERT_OK(Put(Key(j), rnd.RandomString(1024)));
6436 }
6437 ASSERT_OK(Flush());
6438 }
6439 db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data);
6440 ASSERT_GT(cf_meta_data.levels[0].files.size(),
6441 options.level0_file_num_compaction_trigger);
6442 TEST_SYNC_POINT("DBTest::CompactFilesShouldTriggerAutoCompaction:End");
6443
6444 manual_compaction_thread.join();
6445 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6446
6447 db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data);
6448 ASSERT_LE(cf_meta_data.levels[0].files.size(),
6449 options.level0_file_num_compaction_trigger);
6450 }
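
// Editorial sketch (hypothetical helper, mirroring the test above): pick a
// concrete L0 file from fresh metadata and rewrite it in place at level 0.
[[maybe_unused]] static Status CompactFirstL0FileSketch(DB* db) {
  ColumnFamilyMetaData meta;
  db->GetColumnFamilyMetaData(db->DefaultColumnFamily(), &meta);
  if (meta.levels.empty() || meta.levels[0].files.empty()) {
    return Status::InvalidArgument("no L0 files");
  }
  std::vector<std::string> inputs = {meta.levels[0].files[0].name};
  // If L0 is still over the compaction trigger afterwards, an automatic
  // compaction should be scheduled -- the property the test verifies.
  return db->CompactFiles(CompactionOptions(), db->DefaultColumnFamily(),
                          inputs, /*output_level=*/0);
}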
6451 #endif // ROCKSDB_LITE
6452
6453 // Github issue #595
6454 // Large write batch with column families
6455 TEST_F(DBTest, LargeBatchWithColumnFamilies) {
6456 Options options = CurrentOptions();
6457 options.env = env_;
6458 options.write_buffer_size = 100000; // Small write buffer
6459 CreateAndReopenWithCF({"pikachu"}, options);
6460 int64_t j = 0;
6461 for (int i = 0; i < 5; i++) {
6462 for (int pass = 1; pass <= 3; pass++) {
6463 WriteBatch batch;
6464 size_t write_size = 1024 * 1024 * (5 + i);
6465 fprintf(stderr, "prepare: %" ROCKSDB_PRIszt " MB, pass:%d\n",
6466 (write_size / 1024 / 1024), pass);
6467 for (;;) {
6468 std::string data(3000, j++ % 127 + 20);
6469 data += std::to_string(j);
6470 ASSERT_OK(batch.Put(handles_[0], Slice(data), Slice(data)));
6471 if (batch.GetDataSize() > write_size) {
6472 break;
6473 }
6474 }
6475 fprintf(stderr, "write: %" ROCKSDB_PRIszt " MB\n",
6476 (batch.GetDataSize() / 1024 / 1024));
6477 ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
6478 fprintf(stderr, "done\n");
6479 }
6480 }
6481 // make sure we can re-open it.
6482 ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
6483 }
6484
6485 // Make sure that Flushes can proceed in parallel with CompactRange()
6486 TEST_F(DBTest, FlushesInParallelWithCompactRange) {
6487 // iter == 0 -- leveled
6488 // iter == 1 -- leveled, but throw in a flush between two levels compacting
6489 // iter == 2 -- universal
6490 for (int iter = 0; iter < 3; ++iter) {
6491 Options options = CurrentOptions();
6492 if (iter < 2) {
6493 options.compaction_style = kCompactionStyleLevel;
6494 } else {
6495 options.compaction_style = kCompactionStyleUniversal;
6496 }
6497 options.write_buffer_size = 110 << 10;
6498 options.level0_file_num_compaction_trigger = 4;
6499 options.num_levels = 4;
6500 options.compression = kNoCompression;
6501 options.max_bytes_for_level_base = 450 << 10;
6502 options.target_file_size_base = 98 << 10;
6503 options.max_write_buffer_number = 2;
6504
6505 DestroyAndReopen(options);
6506
6507 Random rnd(301);
6508 for (int num = 0; num < 14; num++) {
6509 GenerateNewRandomFile(&rnd);
6510 }
6511
6512 if (iter == 1) {
6513 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
6514 {{"DBImpl::RunManualCompaction()::1",
6515 "DBTest::FlushesInParallelWithCompactRange:1"},
6516 {"DBTest::FlushesInParallelWithCompactRange:2",
6517 "DBImpl::RunManualCompaction()::2"}});
6518 } else {
6519 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
6520 {{"CompactionJob::Run():Start",
6521 "DBTest::FlushesInParallelWithCompactRange:1"},
6522 {"DBTest::FlushesInParallelWithCompactRange:2",
6523 "CompactionJob::Run():End"}});
6524 }
6525 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
6526
6527 std::vector<port::Thread> threads;
6528 threads.emplace_back([&]() { Compact("a", "z"); });
6529
6530 TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:1");
6531
6532 // This has to start a flush. If flushes are blocked, this will try to
6533 // create 3 memtables, and that will fail because
6534 // max_write_buffer_number is 2.
6535 for (int num = 0; num < 3; num++) {
6536 GenerateNewRandomFile(&rnd, /* nowait */ true);
6537 }
6538
6539 TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:2");
6540
6541 for (auto& t : threads) {
6542 t.join();
6543 }
6544 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6545 }
6546 }
6547
6548 TEST_F(DBTest, DelayedWriteRate) {
6549 const int kEntriesPerMemTable = 100;
6550 const int kTotalFlushes = 12;
6551
6552 Options options = CurrentOptions();
6553 env_->SetBackgroundThreads(1, Env::LOW);
6554 options.env = env_;
6555 options.write_buffer_size = 100000000;
6556 options.max_write_buffer_number = 256;
6557 options.max_background_compactions = 1;
6558 options.level0_file_num_compaction_trigger = 3;
6559 options.level0_slowdown_writes_trigger = 3;
6560 options.level0_stop_writes_trigger = 999999;
6561 options.delayed_write_rate = 20000000; // Start at 20MB/s
6562 options.memtable_factory.reset(
6563 test::NewSpecialSkipListFactory(kEntriesPerMemTable));
6564
6565 SetTimeElapseOnlySleepOnReopen(&options);
6566 CreateAndReopenWithCF({"pikachu"}, options);
6567
6568 // Block compactions
6569 test::SleepingBackgroundTask sleeping_task_low;
6570 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
6571 Env::Priority::LOW);
6572
6573 for (int i = 0; i < 3; i++) {
6574 ASSERT_OK(Put(Key(i), std::string(10000, 'x')));
6575 ASSERT_OK(Flush());
6576 }
6577
6578 // These writes will be slowed down by the (decaying) delayed write rate
6579 uint64_t estimated_sleep_time = 0;
6580 Random rnd(301);
6581 ASSERT_OK(Put("", ""));
6582 uint64_t cur_rate = options.delayed_write_rate;
6583 for (int i = 0; i < kTotalFlushes; i++) {
6584 uint64_t size_memtable = 0;
6585 for (int j = 0; j < kEntriesPerMemTable; j++) {
6586 auto rand_num = rnd.Uniform(20);
6587 // Cube the random number to spread entry sizes over a wider range.
6588 size_t entry_size = rand_num * rand_num * rand_num;
6589 WriteOptions wo;
6590 ASSERT_OK(Put(Key(i), std::string(entry_size, 'x'), wo));
6591 size_memtable += entry_size + 18; // plus a rough per-entry overhead
6592 // Occasionally sleep a while
6593 if (rnd.Uniform(20) == 6) {
6594 env_->SleepForMicroseconds(2666);
6595 }
6596 }
6597 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
6598 estimated_sleep_time += size_memtable * 1000000u / cur_rate;
6599 // The rate is cut twice per flush: once at the memtable switch and once when the flush finishes.
6600 cur_rate = static_cast<uint64_t>(static_cast<double>(cur_rate) *
6601 kIncSlowdownRatio * kIncSlowdownRatio);
6602 }
6603 // Check that the total sleep time falls into the expected rough range.
6604 ASSERT_GT(env_->NowMicros(), estimated_sleep_time / 2);
6605 ASSERT_LT(env_->NowMicros(), estimated_sleep_time * 2);
6606
6607 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6608 sleeping_task_low.WakeUp();
6609 sleeping_task_low.WaitUntilDone();
6610 }
6611
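// Editorial sketch of the decay modeled above: each flush cycle applies
// kIncSlowdownRatio twice (memtable switch, then flush completion), so after
// n flushes the delayed rate is roughly initial_rate * kIncSlowdownRatio^(2n).
[[maybe_unused]] static uint64_t DelayedRateAfterFlushes(uint64_t initial_rate,
                                                         int flushes) {
  double rate = static_cast<double>(initial_rate);
  for (int i = 0; i < flushes; ++i) {
    rate *= kIncSlowdownRatio * kIncSlowdownRatio;
  }
  return static_cast<uint64_t>(rate);
}
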
6612 TEST_F(DBTest, HardLimit) {
6613 Options options = CurrentOptions();
6614 options.env = env_;
6615 env_->SetBackgroundThreads(1, Env::LOW);
6616 options.max_write_buffer_number = 256;
6617 options.write_buffer_size = 110 << 10; // 110KB
6618 options.arena_block_size = 4 * 1024;
6619 options.level0_file_num_compaction_trigger = 4;
6620 options.level0_slowdown_writes_trigger = 999999;
6621 options.level0_stop_writes_trigger = 999999;
6622 options.hard_pending_compaction_bytes_limit = 800 << 10;
6623 options.max_bytes_for_level_base = 10000000000u;
6624 options.max_background_compactions = 1;
6625 options.memtable_factory.reset(
6626 test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
6627
6628 env_->SetBackgroundThreads(1, Env::LOW);
6629 test::SleepingBackgroundTask sleeping_task_low;
6630 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
6631 Env::Priority::LOW);
6632
6633 CreateAndReopenWithCF({"pikachu"}, options);
6634
6635 std::atomic<int> callback_count(0);
6636 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6637 "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) {
6638 callback_count.fetch_add(1);
6639 sleeping_task_low.WakeUp();
6640 });
6641 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
6642
6643 Random rnd(301);
6644 int key_idx = 0;
6645 for (int num = 0; num < 5; num++) {
6646 GenerateNewFile(&rnd, &key_idx, true);
6647 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
6648 }
6649
6650 ASSERT_EQ(0, callback_count.load());
6651
6652 for (int num = 0; num < 5; num++) {
6653 GenerateNewFile(&rnd, &key_idx, true);
6654 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
6655 }
6656 ASSERT_GE(callback_count.load(), 1);
6657
6658 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6659 sleeping_task_low.WaitUntilDone();
6660 }
6661
6662 #if !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
6663 class WriteStallListener : public EventListener {
6664 public:
6665 WriteStallListener() : condition_(WriteStallCondition::kNormal) {}
6666 void OnStallConditionsChanged(const WriteStallInfo& info) override {
6667 MutexLock l(&mutex_);
6668 condition_ = info.condition.cur;
6669 }
6670 bool CheckCondition(WriteStallCondition expected) {
6671 MutexLock l(&mutex_);
6672 return expected == condition_;
6673 }
6674
6675 private:
6676 port::Mutex mutex_;
6677 WriteStallCondition condition_;
6678 };
6679
6680 TEST_F(DBTest, SoftLimit) {
6681 Options options = CurrentOptions();
6682 options.env = env_;
6683 options.write_buffer_size = 100000; // Small write buffer
6684 options.max_write_buffer_number = 256;
6685 options.level0_file_num_compaction_trigger = 1;
6686 options.level0_slowdown_writes_trigger = 3;
6687 options.level0_stop_writes_trigger = 999999;
6688 options.delayed_write_rate = 20000; // About 20KB/s limited rate
6689 options.soft_pending_compaction_bytes_limit = 160000;
6690 options.target_file_size_base = 99999999; // All into one file
6691 options.max_bytes_for_level_base = 50000;
6692 options.max_bytes_for_level_multiplier = 10;
6693 options.max_background_compactions = 1;
6694 options.compression = kNoCompression;
6695 WriteStallListener* listener = new WriteStallListener();
6696 options.listeners.emplace_back(listener);
6697
6698 // FlushMemtable with opt.wait=true does not wait for
6699 // `OnStallConditionsChanged` to be called. The event listener is triggered
6700 // on `JobContext::Clean`, which happens after the flush result is
6701 // installed. We use a sync point to build a custom WaitForFlush that
6702 // waits for the context cleanup.
6703 port::Mutex flush_mutex;
6704 port::CondVar flush_cv(&flush_mutex);
6705 bool flush_finished = false;
6706 auto InstallFlushCallback = [&]() {
6707 {
6708 MutexLock l(&flush_mutex);
6709 flush_finished = false;
6710 }
6711 SyncPoint::GetInstance()->SetCallBack(
6712 "DBImpl::BackgroundCallFlush:ContextCleanedUp", [&](void*) {
6713 {
6714 MutexLock l(&flush_mutex);
6715 flush_finished = true;
6716 }
6717 flush_cv.SignalAll();
6718 });
6719 };
6720 auto WaitForFlush = [&]() {
6721 {
6722 MutexLock l(&flush_mutex);
6723 while (!flush_finished) {
6724 flush_cv.Wait();
6725 }
6726 }
6727 SyncPoint::GetInstance()->ClearCallBack(
6728 "DBImpl::BackgroundCallFlush:ContextCleanedUp");
6729 };
6730
6731 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
6732
6733 Reopen(options);
6734
6735 // Generating 360KB in Level 3
6736 for (int i = 0; i < 72; i++) {
6737 ASSERT_OK(Put(Key(i), std::string(5000, 'x')));
6738 if (i % 10 == 0) {
6739 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
6740 }
6741 }
6742 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6743 MoveFilesToLevel(3);
6744
6745 // Generating 360KB in Level 2
6746 for (int i = 0; i < 72; i++) {
6747 ASSERT_OK(Put(Key(i), std::string(5000, 'x')));
6748 if (i % 10 == 0) {
6749 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
6750 }
6751 }
6752 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6753 MoveFilesToLevel(2);
6754
6755 ASSERT_OK(Put(Key(0), ""));
6756
6757 test::SleepingBackgroundTask sleeping_task_low;
6758 // Block compactions
6759 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
6760 Env::Priority::LOW);
6761 sleeping_task_low.WaitUntilSleeping();
6762
6763 // Create 3 L0 files, bringing the L0 score to 3.
6764 for (int i = 0; i < 3; i++) {
6765 ASSERT_OK(Put(Key(i), std::string(5000, 'x')));
6766 ASSERT_OK(Put(Key(100 - i), std::string(5000, 'x')));
6767 // Flush the file. File size is around 30KB.
6768 InstallFlushCallback();
6769 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
6770 WaitForFlush();
6771 }
6772 ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
6773 ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed));
6774
6775 sleeping_task_low.WakeUp();
6776 sleeping_task_low.WaitUntilDone();
6777 sleeping_task_low.Reset();
6778 ASSERT_OK(dbfull()->TEST_WaitForCompact());
6779
6780 // Now there is one L1 file, but it doesn't trigger soft_rate_limit.
6781 //
6782 // TODO: soft_rate_limit is deprecated. If this test
6783 // relies on soft_rate_limit, then we need to change the test.
6784 //
6785 // The L1 file size is around 30KB.
6786 ASSERT_EQ(NumTableFilesAtLevel(1), 1);
6787 ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
6788 ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal));
6789
6790 // Only allow one compaction to go through.
6791 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6792 "BackgroundCallCompaction:0", [&](void* /*arg*/) {
6793 // Schedule a sleeping task.
6794 sleeping_task_low.Reset();
6795 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
6796 &sleeping_task_low, Env::Priority::LOW);
6797 });
6798
6799 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
6800 Env::Priority::LOW);
6801 sleeping_task_low.WaitUntilSleeping();
6802 // Create 3 L0 files, bringing the L0 score to 3.
6803 for (int i = 0; i < 3; i++) {
6804 ASSERT_OK(Put(Key(10 + i), std::string(5000, 'x')));
6805 ASSERT_OK(Put(Key(90 - i), std::string(5000, 'x')));
6806 // Flush the file. File size is around 30KB.
6807 InstallFlushCallback();
6808 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
6809 WaitForFlush();
6810 }
6811
6812 // Wake up the sleeping task so compaction can run, then wait for it to
6813 // go back to sleep to make sure one compaction
6814 // goes through.
6815 sleeping_task_low.WakeUp();
6816 sleeping_task_low.WaitUntilSleeping();
6817
6818 // Now there is one L1 file (around 60KB), exceeding the 50KB base by 10KB.
6819 // Given the level multiplier of 10, estimated pending compaction is around
6820 // 100KB, which doesn't trigger soft_pending_compaction_bytes_limit.
6821 ASSERT_EQ(NumTableFilesAtLevel(1), 1);
6822 ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
6823 ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal));
6824
6825 // Create 3 L0 files, bringing the L0 score to 3 (higher than the L1 score).
6826 for (int i = 0; i < 3; i++) {
6827 ASSERT_OK(Put(Key(20 + i), std::string(5000, 'x')));
6828 ASSERT_OK(Put(Key(80 - i), std::string(5000, 'x')));
6829 // Flush the file. File size is around 30KB.
6830 InstallFlushCallback();
6831 ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
6832 WaitForFlush();
6833 }
6834 // Wake up the sleeping task so compaction can run, then wait for it to
6835 // go back to sleep to make sure one compaction
6836 // goes through.
6837 sleeping_task_low.WakeUp();
6838 sleeping_task_low.WaitUntilSleeping();
6839
6840 // Now there is one L1 file (around 90KB), exceeding the 50KB base by 40KB.
6841 // L2 size is 360KB, so the estimated level fanout is 4 and the estimated
6842 // pending compaction is around 200KB,
6843 // triggering soft_pending_compaction_bytes_limit.
6844 ASSERT_EQ(NumTableFilesAtLevel(1), 1);
6845 ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
6846 ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed));
6847
6848 sleeping_task_low.WakeUp();
6849 sleeping_task_low.WaitUntilSleeping();
6850
6851 ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
6852 ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal));
6853
6854 // Shrink the level base so L2 will hit the soft limit more easily.
6855 ASSERT_OK(dbfull()->SetOptions({
6856 {"max_bytes_for_level_base", "5000"},
6857 }));
6858
6859 ASSERT_OK(Put("", ""));
6860 ASSERT_OK(Flush());
6861 ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
6862 ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed));
6863
6864 sleeping_task_low.WaitUntilSleeping();
6865 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6866 sleeping_task_low.WakeUp();
6867 sleeping_task_low.WaitUntilDone();
6868 }
6869
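// Editorial note: SoftLimit above exercises both delay sources -- an L0 score
// above level0_slowdown_writes_trigger and estimated pending compaction bytes
// above soft_pending_compaction_bytes_limit -- and checks that the
// WriteStallListener tracks the write controller through each transition.
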
6870 TEST_F(DBTest, LastWriteBufferDelay) {
6871 Options options = CurrentOptions();
6872 options.env = env_;
6873 options.write_buffer_size = 100000;
6874 options.max_write_buffer_number = 4;
6875 options.delayed_write_rate = 20000;
6876 options.compression = kNoCompression;
6877 options.disable_auto_compactions = true;
6878 int kNumKeysPerMemtable = 3;
6879 options.memtable_factory.reset(
6880 test::NewSpecialSkipListFactory(kNumKeysPerMemtable));
6881
6882 Reopen(options);
6883 test::SleepingBackgroundTask sleeping_task;
6884 // Block flushes
6885 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
6886 Env::Priority::HIGH);
6887 sleeping_task.WaitUntilSleeping();
6888
6889 // Fill three memtables. Flushes are blocked, so no L0 files appear yet.
6890 for (int i = 0; i < 3; i++) {
6891 // Fill one mem table
6892 for (int j = 0; j < kNumKeysPerMemtable; j++) {
6893 ASSERT_OK(Put(Key(j), ""));
6894 }
6895 ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
6896 }
6897 // Inserting a new entry creates a new memtable, triggering the slowdown.
6898 ASSERT_OK(Put(Key(0), ""));
6899 ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
6900
6901 sleeping_task.WakeUp();
6902 sleeping_task.WaitUntilDone();
6903 }
6904 #endif // !defined(ROCKSDB_LITE) &&
6905 // !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
6906
6907 TEST_F(DBTest, FailWhenCompressionNotSupportedTest) {
6908 CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
6909 kLZ4Compression, kLZ4HCCompression,
6910 kXpressCompression};
6911 for (auto comp : compressions) {
6912 if (!CompressionTypeSupported(comp)) {
6913 // Not supported, so Open() should fail.
6914 Options options = CurrentOptions();
6915 options.compression = comp;
6916 ASSERT_TRUE(!TryReopen(options).ok());
6917 // Check that CreateColumnFamily fails as well.
6918 options.compression = kNoCompression;
6919 ASSERT_OK(TryReopen(options));
6920 ColumnFamilyOptions cf_options(options);
6921 cf_options.compression = comp;
6922 ColumnFamilyHandle* handle;
6923 ASSERT_TRUE(!db_->CreateColumnFamily(cf_options, "name", &handle).ok());
6924 }
6925 }
6926 }
6927
6928 TEST_F(DBTest, CreateColumnFamilyShouldFailOnIncompatibleOptions) {
6929 Options options = CurrentOptions();
6930 options.max_open_files = 100;
6931 Reopen(options);
6932
6933 ColumnFamilyOptions cf_options(options);
6934 // ttl is now supported even when max_open_files is not -1.
6935 cf_options.ttl = 3600;
6936 ColumnFamilyHandle* handle;
6937 ASSERT_OK(db_->CreateColumnFamily(cf_options, "pikachu", &handle));
6938 delete handle;
6939 }
6940
6941 #ifndef ROCKSDB_LITE
6942 TEST_F(DBTest, RowCache) {
6943 Options options = CurrentOptions();
6944 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
6945 options.row_cache = NewLRUCache(8192);
6946 DestroyAndReopen(options);
6947
6948 ASSERT_OK(Put("foo", "bar"));
6949 ASSERT_OK(Flush());
6950
6951 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
6952 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0);
6953 ASSERT_EQ(Get("foo"), "bar");
6954 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
6955 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
6956 ASSERT_EQ(Get("foo"), "bar");
6957 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
6958 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
6959 }
6960
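// Editorial sketch (sizes are arbitrary assumptions): the row cache is enabled
// purely through options; hits and misses are then observable via the
// ROW_CACHE_HIT / ROW_CACHE_MISS tickers asserted above.
[[maybe_unused]] static Options RowCacheOptionsSketch() {
  Options opts;
  opts.row_cache = NewLRUCache(8192);  // capacity in bytes
  opts.statistics = CreateDBStatistics();
  return opts;
}
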
6961 TEST_F(DBTest, PinnableSliceAndRowCache) {
6962 Options options = CurrentOptions();
6963 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
6964 options.row_cache = NewLRUCache(8192);
6965 DestroyAndReopen(options);
6966
6967 ASSERT_OK(Put("foo", "bar"));
6968 ASSERT_OK(Flush());
6969
6970 ASSERT_EQ(Get("foo"), "bar");
6971 ASSERT_EQ(
6972 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
6973 1);
6974
6975 {
6976 PinnableSlice pin_slice;
6977 ASSERT_EQ(Get("foo", &pin_slice), Status::OK());
6978 ASSERT_EQ(pin_slice.ToString(), "bar");
6979 // The entry is already in the cache; the lookup removes it from the LRU list.
6980 ASSERT_EQ(
6981 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
6982 0);
6983 }
6984 // After the PinnableSlice is destroyed, the element is added back to the LRU list.
6985 ASSERT_EQ(
6986 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
6987 1);
6988 }
6989
6990 TEST_F(DBTest, ReusePinnableSlice) {
6991 Options options = CurrentOptions();
6992 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
6993 options.row_cache = NewLRUCache(8192);
6994 DestroyAndReopen(options);
6995
6996 ASSERT_OK(Put("foo", "bar"));
6997 ASSERT_OK(Flush());
6998
6999 ASSERT_EQ(Get("foo"), "bar");
7000 ASSERT_EQ(
7001 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
7002 1);
7003
7004 {
7005 PinnableSlice pin_slice;
7006 ASSERT_EQ(Get("foo", &pin_slice), Status::OK());
7007 ASSERT_EQ(Get("foo", &pin_slice), Status::OK());
7008 ASSERT_EQ(pin_slice.ToString(), "bar");
7009
7010 // The entry is already in the cache; the lookup removes it from the LRU list.
7011 ASSERT_EQ(
7012 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
7013 0);
7014 }
7015 // After the PinnableSlice is destroyed, the element is added back to the LRU list.
7016 ASSERT_EQ(
7017 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
7018 1);
7019
7020 {
7021 std::vector<Slice> multiget_keys;
7022 multiget_keys.push_back("foo");
7023 std::vector<PinnableSlice> multiget_values(1);
7024 std::vector<Status> statuses({Status::NotFound()});
7025 ReadOptions ropt;
7026 dbfull()->MultiGet(ropt, dbfull()->DefaultColumnFamily(),
7027 multiget_keys.size(), multiget_keys.data(),
7028 multiget_values.data(), statuses.data());
7029 ASSERT_EQ(Status::OK(), statuses[0]);
7030 dbfull()->MultiGet(ropt, dbfull()->DefaultColumnFamily(),
7031 multiget_keys.size(), multiget_keys.data(),
7032 multiget_values.data(), statuses.data());
7033 ASSERT_EQ(Status::OK(), statuses[0]);
7034
7035 // The entry is already in the cache; the lookup removes it from the LRU list.
7036 ASSERT_EQ(
7037 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
7038 0);
7039 }
7040 // After the PinnableSlice is destroyed, the element is added back to the LRU list.
7041 ASSERT_EQ(
7042 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
7043 1);
7044
7045 {
7046 std::vector<ColumnFamilyHandle*> multiget_cfs;
7047 multiget_cfs.push_back(dbfull()->DefaultColumnFamily());
7048 std::vector<Slice> multiget_keys;
7049 multiget_keys.push_back("foo");
7050 std::vector<PinnableSlice> multiget_values(1);
7051 std::vector<Status> statuses({Status::NotFound()});
7052 ReadOptions ropt;
7053 dbfull()->MultiGet(ropt, multiget_keys.size(), multiget_cfs.data(),
7054 multiget_keys.data(), multiget_values.data(),
7055 statuses.data());
7056 ASSERT_EQ(Status::OK(), statuses[0]);
7057 dbfull()->MultiGet(ropt, multiget_keys.size(), multiget_cfs.data(),
7058 multiget_keys.data(), multiget_values.data(),
7059 statuses.data());
7060 ASSERT_EQ(Status::OK(), statuses[0]);
7061
7062 // The entry is already in the cache; the lookup removes it from the LRU list.
7063 ASSERT_EQ(
7064 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
7065 0);
7066 }
7067 // After the PinnableSlice is destroyed, the element is added back to the LRU list.
7068 ASSERT_EQ(
7069 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
7070 1);
7071 }
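
// Editorial note: a live PinnableSlice holds a reference on the row-cache
// entry, which is why TEST_GetLRUSize() drops to 0 inside each scope above and
// returns to 1 once the slice is destroyed. Reusing one slice across lookups,
// as this test does, works because each lookup releases the previous pin
// before assigning the new value.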
7072
7073 #endif // ROCKSDB_LITE
7074
7075 TEST_F(DBTest, DeletingOldWalAfterDrop) {
7076 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
7077 {{"Test:AllowFlushes", "DBImpl::BGWorkFlush"},
7078 {"DBImpl::BGWorkFlush:done", "Test:WaitForFlush"}});
7079 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
7080
7081 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7082 Options options = CurrentOptions();
7083 options.max_total_wal_size = 8192;
7084 options.compression = kNoCompression;
7085 options.write_buffer_size = 1 << 20;
7086 options.level0_file_num_compaction_trigger = (1 << 30);
7087 options.level0_slowdown_writes_trigger = (1 << 30);
7088 options.level0_stop_writes_trigger = (1 << 30);
7089 options.disable_auto_compactions = true;
7090 DestroyAndReopen(options);
7091 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7092
7093 CreateColumnFamilies({"cf1", "cf2"}, options);
7094 ASSERT_OK(Put(0, "key1", DummyString(8192)));
7095 ASSERT_OK(Put(0, "key2", DummyString(8192)));
7096 // The oldest WAL should now be getting flushed.
7097 ASSERT_OK(db_->DropColumnFamily(handles_[0]));
7098 // all flushes should now do nothing because their CF is dropped
7099 TEST_SYNC_POINT("Test:AllowFlushes");
7100 TEST_SYNC_POINT("Test:WaitForFlush");
7101 uint64_t lognum1 = dbfull()->TEST_LogfileNumber();
7102 ASSERT_OK(Put(1, "key3", DummyString(8192)));
7103 ASSERT_OK(Put(1, "key4", DummyString(8192)));
7104 // A new WAL should have been created.
7105 uint64_t lognum2 = dbfull()->TEST_LogfileNumber();
7106 EXPECT_GT(lognum2, lognum1);
7107 }
7108
7109 TEST_F(DBTest, UnsupportedManualSync) {
7110 DestroyAndReopen(CurrentOptions());
7111 env_->is_wal_sync_thread_safe_.store(false);
7112 Status s = db_->SyncWAL();
7113 ASSERT_TRUE(s.IsNotSupported());
7114 }
7115
7116 INSTANTIATE_TEST_CASE_P(DBTestWithParam, DBTestWithParam,
7117 ::testing::Combine(::testing::Values(1, 4),
7118 ::testing::Bool()));
7119
7120 TEST_F(DBTest, PauseBackgroundWorkTest) {
7121 Options options = CurrentOptions();
7122 options.write_buffer_size = 100000; // Small write buffer
7123 Reopen(options);
7124
7125 std::vector<port::Thread> threads;
7126 std::atomic<bool> done(false);
7127 ASSERT_OK(db_->PauseBackgroundWork());
7128 threads.emplace_back([&]() {
7129 Random rnd(301);
7130 for (int i = 0; i < 10000; ++i) {
7131 ASSERT_OK(Put(rnd.RandomString(10), rnd.RandomString(10)));
7132 }
7133 done.store(true);
7134 });
7135 env_->SleepForMicroseconds(200000);
7136 // make sure the thread is not done
7137 ASSERT_FALSE(done.load());
7138 ASSERT_OK(db_->ContinueBackgroundWork());
7139 for (auto& t : threads) {
7140 t.join();
7141 }
7142 // now it's done
7143 ASSERT_TRUE(done.load());
7144 }
7145
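// Editorial sketch: the pause/resume bracket used above. Writes still land in
// the memtable while background work is paused; only flushes and compactions
// stop, so sustained writes eventually stall on memtable limits -- the effect
// the test gives 200ms to materialize.
[[maybe_unused]] static Status PausedWriteSketch(DB* db) {
  Status s = db->PauseBackgroundWork();
  if (!s.ok()) {
    return s;
  }
  s = db->Put(WriteOptions(), "key", "value");  // fine until memtables fill
  Status resume = db->ContinueBackgroundWork();
  return s.ok() ? resume : s;
}
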
7146 // Keep spawning short-lived threads that create an iterator and quit.
7147 // Meanwhile in another thread keep flushing memtables.
7148 // This used to cause a deadlock.
7149 TEST_F(DBTest, ThreadLocalPtrDeadlock) {
7150 std::atomic<int> flushes_done{0};
7151 std::atomic<int> threads_destroyed{0};
7152 auto done = [&] { return flushes_done.load() > 10; };
7153
7154 port::Thread flushing_thread([&] {
7155 for (int i = 0; !done(); ++i) {
7156 ASSERT_OK(db_->Put(WriteOptions(), Slice("hi"),
7157 Slice(std::to_string(i).c_str())));
7158 ASSERT_OK(db_->Flush(FlushOptions()));
7159 int cnt = ++flushes_done;
7160 fprintf(stderr, "Flushed %d times\n", cnt);
7161 }
7162 });
7163
7164 std::vector<port::Thread> thread_spawning_threads(10);
7165 for (auto& t : thread_spawning_threads) {
7166 t = port::Thread([&] {
7167 while (!done()) {
7168 {
7169 port::Thread tmp_thread([&] {
7170 auto it = db_->NewIterator(ReadOptions());
7171 ASSERT_OK(it->status());
7172 delete it;
7173 });
7174 tmp_thread.join();
7175 }
7176 ++threads_destroyed;
7177 }
7178 });
7179 }
7180
7181 for (auto& t : thread_spawning_threads) {
7182 t.join();
7183 }
7184 flushing_thread.join();
7185 fprintf(stderr, "Done. Flushed %d times, destroyed %d threads\n",
7186 flushes_done.load(), threads_destroyed.load());
7187 }
7188
7189 TEST_F(DBTest, LargeBlockSizeTest) {
7190 Options options = CurrentOptions();
7191 CreateAndReopenWithCF({"pikachu"}, options);
7192 ASSERT_OK(Put(0, "foo", "bar"));
7193 BlockBasedTableOptions table_options;
7194 table_options.block_size = 8LL * 1024 * 1024 * 1024LL;
7195 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
7196 ASSERT_NOK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
7197 }
7198
7199 #ifndef ROCKSDB_LITE
7200
7201 TEST_F(DBTest, CreationTimeOfOldestFile) {
7202 const int kNumKeysPerFile = 32;
7203 const int kNumLevelFiles = 2;
7204 const int kValueSize = 100;
7205
7206 Options options = CurrentOptions();
7207 options.max_open_files = -1;
7208 env_->SetMockSleep();
7209 options.env = env_;
7210
7211 // NOTE: Presumed unnecessary and removed: resetting mock time in env
7212
7213 DestroyAndReopen(options);
7214
7215 bool set_file_creation_time_to_zero = true;
7216 int idx = 0;
7217
7218 int64_t time_1 = 0;
7219 env_->GetCurrentTime(&time_1);
7220 const uint64_t uint_time_1 = static_cast<uint64_t>(time_1);
7221
7222 // Add 50 hours
7223 env_->MockSleepForSeconds(50 * 60 * 60);
7224
7225 int64_t time_2 = 0;
7226 env_->GetCurrentTime(&time_2);
7227 const uint64_t uint_time_2 = static_cast<uint64_t>(time_2);
7228
7229 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
7230 "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) {
7231 TableProperties* props = reinterpret_cast<TableProperties*>(arg);
7232 if (set_file_creation_time_to_zero) {
7233 if (idx == 0) {
7234 props->file_creation_time = 0;
7235 idx++;
7236 } else if (idx == 1) {
7237 props->file_creation_time = uint_time_1;
7238 idx = 0;
7239 }
7240 } else {
7241 if (idx == 0) {
7242 props->file_creation_time = uint_time_1;
7243 idx++;
7244 } else if (idx == 1) {
7245 props->file_creation_time = uint_time_2;
7246 }
7247 }
7248 });
7249 // Set file creation time in manifest all to 0.
7250 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
7251 "FileMetaData::FileMetaData", [&](void* arg) {
7252 FileMetaData* meta = static_cast<FileMetaData*>(arg);
7253 meta->file_creation_time = 0;
7254 });
7255 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7256
7257 Random rnd(301);
7258 for (int i = 0; i < kNumLevelFiles; ++i) {
7259 for (int j = 0; j < kNumKeysPerFile; ++j) {
7260 ASSERT_OK(
7261 Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
7262 }
7263 ASSERT_OK(Flush());
7264 }
7265
7266 // At this point there should be 2 files, one with file_creation_time = 0 and
7267 // the other non-zero. GetCreationTimeOfOldestFile API should return 0.
7268 uint64_t creation_time;
7269 Status s1 = dbfull()->GetCreationTimeOfOldestFile(&creation_time);
7270 ASSERT_EQ(0, creation_time);
7271 ASSERT_EQ(s1, Status::OK());
7272
7273 // Testing with non-zero file creation time.
7274 set_file_creation_time_to_zero = false;
7275 options = CurrentOptions();
7276 options.max_open_files = -1;
7277 options.env = env_;
7278
7279 // NOTE: Presumed unnecessary and removed: resetting mock time in env
7280
7281 DestroyAndReopen(options);
7282
7283 for (int i = 0; i < kNumLevelFiles; ++i) {
7284 for (int j = 0; j < kNumKeysPerFile; ++j) {
7285 ASSERT_OK(
7286 Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
7287 }
7288 ASSERT_OK(Flush());
7289 }
7290
7291 // At this point there should be 2 files with non-zero file creation time.
7292 // GetCreationTimeOfOldestFile API should return non-zero value.
7293 uint64_t ctime;
7294 Status s2 = dbfull()->GetCreationTimeOfOldestFile(&ctime);
7295 ASSERT_EQ(uint_time_1, ctime);
7296 ASSERT_EQ(s2, Status::OK());
7297
7298 // Testing with max_open_files != -1
7299 options = CurrentOptions();
7300 options.max_open_files = 10;
7301 DestroyAndReopen(options);
7302 Status s3 = dbfull()->GetCreationTimeOfOldestFile(&ctime);
7303 ASSERT_EQ(s3, Status::NotSupported());
7304
7305 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7306 }
7307
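// Editorial sketch: the API contract exercised above. The oldest-file creation
// time is only tracked when the DB keeps every file open.
[[maybe_unused]] static Status OldestFileTimeSketch(DB* db, uint64_t* t) {
  // Returns NotSupported unless the DB was opened with max_open_files == -1.
  return db->GetCreationTimeOfOldestFile(t);
}
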
7308 TEST_F(DBTest, MemoryUsageWithMaxWriteBufferSizeToMaintain) {
7309 Options options = CurrentOptions();
7310 options.max_write_buffer_size_to_maintain = 10000;
7311 options.write_buffer_size = 160000;
7312 Reopen(options);
7313 Random rnd(301);
7314 bool memory_limit_exceeded = false;
7315
7316 ColumnFamilyData* cfd =
7317 static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
7318
7319 for (int i = 0; i < 1000; i++) {
7320 std::string value = rnd.RandomString(1000);
7321 ASSERT_OK(Put("keykey_" + std::to_string(i), value));
7322
7323 ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
7324
7325 const uint64_t cur_active_mem = cfd->mem()->ApproximateMemoryUsage();
7326 const uint64_t size_all_mem_table =
7327 cur_active_mem + cfd->imm()->ApproximateMemoryUsage();
7328
7329 // Errors out if memory usage keeps increasing beyond the limit.
7330 // Once the limit is exceeded, memory_limit_exceeded is set; if
7331 // size_all_mem_table doesn't drop on the next write, the test errors out
7332 // (unexpected behaviour). If memory usage drops,
7333 // memory_limit_exceeded is reset to false.
7334 if ((size_all_mem_table > cur_active_mem) &&
7335 (cur_active_mem >=
7336 static_cast<uint64_t>(options.max_write_buffer_size_to_maintain)) &&
7337 (size_all_mem_table >
7338 static_cast<uint64_t>(options.max_write_buffer_size_to_maintain) +
7339 options.write_buffer_size)) {
7340 ASSERT_FALSE(memory_limit_exceeded);
7341 memory_limit_exceeded = true;
7342 } else {
7343 memory_limit_exceeded = false;
7344 }
7345 }
7346 }
7347
7348 TEST_F(DBTest, ShuttingDownNotBlockStalledWrites) {
7349 Options options = CurrentOptions();
7350 options.disable_auto_compactions = true;
7351 Reopen(options);
7352 Random rnd(403);
7353
7354 for (int i = 0; i < 20; i++) {
7355 ASSERT_OK(Put("key_" + std::to_string(i), rnd.RandomString(10)));
7356 ASSERT_OK(Flush());
7357 }
7358 ASSERT_EQ(GetSstFileCount(dbname_), 20);
7359
7360 // We need !disable_auto_compactions for writes to stall, but we also want
7361 // to delay compaction so stalled writes get unblocked by
7362 // kShutdownInProgress. BG compaction will first wait for the sync point
7363 // DBTest::ShuttingDownNotBlockStalledWrites, then wait an extra 2 sec to
7364 // allow CancelAllBackgroundWork() to set shutting_down_.
7365 SyncPoint::GetInstance()->SetCallBack(
7366 "BackgroundCallCompaction:0",
7367 [&](void* /* arg */) { env_->SleepForMicroseconds(2 * 1000 * 1000); });
7368 SyncPoint::GetInstance()->LoadDependency(
7369 {{"DBImpl::DelayWrite:Wait", "DBTest::ShuttingDownNotBlockStalledWrites"},
7370 {"DBTest::ShuttingDownNotBlockStalledWrites",
7371 "BackgroundCallCompaction:0"}});
7372 SyncPoint::GetInstance()->EnableProcessing();
7373
7374 options.level0_stop_writes_trigger = 20;
7375 options.disable_auto_compactions = false;
7376 Reopen(options);
7377
7378 std::thread thd([&]() {
7379 Status s = Put("key_" + std::to_string(101), "101");
7380 ASSERT_EQ(s.code(), Status::kShutdownInProgress);
7381 });
7382
7383 TEST_SYNC_POINT("DBTest::ShuttingDownNotBlockStalledWrites");
7384 CancelAllBackgroundWork(db_, true);
7385
7386 thd.join();
7387 }
7388 #endif
7389
7390 } // namespace ROCKSDB_NAMESPACE
7391
7392 int main(int argc, char** argv) {
7393 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
7394 ::testing::InitGoogleTest(&argc, argv);
7395 RegisterCustomObjects(argc, argv);
7396 return RUN_ALL_TESTS();
7397 }