]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/db/db_test.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / db / db_test.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10// Introduction of SyncPoint effectively disabled building and running this test
11// in Release build.
12// which is a pity, it is a good test
13#include <fcntl.h>
14#include <algorithm>
15#include <set>
16#include <thread>
17#include <unordered_set>
18#include <utility>
19#ifndef OS_WIN
20#include <unistd.h>
21#endif
22#ifdef OS_SOLARIS
23#include <alloca.h>
24#endif
25
11fdf7f2 26#include "cache/lru_cache.h"
20effc67 27#include "db/blob/blob_index.h"
f67539c2 28#include "db/db_impl/db_impl.h"
7c673cae
FG
29#include "db/db_test_util.h"
30#include "db/dbformat.h"
31#include "db/job_context.h"
32#include "db/version_set.h"
33#include "db/write_batch_internal.h"
34#include "env/mock_env.h"
f67539c2 35#include "file/filename.h"
7c673cae
FG
36#include "memtable/hash_linklist_rep.h"
37#include "monitoring/thread_status_util.h"
38#include "port/port.h"
39#include "port/stack_trace.h"
40#include "rocksdb/cache.h"
41#include "rocksdb/compaction_filter.h"
42#include "rocksdb/convenience.h"
43#include "rocksdb/db.h"
44#include "rocksdb/env.h"
45#include "rocksdb/experimental.h"
46#include "rocksdb/filter_policy.h"
47#include "rocksdb/options.h"
48#include "rocksdb/perf_context.h"
49#include "rocksdb/slice.h"
50#include "rocksdb/slice_transform.h"
51#include "rocksdb/snapshot.h"
52#include "rocksdb/table.h"
53#include "rocksdb/table_properties.h"
54#include "rocksdb/thread_status.h"
55#include "rocksdb/utilities/checkpoint.h"
56#include "rocksdb/utilities/optimistic_transaction_db.h"
57#include "rocksdb/utilities/write_batch_with_index.h"
7c673cae 58#include "table/mock_table.h"
7c673cae 59#include "table/scoped_arena_iterator.h"
f67539c2
TL
60#include "test_util/sync_point.h"
61#include "test_util/testharness.h"
62#include "test_util/testutil.h"
7c673cae 63#include "util/compression.h"
7c673cae 64#include "util/mutexlock.h"
20effc67 65#include "util/random.h"
7c673cae
FG
66#include "util/rate_limiter.h"
67#include "util/string_util.h"
7c673cae
FG
68#include "utilities/merge_operators.h"
69
f67539c2 70namespace ROCKSDB_NAMESPACE {
7c673cae 71
20effc67
TL
72// Note that whole DBTest and its child classes disable fsync on files
73// and directories for speed.
74// If fsync needs to be covered in a test, put it in other places.
7c673cae
FG
// Basic DB test fixture. Runs each test in its own "/db_test" directory.
// fsync on files and directories is disabled for speed; tests that need
// fsync coverage should live elsewhere (see note above this class).
class DBTest : public DBTestBase {
 public:
  DBTest() : DBTestBase("/db_test", /*env_do_fsync=*/false) {}
};
79
// Parameterized variant of DBTest. The tuple parameter supplies
// (max_subcompactions, exclusive_manual_compaction) for each instantiation.
class DBTestWithParam
    : public DBTest,
      public testing::WithParamInterface<std::tuple<uint32_t, bool>> {
 public:
  DBTestWithParam() {
    max_subcompactions_ = std::get<0>(GetParam());
    exclusive_manual_compaction_ = std::get<1>(GetParam());
  }

  // Required if inheriting from testing::WithParamInterface<>
  static void SetUpTestCase() {}
  static void TearDownTestCase() {}

  uint32_t max_subcompactions_;       // value of the max_subcompactions option
  bool exclusive_manual_compaction_;  // exclusive manual compaction mode
};
96
97TEST_F(DBTest, MockEnvTest) {
494da23a 98 std::unique_ptr<MockEnv> env{new MockEnv(Env::Default())};
7c673cae
FG
99 Options options;
100 options.create_if_missing = true;
101 options.env = env.get();
102 DB* db;
103
104 const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
105 const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};
106
107 ASSERT_OK(DB::Open(options, "/dir/db", &db));
108 for (size_t i = 0; i < 3; ++i) {
109 ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
110 }
111
112 for (size_t i = 0; i < 3; ++i) {
113 std::string res;
114 ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
115 ASSERT_TRUE(res == vals[i]);
116 }
117
118 Iterator* iterator = db->NewIterator(ReadOptions());
119 iterator->SeekToFirst();
120 for (size_t i = 0; i < 3; ++i) {
121 ASSERT_TRUE(iterator->Valid());
122 ASSERT_TRUE(keys[i] == iterator->key());
123 ASSERT_TRUE(vals[i] == iterator->value());
124 iterator->Next();
125 }
126 ASSERT_TRUE(!iterator->Valid());
127 delete iterator;
128
129// TEST_FlushMemTable() is not supported in ROCKSDB_LITE
130#ifndef ROCKSDB_LITE
20effc67 131 DBImpl* dbi = static_cast_with_check<DBImpl>(db);
7c673cae
FG
132 ASSERT_OK(dbi->TEST_FlushMemTable());
133
134 for (size_t i = 0; i < 3; ++i) {
135 std::string res;
136 ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
137 ASSERT_TRUE(res == vals[i]);
138 }
139#endif // ROCKSDB_LITE
140
141 delete db;
142}
143
144// NewMemEnv returns nullptr in ROCKSDB_LITE since class InMemoryEnv isn't
145// defined.
146#ifndef ROCKSDB_LITE
147TEST_F(DBTest, MemEnvTest) {
494da23a 148 std::unique_ptr<Env> env{NewMemEnv(Env::Default())};
7c673cae
FG
149 Options options;
150 options.create_if_missing = true;
151 options.env = env.get();
152 DB* db;
153
154 const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
155 const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};
156
157 ASSERT_OK(DB::Open(options, "/dir/db", &db));
158 for (size_t i = 0; i < 3; ++i) {
159 ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
160 }
161
162 for (size_t i = 0; i < 3; ++i) {
163 std::string res;
164 ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
165 ASSERT_TRUE(res == vals[i]);
166 }
167
168 Iterator* iterator = db->NewIterator(ReadOptions());
169 iterator->SeekToFirst();
170 for (size_t i = 0; i < 3; ++i) {
171 ASSERT_TRUE(iterator->Valid());
172 ASSERT_TRUE(keys[i] == iterator->key());
173 ASSERT_TRUE(vals[i] == iterator->value());
174 iterator->Next();
175 }
176 ASSERT_TRUE(!iterator->Valid());
177 delete iterator;
178
20effc67 179 DBImpl* dbi = static_cast_with_check<DBImpl>(db);
7c673cae
FG
180 ASSERT_OK(dbi->TEST_FlushMemTable());
181
182 for (size_t i = 0; i < 3; ++i) {
183 std::string res;
184 ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
185 ASSERT_TRUE(res == vals[i]);
186 }
187
188 delete db;
189
190 options.create_if_missing = false;
191 ASSERT_OK(DB::Open(options, "/dir/db", &db));
192 for (size_t i = 0; i < 3; ++i) {
193 std::string res;
194 ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
195 ASSERT_TRUE(res == vals[i]);
196 }
197 delete db;
198}
199#endif // ROCKSDB_LITE
200
201TEST_F(DBTest, WriteEmptyBatch) {
202 Options options = CurrentOptions();
203 options.env = env_;
204 options.write_buffer_size = 100000;
205 CreateAndReopenWithCF({"pikachu"}, options);
206
207 ASSERT_OK(Put(1, "foo", "bar"));
208 WriteOptions wo;
209 wo.sync = true;
210 wo.disableWAL = false;
211 WriteBatch empty_batch;
212 ASSERT_OK(dbfull()->Write(wo, &empty_batch));
213
214 // make sure we can re-open it.
215 ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
216 ASSERT_EQ("bar", Get(1, "foo"));
217}
218
219TEST_F(DBTest, SkipDelay) {
220 Options options = CurrentOptions();
221 options.env = env_;
222 options.write_buffer_size = 100000;
223 CreateAndReopenWithCF({"pikachu"}, options);
224
225 for (bool sync : {true, false}) {
226 for (bool disableWAL : {true, false}) {
11fdf7f2
TL
227 if (sync && disableWAL) {
228 // sync and disableWAL is incompatible.
229 continue;
230 }
7c673cae
FG
231 // Use a small number to ensure a large delay that is still effective
232 // when we do Put
233 // TODO(myabandeh): this is time dependent and could potentially make
234 // the test flaky
235 auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
236 std::atomic<int> sleep_count(0);
f67539c2 237 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
7c673cae 238 "DBImpl::DelayWrite:Sleep",
11fdf7f2 239 [&](void* /*arg*/) { sleep_count.fetch_add(1); });
7c673cae 240 std::atomic<int> wait_count(0);
f67539c2 241 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
7c673cae 242 "DBImpl::DelayWrite:Wait",
11fdf7f2 243 [&](void* /*arg*/) { wait_count.fetch_add(1); });
f67539c2 244 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae
FG
245
246 WriteOptions wo;
247 wo.sync = sync;
248 wo.disableWAL = disableWAL;
249 wo.no_slowdown = true;
250 dbfull()->Put(wo, "foo", "bar");
251 // We need the 2nd write to trigger delay. This is because delay is
252 // estimated based on the last write size which is 0 for the first write.
253 ASSERT_NOK(dbfull()->Put(wo, "foo2", "bar2"));
254 ASSERT_GE(sleep_count.load(), 0);
255 ASSERT_GE(wait_count.load(), 0);
256 token.reset();
257
258 token = dbfull()->TEST_write_controler().GetDelayToken(1000000000);
259 wo.no_slowdown = false;
260 ASSERT_OK(dbfull()->Put(wo, "foo3", "bar3"));
261 ASSERT_GE(sleep_count.load(), 1);
262 token.reset();
263 }
264 }
265}
266
11fdf7f2
TL
267TEST_F(DBTest, MixedSlowdownOptions) {
268 Options options = CurrentOptions();
269 options.env = env_;
270 options.write_buffer_size = 100000;
271 CreateAndReopenWithCF({"pikachu"}, options);
272 std::vector<port::Thread> threads;
273 std::atomic<int> thread_num(0);
274
275 std::function<void()> write_slowdown_func = [&]() {
276 int a = thread_num.fetch_add(1);
277 std::string key = "foo" + std::to_string(a);
278 WriteOptions wo;
279 wo.no_slowdown = false;
280 ASSERT_OK(dbfull()->Put(wo, key, "bar"));
281 };
282 std::function<void()> write_no_slowdown_func = [&]() {
283 int a = thread_num.fetch_add(1);
284 std::string key = "foo" + std::to_string(a);
285 WriteOptions wo;
286 wo.no_slowdown = true;
287 ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
288 };
289 // Use a small number to ensure a large delay that is still effective
290 // when we do Put
291 // TODO(myabandeh): this is time dependent and could potentially make
292 // the test flaky
293 auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
294 std::atomic<int> sleep_count(0);
f67539c2
TL
295 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
296 "DBImpl::DelayWrite:BeginWriteStallDone", [&](void* /*arg*/) {
11fdf7f2
TL
297 sleep_count.fetch_add(1);
298 if (threads.empty()) {
299 for (int i = 0; i < 2; ++i) {
300 threads.emplace_back(write_slowdown_func);
301 }
302 for (int i = 0; i < 2; ++i) {
303 threads.emplace_back(write_no_slowdown_func);
304 }
305 }
306 });
f67539c2 307 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
11fdf7f2
TL
308
309 WriteOptions wo;
310 wo.sync = false;
311 wo.disableWAL = false;
312 wo.no_slowdown = false;
313 dbfull()->Put(wo, "foo", "bar");
314 // We need the 2nd write to trigger delay. This is because delay is
315 // estimated based on the last write size which is 0 for the first write.
316 ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
317 token.reset();
318
319 for (auto& t : threads) {
320 t.join();
321 }
322 ASSERT_GE(sleep_count.load(), 1);
323
324 wo.no_slowdown = true;
325 ASSERT_OK(dbfull()->Put(wo, "foo3", "bar"));
326}
327
328TEST_F(DBTest, MixedSlowdownOptionsInQueue) {
329 Options options = CurrentOptions();
330 options.env = env_;
331 options.write_buffer_size = 100000;
332 CreateAndReopenWithCF({"pikachu"}, options);
333 std::vector<port::Thread> threads;
334 std::atomic<int> thread_num(0);
335
336 std::function<void()> write_no_slowdown_func = [&]() {
337 int a = thread_num.fetch_add(1);
338 std::string key = "foo" + std::to_string(a);
339 WriteOptions wo;
340 wo.no_slowdown = true;
341 ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
342 };
343 // Use a small number to ensure a large delay that is still effective
344 // when we do Put
345 // TODO(myabandeh): this is time dependent and could potentially make
346 // the test flaky
347 auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
348 std::atomic<int> sleep_count(0);
f67539c2
TL
349 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
350 "DBImpl::DelayWrite:Sleep", [&](void* /*arg*/) {
11fdf7f2
TL
351 sleep_count.fetch_add(1);
352 if (threads.empty()) {
353 for (int i = 0; i < 2; ++i) {
354 threads.emplace_back(write_no_slowdown_func);
355 }
356 // Sleep for 2s to allow the threads to insert themselves into the
357 // write queue
358 env_->SleepForMicroseconds(3000000ULL);
359 }
360 });
361 std::atomic<int> wait_count(0);
f67539c2 362 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
11fdf7f2
TL
363 "DBImpl::DelayWrite:Wait",
364 [&](void* /*arg*/) { wait_count.fetch_add(1); });
f67539c2 365 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
11fdf7f2
TL
366
367 WriteOptions wo;
368 wo.sync = false;
369 wo.disableWAL = false;
370 wo.no_slowdown = false;
371 dbfull()->Put(wo, "foo", "bar");
372 // We need the 2nd write to trigger delay. This is because delay is
373 // estimated based on the last write size which is 0 for the first write.
374 ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
375 token.reset();
376
377 for (auto& t : threads) {
378 t.join();
379 }
380 ASSERT_EQ(sleep_count.load(), 1);
381 ASSERT_GE(wait_count.load(), 0);
382}
383
384TEST_F(DBTest, MixedSlowdownOptionsStop) {
385 Options options = CurrentOptions();
386 options.env = env_;
387 options.write_buffer_size = 100000;
388 CreateAndReopenWithCF({"pikachu"}, options);
389 std::vector<port::Thread> threads;
390 std::atomic<int> thread_num(0);
391
392 std::function<void()> write_slowdown_func = [&]() {
393 int a = thread_num.fetch_add(1);
394 std::string key = "foo" + std::to_string(a);
395 WriteOptions wo;
396 wo.no_slowdown = false;
397 ASSERT_OK(dbfull()->Put(wo, key, "bar"));
398 };
399 std::function<void()> write_no_slowdown_func = [&]() {
400 int a = thread_num.fetch_add(1);
401 std::string key = "foo" + std::to_string(a);
402 WriteOptions wo;
403 wo.no_slowdown = true;
404 ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
405 };
406 std::function<void()> wakeup_writer = [&]() {
407 dbfull()->mutex_.Lock();
408 dbfull()->bg_cv_.SignalAll();
409 dbfull()->mutex_.Unlock();
410 };
411 // Use a small number to ensure a large delay that is still effective
412 // when we do Put
413 // TODO(myabandeh): this is time dependent and could potentially make
414 // the test flaky
415 auto token = dbfull()->TEST_write_controler().GetStopToken();
416 std::atomic<int> wait_count(0);
f67539c2
TL
417 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
418 "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) {
11fdf7f2
TL
419 wait_count.fetch_add(1);
420 if (threads.empty()) {
421 for (int i = 0; i < 2; ++i) {
422 threads.emplace_back(write_slowdown_func);
423 }
424 for (int i = 0; i < 2; ++i) {
425 threads.emplace_back(write_no_slowdown_func);
426 }
427 // Sleep for 2s to allow the threads to insert themselves into the
428 // write queue
429 env_->SleepForMicroseconds(3000000ULL);
430 }
431 token.reset();
432 threads.emplace_back(wakeup_writer);
433 });
f67539c2 434 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
11fdf7f2
TL
435
436 WriteOptions wo;
437 wo.sync = false;
438 wo.disableWAL = false;
439 wo.no_slowdown = false;
440 dbfull()->Put(wo, "foo", "bar");
441 // We need the 2nd write to trigger delay. This is because delay is
442 // estimated based on the last write size which is 0 for the first write.
443 ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
444 token.reset();
445
446 for (auto& t : threads) {
447 t.join();
448 }
449 ASSERT_GE(wait_count.load(), 1);
450
451 wo.no_slowdown = true;
452 ASSERT_OK(dbfull()->Put(wo, "foo3", "bar"));
453}
7c673cae
FG
454#ifndef ROCKSDB_LITE
455
456TEST_F(DBTest, LevelLimitReopen) {
457 Options options = CurrentOptions();
458 CreateAndReopenWithCF({"pikachu"}, options);
459
460 const std::string value(1024 * 1024, ' ');
461 int i = 0;
462 while (NumTableFilesAtLevel(2, 1) == 0) {
463 ASSERT_OK(Put(1, Key(i++), value));
464 }
465
466 options.num_levels = 1;
467 options.max_bytes_for_level_multiplier_additional.resize(1, 1);
468 Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
469 ASSERT_EQ(s.IsInvalidArgument(), true);
470 ASSERT_EQ(s.ToString(),
471 "Invalid argument: db has more levels than options.num_levels");
472
473 options.num_levels = 10;
474 options.max_bytes_for_level_multiplier_additional.resize(10, 1);
475 ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
476}
477#endif // ROCKSDB_LITE
478
479
// SingleDelete must hide a previously Put value while leaving other keys
// readable, across all applicable option configurations.
TEST_F(DBTest, PutSingleDeleteGet) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_OK(Put(1, "foo2", "v2"));
    ASSERT_EQ("v2", Get(1, "foo2"));
    ASSERT_OK(SingleDelete(1, "foo"));
    // "foo" is gone; "foo2" was checked above and is unaffected.
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
    // Skip FIFO and universal compaction because they do not apply to the test
    // case. Skip MergePut because single delete does not get removed when it
    // encounters a merge.
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
                         kSkipMergePut));
}
495
496TEST_F(DBTest, ReadFromPersistedTier) {
497 do {
498 Random rnd(301);
499 Options options = CurrentOptions();
500 for (int disableWAL = 0; disableWAL <= 1; ++disableWAL) {
501 CreateAndReopenWithCF({"pikachu"}, options);
502 WriteOptions wopt;
503 wopt.disableWAL = (disableWAL == 1);
504 // 1st round: put but not flush
505 ASSERT_OK(db_->Put(wopt, handles_[1], "foo", "first"));
506 ASSERT_OK(db_->Put(wopt, handles_[1], "bar", "one"));
507 ASSERT_EQ("first", Get(1, "foo"));
508 ASSERT_EQ("one", Get(1, "bar"));
509
510 // Read directly from persited data.
511 ReadOptions ropt;
512 ropt.read_tier = kPersistedTier;
513 std::string value;
514 if (wopt.disableWAL) {
515 // as data has not yet being flushed, we expect not found.
516 ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
517 ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
518 } else {
519 ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
520 ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
521 }
522
523 // Multiget
524 std::vector<ColumnFamilyHandle*> multiget_cfs;
525 multiget_cfs.push_back(handles_[1]);
526 multiget_cfs.push_back(handles_[1]);
527 std::vector<Slice> multiget_keys;
528 multiget_keys.push_back("foo");
529 multiget_keys.push_back("bar");
530 std::vector<std::string> multiget_values;
531 auto statuses =
532 db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
533 if (wopt.disableWAL) {
534 ASSERT_TRUE(statuses[0].IsNotFound());
535 ASSERT_TRUE(statuses[1].IsNotFound());
536 } else {
537 ASSERT_OK(statuses[0]);
538 ASSERT_OK(statuses[1]);
539 }
540
541 // 2nd round: flush and put a new value in memtable.
542 ASSERT_OK(Flush(1));
543 ASSERT_OK(db_->Put(wopt, handles_[1], "rocksdb", "hello"));
544
545 // once the data has been flushed, we are able to get the
546 // data when kPersistedTier is used.
547 ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).ok());
548 ASSERT_EQ(value, "first");
549 ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
550 ASSERT_EQ(value, "one");
551 if (wopt.disableWAL) {
552 ASSERT_TRUE(
553 db_->Get(ropt, handles_[1], "rocksdb", &value).IsNotFound());
554 } else {
555 ASSERT_OK(db_->Get(ropt, handles_[1], "rocksdb", &value));
556 ASSERT_EQ(value, "hello");
557 }
558
559 // Expect same result in multiget
560 multiget_cfs.push_back(handles_[1]);
561 multiget_keys.push_back("rocksdb");
562 statuses =
563 db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
564 ASSERT_TRUE(statuses[0].ok());
565 ASSERT_EQ("first", multiget_values[0]);
566 ASSERT_TRUE(statuses[1].ok());
567 ASSERT_EQ("one", multiget_values[1]);
568 if (wopt.disableWAL) {
569 ASSERT_TRUE(statuses[2].IsNotFound());
570 } else {
571 ASSERT_OK(statuses[2]);
572 }
573
574 // 3rd round: delete and flush
575 ASSERT_OK(db_->Delete(wopt, handles_[1], "foo"));
576 Flush(1);
577 ASSERT_OK(db_->Delete(wopt, handles_[1], "bar"));
578
579 ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
580 if (wopt.disableWAL) {
581 // Still expect finding the value as its delete has not yet being
582 // flushed.
583 ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
584 ASSERT_EQ(value, "one");
585 } else {
586 ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
587 }
588 ASSERT_TRUE(db_->Get(ropt, handles_[1], "rocksdb", &value).ok());
589 ASSERT_EQ(value, "hello");
590
591 statuses =
592 db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
593 ASSERT_TRUE(statuses[0].IsNotFound());
594 if (wopt.disableWAL) {
595 ASSERT_TRUE(statuses[1].ok());
596 ASSERT_EQ("one", multiget_values[1]);
597 } else {
598 ASSERT_TRUE(statuses[1].IsNotFound());
599 }
600 ASSERT_TRUE(statuses[2].ok());
601 ASSERT_EQ("hello", multiget_values[2]);
602 if (wopt.disableWAL == 0) {
603 DestroyAndReopen(options);
604 }
605 }
494da23a 606 } while (ChangeOptions());
7c673cae
FG
607}
608
609TEST_F(DBTest, SingleDeleteFlush) {
610 // Test to check whether flushing preserves a single delete hidden
611 // behind a put.
612 do {
613 Random rnd(301);
614
615 Options options = CurrentOptions();
616 options.disable_auto_compactions = true;
617 CreateAndReopenWithCF({"pikachu"}, options);
618
619 // Put values on second level (so that they will not be in the same
620 // compaction as the other operations.
621 Put(1, "foo", "first");
622 Put(1, "bar", "one");
623 ASSERT_OK(Flush(1));
624 MoveFilesToLevel(2, 1);
625
626 // (Single) delete hidden by a put
627 SingleDelete(1, "foo");
628 Put(1, "foo", "second");
629 Delete(1, "bar");
630 Put(1, "bar", "two");
631 ASSERT_OK(Flush(1));
632
633 SingleDelete(1, "foo");
634 Delete(1, "bar");
635 ASSERT_OK(Flush(1));
636
637 dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
638 nullptr);
639
640 ASSERT_EQ("NOT_FOUND", Get(1, "bar"));
641 ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
494da23a
TL
642 // Skip FIFO and universal compaction beccause they do not apply to the test
643 // case. Skip MergePut because single delete does not get removed when it
644 // encounters a merge.
645 } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
646 kSkipMergePut));
7c673cae
FG
647}
648
649TEST_F(DBTest, SingleDeletePutFlush) {
650 // Single deletes that encounter the matching put in a flush should get
651 // removed.
652 do {
653 Random rnd(301);
654
655 Options options = CurrentOptions();
656 options.disable_auto_compactions = true;
657 CreateAndReopenWithCF({"pikachu"}, options);
658
659 Put(1, "foo", Slice());
660 Put(1, "a", Slice());
661 SingleDelete(1, "a");
662 ASSERT_OK(Flush(1));
663
664 ASSERT_EQ("[ ]", AllEntriesFor("a", 1));
494da23a
TL
665 // Skip FIFO and universal compaction beccause they do not apply to the test
666 // case. Skip MergePut because single delete does not get removed when it
667 // encounters a merge.
668 } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
669 kSkipMergePut));
7c673cae
FG
670}
671
11fdf7f2
TL
672// Disable because not all platform can run it.
673// It requires more than 9GB memory to run it, With single allocation
674// of more than 3GB.
// Every write path (Put, Merge, WriteBatch::Put/Merge, and the SliceParts
// overloads) must reject a 4GB value with InvalidArgument.
TEST_F(DBTest, DISABLED_SanitizeVeryVeryLargeValue) {
  const size_t kValueSize = 4 * size_t{1024 * 1024 * 1024};  // 4GB value
  std::string raw(kValueSize, 'v');
  Options options = CurrentOptions();
  options.env = env_;
  options.merge_operator = MergeOperators::CreatePutOperator();
  options.write_buffer_size = 100000;  // Small write buffer
  options.paranoid_checks = true;
  DestroyAndReopen(options);

  // A normal-sized write still works.
  ASSERT_OK(Put("boo", "v1"));
  // Direct Put/Merge of the oversized value must be rejected.
  ASSERT_TRUE(Put("foo", raw).IsInvalidArgument());
  ASSERT_TRUE(Merge("foo", raw).IsInvalidArgument());

  // Same sanitization when going through a WriteBatch directly.
  WriteBatch wb;
  ASSERT_TRUE(wb.Put("foo", raw).IsInvalidArgument());
  ASSERT_TRUE(wb.Merge("foo", raw).IsInvalidArgument());

  // And when key/value are passed as SliceParts.
  Slice value_slice = raw;
  Slice key_slice = "foo";
  SliceParts sp_key(&key_slice, 1);
  SliceParts sp_value(&value_slice, 1);

  ASSERT_TRUE(wb.Put(sp_key, sp_value).IsInvalidArgument());
  ASSERT_TRUE(wb.Merge(sp_key, sp_value).IsInvalidArgument());
}
701
7c673cae
FG
702// Disable because not all platform can run it.
703// It requires more than 9GB memory to run it, With single allocation
704// of more than 3GB.
705TEST_F(DBTest, DISABLED_VeryLargeValue) {
706 const size_t kValueSize = 3221225472u; // 3GB value
707 const size_t kKeySize = 8388608u; // 8MB key
708 std::string raw(kValueSize, 'v');
709 std::string key1(kKeySize, 'c');
710 std::string key2(kKeySize, 'd');
711
712 Options options = CurrentOptions();
713 options.env = env_;
714 options.write_buffer_size = 100000; // Small write buffer
715 options.paranoid_checks = true;
716 DestroyAndReopen(options);
717
718 ASSERT_OK(Put("boo", "v1"));
719 ASSERT_OK(Put("foo", "v1"));
720 ASSERT_OK(Put(key1, raw));
721 raw[0] = 'w';
722 ASSERT_OK(Put(key2, raw));
723 dbfull()->TEST_WaitForFlushMemTable();
724
11fdf7f2 725#ifndef ROCKSDB_LITE
7c673cae 726 ASSERT_EQ(1, NumTableFilesAtLevel(0));
11fdf7f2 727#endif // !ROCKSDB_LITE
7c673cae
FG
728
729 std::string value;
730 Status s = db_->Get(ReadOptions(), key1, &value);
731 ASSERT_OK(s);
732 ASSERT_EQ(kValueSize, value.size());
733 ASSERT_EQ('v', value[0]);
734
735 s = db_->Get(ReadOptions(), key2, &value);
736 ASSERT_OK(s);
737 ASSERT_EQ(kValueSize, value.size());
738 ASSERT_EQ('w', value[0]);
739
740 // Compact all files.
741 Flush();
742 db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
743
744 // Check DB is not in read-only state.
745 ASSERT_OK(Put("boo", "v1"));
746
747 s = db_->Get(ReadOptions(), key1, &value);
748 ASSERT_OK(s);
749 ASSERT_EQ(kValueSize, value.size());
750 ASSERT_EQ('v', value[0]);
751
752 s = db_->Get(ReadOptions(), key2, &value);
753 ASSERT_OK(s);
754 ASSERT_EQ(kValueSize, value.size());
755 ASSERT_EQ('w', value[0]);
756}
757
758TEST_F(DBTest, GetFromImmutableLayer) {
759 do {
760 Options options = CurrentOptions();
761 options.env = env_;
762 CreateAndReopenWithCF({"pikachu"}, options);
763
764 ASSERT_OK(Put(1, "foo", "v1"));
765 ASSERT_EQ("v1", Get(1, "foo"));
766
767 // Block sync calls
768 env_->delay_sstable_sync_.store(true, std::memory_order_release);
769 Put(1, "k1", std::string(100000, 'x')); // Fill memtable
770 Put(1, "k2", std::string(100000, 'y')); // Trigger flush
771 ASSERT_EQ("v1", Get(1, "foo"));
772 ASSERT_EQ("NOT_FOUND", Get(0, "foo"));
773 // Release sync calls
774 env_->delay_sstable_sync_.store(false, std::memory_order_release);
775 } while (ChangeOptions());
776}
777
778
779TEST_F(DBTest, GetLevel0Ordering) {
780 do {
781 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
782 // Check that we process level-0 files in correct order. The code
783 // below generates two level-0 files where the earlier one comes
784 // before the later one in the level-0 file list since the earlier
785 // one has a smaller "smallest" key.
786 ASSERT_OK(Put(1, "bar", "b"));
787 ASSERT_OK(Put(1, "foo", "v1"));
788 ASSERT_OK(Flush(1));
789 ASSERT_OK(Put(1, "foo", "v2"));
790 ASSERT_OK(Flush(1));
791 ASSERT_EQ("v2", Get(1, "foo"));
792 } while (ChangeOptions());
793}
794
795TEST_F(DBTest, WrongLevel0Config) {
796 Options options = CurrentOptions();
797 Close();
798 ASSERT_OK(DestroyDB(dbname_, options));
799 options.level0_stop_writes_trigger = 1;
800 options.level0_slowdown_writes_trigger = 2;
801 options.level0_file_num_compaction_trigger = 3;
802 ASSERT_OK(DB::Open(options, dbname_, &db_));
803}
804
805#ifndef ROCKSDB_LITE
806TEST_F(DBTest, GetOrderedByLevels) {
807 do {
808 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
809 ASSERT_OK(Put(1, "foo", "v1"));
810 Compact(1, "a", "z");
811 ASSERT_EQ("v1", Get(1, "foo"));
812 ASSERT_OK(Put(1, "foo", "v2"));
813 ASSERT_EQ("v2", Get(1, "foo"));
814 ASSERT_OK(Flush(1));
815 ASSERT_EQ("v2", Get(1, "foo"));
816 } while (ChangeOptions());
817}
818
819TEST_F(DBTest, GetPicksCorrectFile) {
820 do {
821 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
822 // Arrange to have multiple files in a non-level-0 level.
823 ASSERT_OK(Put(1, "a", "va"));
824 Compact(1, "a", "b");
825 ASSERT_OK(Put(1, "x", "vx"));
826 Compact(1, "x", "y");
827 ASSERT_OK(Put(1, "f", "vf"));
828 Compact(1, "f", "g");
829 ASSERT_EQ("va", Get(1, "a"));
830 ASSERT_EQ("vf", Get(1, "f"));
831 ASSERT_EQ("vx", Get(1, "x"));
832 } while (ChangeOptions());
833}
834
835TEST_F(DBTest, GetEncountersEmptyLevel) {
836 do {
837 Options options = CurrentOptions();
838 CreateAndReopenWithCF({"pikachu"}, options);
839 // Arrange for the following to happen:
840 // * sstable A in level 0
841 // * nothing in level 1
842 // * sstable B in level 2
843 // Then do enough Get() calls to arrange for an automatic compaction
844 // of sstable A. A bug would cause the compaction to be marked as
845 // occurring at level 1 (instead of the correct level 0).
846
847 // Step 1: First place sstables in levels 0 and 2
848 Put(1, "a", "begin");
849 Put(1, "z", "end");
850 ASSERT_OK(Flush(1));
851 dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
852 dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
853 Put(1, "a", "begin");
854 Put(1, "z", "end");
855 ASSERT_OK(Flush(1));
856 ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
857 ASSERT_GT(NumTableFilesAtLevel(2, 1), 0);
858
859 // Step 2: clear level 1 if necessary.
860 dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
861 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1);
862 ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
863 ASSERT_EQ(NumTableFilesAtLevel(2, 1), 1);
864
865 // Step 3: read a bunch of times
866 for (int i = 0; i < 1000; i++) {
867 ASSERT_EQ("NOT_FOUND", Get(1, "missing"));
868 }
869
870 // Step 4: Wait for compaction to finish
871 dbfull()->TEST_WaitForCompact();
872
873 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1); // XXX
874 } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
875}
876#endif // ROCKSDB_LITE
877
// Data written with the WAL disabled across multiple memtables (note
// min_write_buffer_number_to_merge = 3) must remain readable before and
// after the explicit flushes.
TEST_F(DBTest, FlushMultipleMemtable) {
  do {
    Options options = CurrentOptions();
    WriteOptions writeOpt = WriteOptions();
    writeOpt.disableWAL = true;
    options.max_write_buffer_number = 4;
    options.min_write_buffer_number_to_merge = 3;
    options.max_write_buffer_size_to_maintain = -1;
    CreateAndReopenWithCF({"pikachu"}, options);
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
    ASSERT_OK(Flush(1));
    // "bar" lands in a fresh memtable after the flush above.
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));

    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("v1", Get(1, "bar"));
    ASSERT_OK(Flush(1));
  } while (ChangeCompactOptions());
}
896#ifndef ROCKSDB_LITE
// Ten threads (writing to handles_[a & 1], i.e. five per column family)
// fill memtables concurrently with max_write_buffer_number = 2; flush
// scheduling must keep up, leaving each column family with between 1 and
// 10 SST files.
TEST_F(DBTest, FlushSchedule) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  // Set the L0 triggers very high so writes are not stalled by file counts.
  options.level0_stop_writes_trigger = 1 << 10;
  options.level0_slowdown_writes_trigger = 1 << 10;
  options.min_write_buffer_number_to_merge = 1;
  options.max_write_buffer_size_to_maintain =
      static_cast<int64_t>(options.write_buffer_size);
  options.max_write_buffer_number = 2;
  options.write_buffer_size = 120 * 1024;
  CreateAndReopenWithCF({"pikachu"}, options);
  std::vector<port::Thread> threads;

  std::atomic<int> thread_num(0);
  // each column family will have 5 thread, each thread generating 2 memtables.
  // each column family should end up with 10 table files
  std::function<void()> fill_memtable_func = [&]() {
    int a = thread_num.fetch_add(1);
    Random rnd(a);
    WriteOptions wo;
    // this should fill up 2 memtables
    for (int k = 0; k < 5000; ++k) {
      ASSERT_OK(db_->Put(wo, handles_[a & 1], rnd.RandomString(13), ""));
    }
  };

  for (int i = 0; i < 10; ++i) {
    threads.emplace_back(fill_memtable_func);
  }

  for (auto& t : threads) {
    t.join();
  }

  auto default_tables = GetNumberOfSstFilesForColumnFamily(db_, "default");
  auto pikachu_tables = GetNumberOfSstFilesForColumnFamily(db_, "pikachu");
  ASSERT_LE(default_tables, static_cast<uint64_t>(10));
  ASSERT_GT(default_tables, static_cast<uint64_t>(0));
  ASSERT_LE(pikachu_tables, static_cast<uint64_t>(10));
  ASSERT_GT(pikachu_tables, static_cast<uint64_t>(0));
}
938#endif // ROCKSDB_LITE
939
940namespace {
// Compaction filter that keeps every entry: Filter() always returns false
// ("do not drop"), so compactions using it preserve all key/value pairs.
class KeepFilter : public CompactionFilter {
 public:
  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    return false;  // never filter anything out
  }

  const char* Name() const override { return "KeepFilter"; }
};
951
952class KeepFilterFactory : public CompactionFilterFactory {
953 public:
954 explicit KeepFilterFactory(bool check_context = false)
955 : check_context_(check_context) {}
956
494da23a 957 std::unique_ptr<CompactionFilter> CreateCompactionFilter(
7c673cae
FG
958 const CompactionFilter::Context& context) override {
959 if (check_context_) {
960 EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction);
961 EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction);
962 }
963 return std::unique_ptr<CompactionFilter>(new KeepFilter());
964 }
965
494da23a 966 const char* Name() const override { return "KeepFilterFactory"; }
7c673cae
FG
967 bool check_context_;
968 std::atomic_bool expect_full_compaction_;
969 std::atomic_bool expect_manual_compaction_;
970};
971
// Compaction filter that simulates slow compaction: each Filter() call
// advances the mock clock by 1ms and drops the entry (returns true).
class DelayFilter : public CompactionFilter {
 public:
  explicit DelayFilter(DBTestBase* d) : db_test(d) {}
  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    // Advance mock time rather than actually sleeping, to keep tests fast.
    db_test->env_->MockSleepForMicroseconds(1000);
    return true;  // drop every entry
  }

  const char* Name() const override { return "DelayFilter"; }

 private:
  DBTestBase* db_test;  // non-owning; provides access to the mock env
};
987
// Factory producing DelayFilter instances bound to the owning test's env.
class DelayFilterFactory : public CompactionFilterFactory {
 public:
  explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {}
  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& /*context*/) override {
    return std::unique_ptr<CompactionFilter>(new DelayFilter(db_test));
  }

  const char* Name() const override { return "DelayFilterFactory"; }

 private:
  DBTestBase* db_test;  // non-owning; forwarded to each created filter
};
1001} // namespace
1002
1003#ifndef ROCKSDB_LITE
1004
1005static std::string CompressibleString(Random* rnd, int len) {
1006 std::string r;
1007 test::CompressibleString(rnd, 0.8, len, &r);
1008 return r;
1009}
1010#endif // ROCKSDB_LITE
1011
// Opening a DB with more db_paths than supported must fail with NotSupported.
// NOTE(review): five paths are configured here and the open is expected to be
// rejected — presumably the limit is four paths; confirm against db_impl.
TEST_F(DBTest, FailMoreDbPaths) {
  Options options = CurrentOptions();
  options.db_paths.emplace_back(dbname_, 10000000);
  options.db_paths.emplace_back(dbname_ + "_2", 1000000);
  options.db_paths.emplace_back(dbname_ + "_3", 1000000);
  options.db_paths.emplace_back(dbname_ + "_4", 1000000);
  options.db_paths.emplace_back(dbname_ + "_5", 1000000);
  ASSERT_TRUE(TryReopen(options).IsNotSupported());
}
1021
f67539c2
TL
// Cross-check the public ColumnFamilyMetaData against the internal
// FileMetaData the test harvested per level, field by field, and verify that
// per-level and per-CF size/file-count aggregates are consistent.
// [start_time, end_time] bounds the window in which all files were created.
void CheckColumnFamilyMeta(
    const ColumnFamilyMetaData& cf_meta,
    const std::vector<std::vector<FileMetaData>>& files_by_level,
    uint64_t start_time, uint64_t end_time) {
  ASSERT_EQ(cf_meta.name, kDefaultColumnFamilyName);
  ASSERT_EQ(cf_meta.levels.size(), files_by_level.size());

  // Aggregates recomputed from per-file data; compared to the CF totals below.
  uint64_t cf_size = 0;
  size_t file_count = 0;

  for (size_t i = 0; i < cf_meta.levels.size(); ++i) {
    const auto& level_meta_from_cf = cf_meta.levels[i];
    const auto& level_meta_from_files = files_by_level[i];

    ASSERT_EQ(level_meta_from_cf.level, i);
    ASSERT_EQ(level_meta_from_cf.files.size(), level_meta_from_files.size());

    file_count += level_meta_from_cf.files.size();

    uint64_t level_size = 0;
    for (size_t j = 0; j < level_meta_from_cf.files.size(); ++j) {
      const auto& file_meta_from_cf = level_meta_from_cf.files[j];
      const auto& file_meta_from_files = level_meta_from_files[j];

      level_size += file_meta_from_cf.size;

      // Public metadata must mirror the internal file descriptor exactly.
      ASSERT_EQ(file_meta_from_cf.file_number,
                file_meta_from_files.fd.GetNumber());
      ASSERT_EQ(file_meta_from_cf.file_number,
                TableFileNameToNumber(file_meta_from_cf.name));
      ASSERT_EQ(file_meta_from_cf.size, file_meta_from_files.fd.file_size);
      ASSERT_EQ(file_meta_from_cf.smallest_seqno,
                file_meta_from_files.fd.smallest_seqno);
      ASSERT_EQ(file_meta_from_cf.largest_seqno,
                file_meta_from_files.fd.largest_seqno);
      ASSERT_EQ(file_meta_from_cf.smallestkey,
                file_meta_from_files.smallest.user_key().ToString());
      ASSERT_EQ(file_meta_from_cf.largestkey,
                file_meta_from_files.largest.user_key().ToString());
      ASSERT_EQ(file_meta_from_cf.oldest_blob_file_number,
                file_meta_from_files.oldest_blob_file_number);
      ASSERT_EQ(file_meta_from_cf.oldest_ancester_time,
                file_meta_from_files.oldest_ancester_time);
      ASSERT_EQ(file_meta_from_cf.file_creation_time,
                file_meta_from_files.file_creation_time);
      // All timestamps must fall within the test's observation window.
      ASSERT_GE(file_meta_from_cf.file_creation_time, start_time);
      ASSERT_LE(file_meta_from_cf.file_creation_time, end_time);
      ASSERT_GE(file_meta_from_cf.oldest_ancester_time, start_time);
      ASSERT_LE(file_meta_from_cf.oldest_ancester_time, end_time);
    }

    ASSERT_EQ(level_meta_from_cf.size, level_size);
    cf_size += level_size;
  }

  ASSERT_EQ(cf_meta.file_count, file_count);
  ASSERT_EQ(cf_meta.size, cf_size);
}
1080
f67539c2
TL
1081void CheckLiveFilesMeta(
1082 const std::vector<LiveFileMetaData>& live_file_meta,
1083 const std::vector<std::vector<FileMetaData>>& files_by_level) {
1084 size_t total_file_count = 0;
1085 for (const auto& f : files_by_level) {
1086 total_file_count += f.size();
1087 }
1088
1089 ASSERT_EQ(live_file_meta.size(), total_file_count);
1090
1091 int level = 0;
1092 int i = 0;
1093
1094 for (const auto& meta : live_file_meta) {
1095 if (level != meta.level) {
1096 level = meta.level;
1097 i = 0;
1098 }
1099
1100 ASSERT_LT(i, files_by_level[level].size());
1101
1102 const auto& expected_meta = files_by_level[level][i];
1103
1104 ASSERT_EQ(meta.column_family_name, kDefaultColumnFamilyName);
1105 ASSERT_EQ(meta.file_number, expected_meta.fd.GetNumber());
1106 ASSERT_EQ(meta.file_number, TableFileNameToNumber(meta.name));
1107 ASSERT_EQ(meta.size, expected_meta.fd.file_size);
1108 ASSERT_EQ(meta.smallest_seqno, expected_meta.fd.smallest_seqno);
1109 ASSERT_EQ(meta.largest_seqno, expected_meta.fd.largest_seqno);
1110 ASSERT_EQ(meta.smallestkey, expected_meta.smallest.user_key().ToString());
1111 ASSERT_EQ(meta.largestkey, expected_meta.largest.user_key().ToString());
1112 ASSERT_EQ(meta.oldest_blob_file_number,
1113 expected_meta.oldest_blob_file_number);
1114
1115 ++i;
1116 }
1117}
1118
7c673cae 1119#ifndef ROCKSDB_LITE
// End-to-end check of the metadata APIs: create 100 SST files, each carrying
// one blob reference plus random filler, then verify GetColumnFamilyMetaData
// and GetLiveFilesMetaData against the internal file metadata.
TEST_F(DBTest, MetaDataTest) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.disable_auto_compactions = true;

  // Record the window start so file timestamps can be bounds-checked.
  int64_t temp_time = 0;
  options.env->GetCurrentTime(&temp_time);
  uint64_t start_time = static_cast<uint64_t>(temp_time);

  DestroyAndReopen(options);

  Random rnd(301);
  int key_index = 0;
  for (int i = 0; i < 100; ++i) {
    // Add a single blob reference to each file (distinct blob file numbers
    // starting at 1000).
    std::string blob_index;
    BlobIndex::EncodeBlob(&blob_index, /* blob_file_number */ i + 1000,
                          /* offset */ 1234, /* size */ 5678, kNoCompression);

    WriteBatch batch;
    ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, Key(key_index),
                                               blob_index));
    ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));

    ++key_index;

    // Fill up the rest of the file with random values.
    GenerateNewFile(&rnd, &key_index, /* nowait */ true);

    Flush();
  }

  // Snapshot the internal metadata to compare the public APIs against.
  std::vector<std::vector<FileMetaData>> files_by_level;
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files_by_level);

  options.env->GetCurrentTime(&temp_time);
  uint64_t end_time = static_cast<uint64_t>(temp_time);

  ColumnFamilyMetaData cf_meta;
  db_->GetColumnFamilyMetaData(&cf_meta);
  CheckColumnFamilyMeta(cf_meta, files_by_level, start_time, end_time);

  std::vector<LiveFileMetaData> live_file_meta;
  db_->GetLiveFilesMetaData(&live_file_meta);
  CheckLiveFilesMeta(live_file_meta, files_by_level);
}
1166
1167namespace {
// Fill level-0 up to one below the compaction trigger, then add one more file
// and verify that the resulting L0 compaction leaves exactly one L1 file.
void MinLevelHelper(DBTest* self, Options& options) {
  Random rnd(301);

  for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
       num++) {
    std::vector<std::string> values;
    // Write 120KB (12 values, each 10K) — enough to roll one memtable/file.
    for (int i = 0; i < 12; i++) {
      values.push_back(rnd.RandomString(10000));
      ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
    }
    self->dbfull()->TEST_WaitForFlushMemTable();
    ASSERT_EQ(self->NumTableFilesAtLevel(0), num + 1);
  }

  // generate one more file in level-0, and should trigger level-0 compaction
  std::vector<std::string> values;
  for (int i = 0; i < 12; i++) {
    values.push_back(rnd.RandomString(10000));
    ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
  }
  self->dbfull()->TEST_WaitForCompact();

  // All L0 files should have been compacted into a single L1 file.
  ASSERT_EQ(self->NumTableFilesAtLevel(0), 0);
  ASSERT_EQ(self->NumTableFilesAtLevel(1), 1);
}
1194
// Configure `options` for the MinLevelToCompress tests and pick the first
// available compression algorithm (preference order: snappy, zlib, bzip2,
// lz4, xpress, zstd), writing it to `type`. Sets compression_per_level so
// that L0 is uncompressed and all higher levels use `type`.
// Returns false if the calling test should be skipped (no compression
// library is compiled in).
// NOTE(review): wbits/lev/strategy are only logged here, not applied to the
// options — presumably a leftover from when compression options were tuned
// per call; confirm before relying on them.
bool MinLevelToCompress(CompressionType& type, Options& options, int wbits,
                        int lev, int strategy) {
  fprintf(stderr,
          "Test with compression options : window_bits = %d, level = %d, "
          "strategy = %d}\n",
          wbits, lev, strategy);
  options.write_buffer_size = 100 << 10;  // 100KB
  options.arena_block_size = 4096;
  options.num_levels = 3;
  options.level0_file_num_compaction_trigger = 3;
  options.create_if_missing = true;

  if (Snappy_Supported()) {
    type = kSnappyCompression;
    fprintf(stderr, "using snappy\n");
  } else if (Zlib_Supported()) {
    type = kZlibCompression;
    fprintf(stderr, "using zlib\n");
  } else if (BZip2_Supported()) {
    type = kBZip2Compression;
    fprintf(stderr, "using bzip2\n");
  } else if (LZ4_Supported()) {
    type = kLZ4Compression;
    fprintf(stderr, "using lz4\n");
  } else if (XPRESS_Supported()) {
    type = kXpressCompression;
    fprintf(stderr, "using xpress\n");
  } else if (ZSTD_Supported()) {
    type = kZSTD;
    fprintf(stderr, "using ZSTD\n");
  } else {
    fprintf(stderr, "skipping test, compression disabled\n");
    return false;
  }
  options.compression_per_level.resize(options.num_levels);

  // do not compress L0
  for (int i = 0; i < 1; i++) {
    options.compression_per_level[i] = kNoCompression;
  }
  for (int i = 1; i < options.num_levels; i++) {
    options.compression_per_level[i] = type;
  }
  return true;
}
1241} // namespace
1242
// Run the min-level-to-compress scenario twice: first with compression
// starting at L1, then (after DestroyAndReopen) starting at L2.
TEST_F(DBTest, MinLevelToCompress1) {
  Options options = CurrentOptions();
  CompressionType type = kSnappyCompression;
  if (!MinLevelToCompress(type, options, -14, -1, 0)) {
    return;  // no compression library available — skip
  }
  Reopen(options);
  MinLevelHelper(this, options);

  // do not compress L0 and L1
  for (int i = 0; i < 2; i++) {
    options.compression_per_level[i] = kNoCompression;
  }
  for (int i = 2; i < options.num_levels; i++) {
    options.compression_per_level[i] = type;
  }
  DestroyAndReopen(options);
  MinLevelHelper(this, options);
}
1262
// Same scenario as MinLevelToCompress1 but with positive window_bits (15
// instead of -14) passed to the (log-only) configuration helper.
TEST_F(DBTest, MinLevelToCompress2) {
  Options options = CurrentOptions();
  CompressionType type = kSnappyCompression;
  if (!MinLevelToCompress(type, options, 15, -1, 0)) {
    return;  // no compression library available — skip
  }
  Reopen(options);
  MinLevelHelper(this, options);

  // do not compress L0 and L1
  for (int i = 0; i < 2; i++) {
    options.compression_per_level[i] = kNoCompression;
  }
  for (int i = 2; i < options.num_levels; i++) {
    options.compression_per_level[i] = type;
  }
  DestroyAndReopen(options);
  MinLevelHelper(this, options);
}
1282
// This test may fail because of a legit case that multiple L0 files
// are trivial moved to L1.
// Repeatedly overwriting a single key must not grow the file count beyond
// one file per level plus the L0 stop trigger.
TEST_F(DBTest, DISABLED_RepeatedWritesToSameKey) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.write_buffer_size = 100000;  // Small write buffer
    CreateAndReopenWithCF({"pikachu"}, options);

    // We must have at most one file per level except for level-0,
    // which may have up to kL0_StopWritesTrigger files.
    const int kMaxFiles =
        options.num_levels + options.level0_stop_writes_trigger;

    Random rnd(301);
    // A value twice the buffer size forces a flush on every write.
    std::string value =
        rnd.RandomString(static_cast<int>(2 * options.write_buffer_size));
    for (int i = 0; i < 5 * kMaxFiles; i++) {
      ASSERT_OK(Put(1, "key", value));
      ASSERT_LE(TotalTableFiles(1), kMaxFiles);
    }
  } while (ChangeCompactOptions());
}
1306#endif // ROCKSDB_LITE
1307
// Verify that compactions keep per-file overlap with the next level bounded:
// a sparse update over a large key range must not produce a file that
// overlaps more than ~20MB of the next level.
TEST_F(DBTest, SparseMerge) {
  do {
    Options options = CurrentOptions();
    options.compression = kNoCompression;
    CreateAndReopenWithCF({"pikachu"}, options);

    FillLevels("A", "Z", 1);

    // Suppose there is:
    //    small amount of data with prefix A
    //    large amount of data with prefix B
    //    small amount of data with prefix C
    // and that recent updates have made small changes to all three prefixes.
    // Check that we do not do a compaction that merges all of B in one shot.
    const std::string value(1000, 'x');
    Put(1, "A", "va");
    // Write approximately 100MB of "B" values
    for (int i = 0; i < 100000; i++) {
      char key[100];
      snprintf(key, sizeof(key), "B%010d", i);
      Put(1, key, value);
    }
    Put(1, "C", "vc");
    ASSERT_OK(Flush(1));
    dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);

    // Make sparse update
    Put(1, "A", "va2");
    Put(1, "B100", "bvalue2");
    Put(1, "C", "vc2");
    ASSERT_OK(Flush(1));

    // Compactions should not cause us to create a situation where
    // a file overlaps too much data at the next level.
    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
              20 * 1048576);
    dbfull()->TEST_CompactRange(0, nullptr, nullptr);
    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
              20 * 1048576);
    dbfull()->TEST_CompactRange(1, nullptr, nullptr);
    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
              20 * 1048576);
  } while (ChangeCompactOptions());
}
1352
1353#ifndef ROCKSDB_LITE
// Return true iff val lies in the inclusive range [low, high]; on failure,
// log the offending value and bounds to stderr so the enclosing ASSERT_TRUE
// produces a useful diagnostic.
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  const bool in_range = (low <= val) && (val <= high);
  if (!in_range) {
    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
            static_cast<unsigned long long>(val),
            static_cast<unsigned long long>(low),
            static_cast<unsigned long long>(high));
  }
  return in_range;
}
1363
// Exercise GetApproximateSizes with SizeApproximationOptions, checking that
// memtable contents are counted only when requested and that empty key
// ranges report zero. (The API field really is spelled "include_memtabtles".)
TEST_F(DBTest, ApproximateSizesMemTable) {
  Options options = CurrentOptions();
  options.write_buffer_size = 100000000;  // Large write buffer
  options.compression = kNoCompression;
  options.create_if_missing = true;
  DestroyAndReopen(options);
  auto default_cf = db_->DefaultColumnFamily();

  const int N = 128;
  Random rnd(301);
  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(i), rnd.RandomString(1024)));
  }

  uint64_t size;
  std::string start = Key(50);
  std::string end = Key(60);
  Range r(start, end);
  SizeApproximationOptions size_approx_options;
  size_approx_options.include_memtabtles = true;
  size_approx_options.include_files = true;
  // ~10 keys * 1KB values should land between 6KB and 200KB.
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  ASSERT_GT(size, 6000);
  ASSERT_LT(size, 204800);
  // Zero if not including mem table
  db_->GetApproximateSizes(&r, 1, &size);
  ASSERT_EQ(size, 0);

  // Range with no data at all.
  start = Key(500);
  end = Key(600);
  r = Range(start, end);
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  ASSERT_EQ(size, 0);

  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(1000 + i), rnd.RandomString(1024)));
  }

  // Still empty: keys jump from 127 to 1000, skipping [500, 600).
  start = Key(500);
  end = Key(600);
  r = Range(start, end);
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  ASSERT_EQ(size, 0);

  start = Key(100);
  end = Key(1020);
  r = Range(start, end);
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  ASSERT_GT(size, 6000);

  // Second phase: multiple smaller memtables.
  options.max_write_buffer_number = 8;
  options.min_write_buffer_number_to_merge = 5;
  options.write_buffer_size = 1024 * N;  // Not very large
  DestroyAndReopen(options);
  default_cf = db_->DefaultColumnFamily();

  int keys[N * 3];
  for (int i = 0; i < N; i++) {
    keys[i * 3] = i * 5;
    keys[i * 3 + 1] = i * 5 + 1;
    keys[i * 3 + 2] = i * 5 + 2;
  }
  // MemTable entry counting is estimated and can vary greatly depending on
  // layout. Thus, using deterministic seed for test stability.
  RandomShuffle(std::begin(keys), std::end(keys), rnd.Next());

  for (int i = 0; i < N * 3; i++) {
    ASSERT_OK(Put(Key(keys[i] + 1000), rnd.RandomString(1024)));
  }

  // Range below the written keys (all keys are >= 1000).
  start = Key(100);
  end = Key(300);
  r = Range(start, end);
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  ASSERT_EQ(size, 0);

  start = Key(1050);
  end = Key(1080);
  r = Range(start, end);
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  ASSERT_GT(size, 6000);

  // Range above the written keys.
  start = Key(2100);
  end = Key(2300);
  r = Range(start, end);
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  ASSERT_EQ(size, 0);

  start = Key(1050);
  end = Key(1080);
  r = Range(start, end);
  uint64_t size_with_mt, size_without_mt;
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1,
                           &size_with_mt);
  ASSERT_GT(size_with_mt, 6000);
  // Before any flush, the files-only estimate must be zero.
  db_->GetApproximateSizes(&r, 1, &size_without_mt);
  ASSERT_EQ(size_without_mt, 0);

  Flush();

  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(i + 1000), rnd.RandomString(1024)));
  }

  // Now data exists both in files (flushed) and the memtable (rewritten).
  start = Key(1050);
  end = Key(1080);
  r = Range(start, end);
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1,
                           &size_with_mt);
  db_->GetApproximateSizes(&r, 1, &size_without_mt);
  ASSERT_GT(size_with_mt, size_without_mt);
  ASSERT_GT(size_without_mt, 6000);

  // Check that include_memtabtles flag works as expected
  size_approx_options.include_memtabtles = false;
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  ASSERT_EQ(size, size_without_mt);

  // Check that files_size_error_margin works as expected, when the heuristic
  // conditions are not met
  start = Key(1);
  end = Key(1000 + N - 2);
  r = Range(start, end);
  size_approx_options.files_size_error_margin = -1.0;  // disabled
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  uint64_t size2;
  size_approx_options.files_size_error_margin = 0.5;  // enabled, but not used
  db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size2);
  ASSERT_EQ(size, size2);
}
1494
// Verify that the files_size_error_margin heuristic keeps the approximate
// size estimate within the requested margin of the precise estimate, and
// that per-file metadata (e.g. large bloom filters) is not mis-attributed
// to a narrow key range.
TEST_F(DBTest, ApproximateSizesFilesWithErrorMargin) {
  // Roughly 4 keys per data block, 1000 keys per file,
  // with filter substantially larger than a data block
  BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(NewBloomFilterPolicy(16));
  table_options.block_size = 100;
  Options options = CurrentOptions();
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.write_buffer_size = 24 * 1024;
  options.compression = kNoCompression;
  options.create_if_missing = true;
  options.target_file_size_base = 24 * 1024;
  DestroyAndReopen(options);
  const auto default_cf = db_->DefaultColumnFamily();

  const int N = 64000;
  Random rnd(301);
  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(i), rnd.RandomString(24)));
  }
  // Flush everything to files
  Flush();
  // Compact the entire key space into the next level
  db_->CompactRange(CompactRangeOptions(), default_cf, nullptr, nullptr);

  // Write more keys
  for (int i = N; i < (N + N / 4); i++) {
    ASSERT_OK(Put(Key(i), rnd.RandomString(24)));
  }
  // Flush everything to files again
  Flush();

  // Wait for compaction to finish
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  {
    const std::string start = Key(0);
    const std::string end = Key(2 * N);
    const Range r(start, end);

    SizeApproximationOptions size_approx_options;
    size_approx_options.include_memtabtles = false;
    size_approx_options.include_files = true;
    size_approx_options.files_size_error_margin = -1.0;  // disabled

    // Get the precise size without any approximation heuristic
    uint64_t size;
    db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
    ASSERT_NE(size, 0);

    // Get the size with an approximation heuristic
    uint64_t size2;
    const double error_margin = 0.2;
    size_approx_options.files_size_error_margin = error_margin;
    db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size2);
    // The heuristic estimate must stay within +/- error_margin of precise.
    ASSERT_LT(size2, size * (1 + error_margin));
    ASSERT_GT(size2, size * (1 - error_margin));
  }

  {
    // Ensure that metadata is not falsely attributed only to the last data in
    // the file. (In some applications, filters can be large portion of data
    // size.)
    // Perform many queries over small range, enough to ensure crossing file
    // boundary, and make sure we never see a spike for large filter.
    for (int i = 0; i < 3000; i += 10) {
      const std::string start = Key(i);
      const std::string end = Key(i + 11);  // overlap by 1 key
      const Range r(start, end);
      uint64_t size;
      db_->GetApproximateSizes(&r, 1, &size);
      ASSERT_LE(size, 11 * 100);
    }
  }
}
1570
// GetApproximateMemTableStats must report entry count and byte size only for
// data currently in the memtable: populated ranges report plausible numbers,
// empty ranges report zero, and a flush resets the stats to zero.
TEST_F(DBTest, GetApproximateMemTableStats) {
  Options options = CurrentOptions();
  options.write_buffer_size = 100000000;
  options.compression = kNoCompression;
  options.create_if_missing = true;
  DestroyAndReopen(options);

  const int N = 128;
  Random rnd(301);
  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(i), rnd.RandomString(1024)));
  }

  uint64_t count;
  uint64_t size;

  // Populated range: ~10 keys of ~1KB each.
  std::string start = Key(50);
  std::string end = Key(60);
  Range r(start, end);
  db_->GetApproximateMemTableStats(r, &count, &size);
  ASSERT_GT(count, 0);
  ASSERT_LE(count, N);
  ASSERT_GT(size, 6000);
  ASSERT_LT(size, 204800);

  // Range beyond the written keys.
  start = Key(500);
  end = Key(600);
  r = Range(start, end);
  db_->GetApproximateMemTableStats(r, &count, &size);
  ASSERT_EQ(count, 0);
  ASSERT_EQ(size, 0);

  Flush();

  // After flush the memtable is empty, so even a previously-populated
  // range reports zero.
  start = Key(50);
  end = Key(60);
  r = Range(start, end);
  db_->GetApproximateMemTableStats(r, &count, &size);
  ASSERT_EQ(count, 0);
  ASSERT_EQ(size, 0);

  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(1000 + i), rnd.RandomString(1024)));
  }

  // Wide range covering the freshly written keys.
  start = Key(100);
  end = Key(1020);
  r = Range(start, end);
  db_->GetApproximateMemTableStats(r, &count, &size);
  ASSERT_GT(count, 20);
  ASSERT_GT(size, 6000);
}
1623
// Verify GetApproximateSizes (via Size()) over flushed data: estimates must
// scale linearly with the number of 100K values covered, stay stable across
// reopens, and survive piecewise compaction of the key range.
TEST_F(DBTest, ApproximateSizes) {
  do {
    Options options = CurrentOptions();
    options.write_buffer_size = 100000000;  // Large write buffer
    options.compression = kNoCompression;
    options.create_if_missing = true;
    DestroyAndReopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);

    // Empty DB: any range estimates to zero.
    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));

    // Write 8MB (80 values, each 100K)
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
    const int N = 80;
    static const int S1 = 100000;
    static const int S2 = 105000;  // Allow some expansion from metadata
    Random rnd(301);
    for (int i = 0; i < N; i++) {
      ASSERT_OK(Put(1, Key(i), rnd.RandomString(S1)));
    }

    // 0 because GetApproximateSizes() does not account for memtable space
    ASSERT_TRUE(Between(Size("", Key(50), 1), 0, 0));

    // Check sizes across recovery by reopening a few times
    for (int run = 0; run < 3; run++) {
      ReopenWithColumnFamilies({"default", "pikachu"}, options);

      for (int compact_start = 0; compact_start < N; compact_start += 10) {
        for (int i = 0; i < N; i += 10) {
          // Estimates must grow linearly with covered key count.
          ASSERT_TRUE(Between(Size("", Key(i), 1), S1 * i, S2 * i));
          ASSERT_TRUE(Between(Size("", Key(i) + ".suffix", 1), S1 * (i + 1),
                              S2 * (i + 1)));
          ASSERT_TRUE(Between(Size(Key(i), Key(i + 10), 1), S1 * 10, S2 * 10));
        }
        ASSERT_TRUE(Between(Size("", Key(50), 1), S1 * 50, S2 * 50));
        ASSERT_TRUE(
            Between(Size("", Key(50) + ".suffix", 1), S1 * 50, S2 * 50));

        // Compact the next 10-key slice and re-verify on the next pass.
        std::string cstart_str = Key(compact_start);
        std::string cend_str = Key(compact_start + 9);
        Slice cstart = cstart_str;
        Slice cend = cend_str;
        dbfull()->TEST_CompactRange(0, &cstart, &cend, handles_[1]);
      }

      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
      ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
    }
    // ApproximateOffsetOf() is not yet implemented in plain table format.
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
                         kSkipPlainTable | kSkipHashIndex));
}
1679
// Size estimates must remain accurate when value sizes vary widely (10K,
// 100K, and 300K values interleaved), across reopens and after compaction.
TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
  do {
    Options options = CurrentOptions();
    options.compression = kNoCompression;
    CreateAndReopenWithCF({"pikachu"}, options);

    Random rnd(301);
    std::string big1 = rnd.RandomString(100000);
    ASSERT_OK(Put(1, Key(0), rnd.RandomString(10000)));
    ASSERT_OK(Put(1, Key(1), rnd.RandomString(10000)));
    ASSERT_OK(Put(1, Key(2), big1));  // 100K value
    ASSERT_OK(Put(1, Key(3), rnd.RandomString(10000)));
    ASSERT_OK(Put(1, Key(4), big1));  // same 100K value again
    ASSERT_OK(Put(1, Key(5), rnd.RandomString(10000)));
    ASSERT_OK(Put(1, Key(6), rnd.RandomString(300000)));
    ASSERT_OK(Put(1, Key(7), rnd.RandomString(10000)));

    // Check sizes across recovery by reopening a few times
    for (int run = 0; run < 3; run++) {
      ReopenWithColumnFamilies({"default", "pikachu"}, options);

      // Cumulative prefix sizes: each bound is the sum of preceding values.
      ASSERT_TRUE(Between(Size("", Key(0), 1), 0, 0));
      ASSERT_TRUE(Between(Size("", Key(1), 1), 10000, 11000));
      ASSERT_TRUE(Between(Size("", Key(2), 1), 20000, 21000));
      ASSERT_TRUE(Between(Size("", Key(3), 1), 120000, 121000));
      ASSERT_TRUE(Between(Size("", Key(4), 1), 130000, 131000));
      ASSERT_TRUE(Between(Size("", Key(5), 1), 230000, 232000));
      ASSERT_TRUE(Between(Size("", Key(6), 1), 240000, 242000));
      // Ensure some overhead is accounted for, even without including all
      ASSERT_TRUE(Between(Size("", Key(7), 1), 540500, 545000));
      ASSERT_TRUE(Between(Size("", Key(8), 1), 550500, 555000));

      ASSERT_TRUE(Between(Size(Key(3), Key(5), 1), 110100, 111000));

      dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
    }
    // ApproximateOffsetOf() is not yet implemented in plain table format.
  } while (ChangeOptions(kSkipPlainTable));
}
1719#endif // ROCKSDB_LITE
1720
1721#ifndef ROCKSDB_LITE
// Full lifecycle test of snapshots across two column families: each snapshot
// pins the values visible at creation time, the oldest-snapshot time/sequence
// accounting tracks acquisitions and releases, and ManagedSnapshot releases
// on scope exit.
TEST_F(DBTest, Snapshot) {
  env_->SetMockSleep();  // use mock time so snapshot timestamps are cheap
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
    Put(0, "foo", "0v1");
    Put(1, "foo", "1v1");

    const Snapshot* s1 = db_->GetSnapshot();
    ASSERT_EQ(1U, GetNumSnapshots());
    uint64_t time_snap1 = GetTimeOldestSnapshots();
    ASSERT_GT(time_snap1, 0U);
    ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());
    Put(0, "foo", "0v2");
    Put(1, "foo", "1v2");

    env_->MockSleepForSeconds(1);

    // s2 is newer; the "oldest snapshot" stats must still reflect s1.
    const Snapshot* s2 = db_->GetSnapshot();
    ASSERT_EQ(2U, GetNumSnapshots());
    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
    ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());
    Put(0, "foo", "0v3");
    Put(1, "foo", "1v3");

    {
      // RAII snapshot: released automatically at the end of this scope.
      ManagedSnapshot s3(db_);
      ASSERT_EQ(3U, GetNumSnapshots());
      ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
      ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());

      Put(0, "foo", "0v4");
      Put(1, "foo", "1v4");
      // Each snapshot sees exactly the version current when it was taken.
      ASSERT_EQ("0v1", Get(0, "foo", s1));
      ASSERT_EQ("1v1", Get(1, "foo", s1));
      ASSERT_EQ("0v2", Get(0, "foo", s2));
      ASSERT_EQ("1v2", Get(1, "foo", s2));
      ASSERT_EQ("0v3", Get(0, "foo", s3.snapshot()));
      ASSERT_EQ("1v3", Get(1, "foo", s3.snapshot()));
      ASSERT_EQ("0v4", Get(0, "foo"));
      ASSERT_EQ("1v4", Get(1, "foo"));
    }

    // s3 released by scope exit; s1 and s2 remain.
    ASSERT_EQ(2U, GetNumSnapshots());
    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
    ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());
    ASSERT_EQ("0v1", Get(0, "foo", s1));
    ASSERT_EQ("1v1", Get(1, "foo", s1));
    ASSERT_EQ("0v2", Get(0, "foo", s2));
    ASSERT_EQ("1v2", Get(1, "foo", s2));
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));

    // Releasing s1 promotes s2 to oldest.
    db_->ReleaseSnapshot(s1);
    ASSERT_EQ("0v2", Get(0, "foo", s2));
    ASSERT_EQ("1v2", Get(1, "foo", s2));
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));
    ASSERT_EQ(1U, GetNumSnapshots());
    ASSERT_LT(time_snap1, GetTimeOldestSnapshots());
    ASSERT_EQ(GetSequenceOldestSnapshots(), s2->GetSequenceNumber());

    // Releasing the last snapshot zeroes the oldest-sequence accounting.
    db_->ReleaseSnapshot(s2);
    ASSERT_EQ(0U, GetNumSnapshots());
    ASSERT_EQ(GetSequenceOldestSnapshots(), 0);
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));
  } while (ChangeOptions());
}
1792
// Once the snapshot pinning an overwritten value is released, compaction must
// drop the hidden (older) version and reclaim its space.
TEST_F(DBTest, HiddenValuesAreRemoved) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  do {
    Options options = CurrentOptions(options_override);
    CreateAndReopenWithCF({"pikachu"}, options);
    Random rnd(301);
    FillLevels("a", "z", 1);

    std::string big = rnd.RandomString(50000);
    Put(1, "foo", big);
    Put(1, "pastfoo", "v");
    // Snapshot pins the 50KB version of "foo".
    const Snapshot* snapshot = db_->GetSnapshot();
    Put(1, "foo", "tiny");
    Put(1, "pastfoo2", "v2");  // Advance sequence number one more

    ASSERT_OK(Flush(1));
    ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);

    // While the snapshot is live, both versions exist and space is held.
    ASSERT_EQ(big, Get(1, "foo", snapshot));
    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 50000, 60000));
    db_->ReleaseSnapshot(snapshot);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny, " + big + " ]");
    Slice x("x");
    // Compaction after release must drop the old 50KB version.
    dbfull()->TEST_CompactRange(0, nullptr, &x, handles_[1]);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
    ASSERT_GE(NumTableFilesAtLevel(1, 1), 1);
    dbfull()->TEST_CompactRange(1, nullptr, &x, handles_[1]);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");

    // Space for the dropped version has been reclaimed.
    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 0, 1000));
    // ApproximateOffsetOf() is not yet implemented in plain table format,
    // which is used by Size().
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
                         kSkipPlainTable));
}
1830#endif // ROCKSDB_LITE
1831
TEST_F(DBTest, UnremovableSingleDelete) {
  // If we compact:
  //
  // Put(A, v1) Snapshot SingleDelete(A) Put(A, v2)
  //
  // We do not want to end up with:
  //
  // Put(A, v1) Snapshot Put(A, v2)
  //
  // Because a subsequent SingleDelete(A) would delete the Put(A, v2)
  // but not Put(A, v1), so Get(A) would return v1.
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  do {
    Options options = CurrentOptions(options_override);
    options.disable_auto_compactions = true;
    CreateAndReopenWithCF({"pikachu"}, options);

    Put(1, "foo", "first");
    const Snapshot* snapshot = db_->GetSnapshot();
    SingleDelete(1, "foo");
    Put(1, "foo", "second");
    ASSERT_OK(Flush(1));

    ASSERT_EQ("first", Get(1, "foo", snapshot));
    ASSERT_EQ("second", Get(1, "foo"));

    // The live snapshot must pin all three entries: the SDEL may not be
    // compacted away while "first" is still visible through the snapshot.
    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
                           nullptr);
    ASSERT_EQ("[ second, SDEL, first ]", AllEntriesFor("foo", 1));

    SingleDelete(1, "foo");

    ASSERT_EQ("first", Get(1, "foo", snapshot));
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));

    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
                           nullptr);

    ASSERT_EQ("first", Get(1, "foo", snapshot));
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
    db_->ReleaseSnapshot(snapshot);
    // Skip FIFO and universal compaction because they do not apply to the test
    // case. Skip MergePut because single delete does not get removed when it
    // encounters a merge.
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
                         kSkipMergePut));
}
1880
1881#ifndef ROCKSDB_LITE
// A deletion marker (DEL) that is shadowed by a newer Put can be dropped
// during compaction, while the older value below survives until its own
// level is compacted.
TEST_F(DBTest, DeletionMarkers1) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu"}, options);
  Put(1, "foo", "v1");
  ASSERT_OK(Flush(1));
  const int last = 2;
  MoveFilesToLevel(last, 1);
  // foo => v1 is now in last level
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);

  // Place a table at level last-1 to prevent merging with preceding mutation
  Put(1, "a", "begin");
  Put(1, "z", "end");
  Flush(1);
  MoveFilesToLevel(last - 1, 1);
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);

  Delete(1, "foo");
  Put(1, "foo", "v2");
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
  ASSERT_OK(Flush(1));  // Moves to level last-2
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
  Slice z("z");
  dbfull()->TEST_CompactRange(last - 2, nullptr, &z, handles_[1]);
  // DEL eliminated, but v1 remains because we aren't compacting that level
  // (DEL can be eliminated because v2 hides v1).
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
  // Merging last-1 w/ last, so we are the base level for "foo", so
  // DEL is removed. (as is v1).
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
}
1915
// Companion to DeletionMarkers1: a DEL with no newer Put above it must be
// kept as long as a lower level may still contain the key, and may only be
// dropped when compaction reaches the base level for that key.
TEST_F(DBTest, DeletionMarkers2) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu"}, options);
  Put(1, "foo", "v1");
  ASSERT_OK(Flush(1));
  const int last = 2;
  MoveFilesToLevel(last, 1);
  // foo => v1 is now in last level
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);

  // Place a table at level last-1 to prevent merging with preceding mutation
  Put(1, "a", "begin");
  Put(1, "z", "end");
  Flush(1);
  MoveFilesToLevel(last - 1, 1);
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);

  Delete(1, "foo");
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  ASSERT_OK(Flush(1));  // Moves to level last-2
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr, handles_[1]);
  // DEL kept: "last" file overlaps
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
  // Merging last-1 w/ last, so we are the base level for "foo", so
  // DEL is removed. (as is v1).
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
}
1946
// Regression test: a memtable flush must detect overlap with existing
// level-0 files so a deletion is not pushed past the data it deletes.
TEST_F(DBTest, OverlapInLevel0) {
  do {
    Options options = CurrentOptions();
    CreateAndReopenWithCF({"pikachu"}, options);

    // Fill levels 1 and 2 to disable the pushing of new memtables to levels >
    // 0.
    ASSERT_OK(Put(1, "100", "v100"));
    ASSERT_OK(Put(1, "999", "v999"));
    Flush(1);
    MoveFilesToLevel(2, 1);
    ASSERT_OK(Delete(1, "100"));
    ASSERT_OK(Delete(1, "999"));
    Flush(1);
    MoveFilesToLevel(1, 1);
    ASSERT_EQ("0,1,1", FilesPerLevel(1));

    // Make files spanning the following ranges in level-0:
    //  files[0]  200 .. 900
    //  files[1]  300 .. 500
    // Note that files are sorted by smallest key.
    ASSERT_OK(Put(1, "300", "v300"));
    ASSERT_OK(Put(1, "500", "v500"));
    Flush(1);
    ASSERT_OK(Put(1, "200", "v200"));
    ASSERT_OK(Put(1, "600", "v600"));
    ASSERT_OK(Put(1, "900", "v900"));
    Flush(1);
    ASSERT_EQ("2,1,1", FilesPerLevel(1));

    // Compact away the placeholder files we created initially
    dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
    dbfull()->TEST_CompactRange(2, nullptr, nullptr, handles_[1]);
    ASSERT_EQ("2", FilesPerLevel(1));

    // Do a memtable compaction.  Before bug-fix, the compaction would
    // not detect the overlap with level-0 files and would incorrectly place
    // the deletion in a deeper level.
    ASSERT_OK(Delete(1, "600"));
    Flush(1);
    ASSERT_EQ("3", FilesPerLevel(1));
    ASSERT_EQ("NOT_FOUND", Get(1, "600"));
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
}
1991#endif // ROCKSDB_LITE
1992
// Reopening a column family with a comparator whose Name() differs from the
// one it was created with must fail with an error mentioning "comparator".
TEST_F(DBTest, ComparatorCheck) {
  // Behaves exactly like the bytewise comparator but reports a different
  // name, which is what the open-time check keys on.
  class NewComparator : public Comparator {
   public:
    const char* Name() const override { return "rocksdb.NewComparator"; }
    int Compare(const Slice& a, const Slice& b) const override {
      return BytewiseComparator()->Compare(a, b);
    }
    void FindShortestSeparator(std::string* s, const Slice& l) const override {
      BytewiseComparator()->FindShortestSeparator(s, l);
    }
    void FindShortSuccessor(std::string* key) const override {
      BytewiseComparator()->FindShortSuccessor(key);
    }
  };
  Options new_options, options;
  NewComparator cmp;
  do {
    options = CurrentOptions();
    CreateAndReopenWithCF({"pikachu"}, options);
    new_options = CurrentOptions();
    new_options.comparator = &cmp;
    // only the non-default column family has non-matching comparator
    Status s = TryReopenWithColumnFamilies(
        {"default", "pikachu"}, std::vector<Options>({options, new_options}));
    ASSERT_TRUE(!s.ok());
    ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
        << s.ToString();
  } while (ChangeCompactOptions());
}
2022
// Exercises reads, writes and compactions through a user-defined comparator
// that orders keys by their numeric value (e.g. "[10]" == "[0xa]").
TEST_F(DBTest, CustomComparator) {
  class NumberComparator : public Comparator {
   public:
    const char* Name() const override { return "test.NumberComparator"; }
    int Compare(const Slice& a, const Slice& b) const override {
      return ToNumber(a) - ToNumber(b);
    }
    // Separator/successor shortening is a no-op here; the calls below only
    // validate that every key handed to the comparator is well-formed.
    void FindShortestSeparator(std::string* s, const Slice& l) const override {
      ToNumber(*s);  // Check format
      ToNumber(l);   // Check format
    }
    void FindShortSuccessor(std::string* key) const override {
      ToNumber(*key);  // Check format
    }

   private:
    // Parses "[<number>]" (decimal or 0x-hex via %i) and EXPECTs that the key
    // has no leading/trailing junk.
    static int ToNumber(const Slice& x) {
      // Check that there are no extra characters.
      EXPECT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']')
          << EscapeString(x);
      int val;
      char ignored;
      EXPECT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
          << EscapeString(x);
      return val;
    }
  };
  Options new_options;
  NumberComparator cmp;
  do {
    new_options = CurrentOptions();
    new_options.create_if_missing = true;
    new_options.comparator = &cmp;
    new_options.write_buffer_size = 4096;  // Compact more often
    new_options.arena_block_size = 4096;
    new_options = CurrentOptions(new_options);
    DestroyAndReopen(new_options);
    CreateAndReopenWithCF({"pikachu"}, new_options);
    ASSERT_OK(Put(1, "[10]", "ten"));
    ASSERT_OK(Put(1, "[0x14]", "twenty"));
    for (int i = 0; i < 2; i++) {
      // Decimal and hex spellings of the same number must alias to one key.
      ASSERT_EQ("ten", Get(1, "[10]"));
      ASSERT_EQ("ten", Get(1, "[0xa]"));
      ASSERT_EQ("twenty", Get(1, "[20]"));
      ASSERT_EQ("twenty", Get(1, "[0x14]"));
      ASSERT_EQ("NOT_FOUND", Get(1, "[15]"));
      ASSERT_EQ("NOT_FOUND", Get(1, "[0xf]"));
      Compact(1, "[0]", "[9999]");
    }

    // Write enough keys to trigger flushes/compactions under the custom
    // ordering.
    for (int run = 0; run < 2; run++) {
      for (int i = 0; i < 1000; i++) {
        char buf[100];
        snprintf(buf, sizeof(buf), "[%d]", i * 10);
        ASSERT_OK(Put(1, buf, buf));
      }
      Compact(1, "[0]", "[1000000]");
    }
  } while (ChangeCompactOptions());
}
2083
2084TEST_F(DBTest, DBOpen_Options) {
2085 Options options = CurrentOptions();
11fdf7f2 2086 std::string dbname = test::PerThreadDBPath("db_options_test");
7c673cae
FG
2087 ASSERT_OK(DestroyDB(dbname, options));
2088
2089 // Does not exist, and create_if_missing == false: error
2090 DB* db = nullptr;
2091 options.create_if_missing = false;
2092 Status s = DB::Open(options, dbname, &db);
2093 ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr);
2094 ASSERT_TRUE(db == nullptr);
2095
2096 // Does not exist, and create_if_missing == true: OK
2097 options.create_if_missing = true;
2098 s = DB::Open(options, dbname, &db);
2099 ASSERT_OK(s);
2100 ASSERT_TRUE(db != nullptr);
2101
2102 delete db;
2103 db = nullptr;
2104
2105 // Does exist, and error_if_exists == true: error
2106 options.create_if_missing = false;
2107 options.error_if_exists = true;
2108 s = DB::Open(options, dbname, &db);
2109 ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr);
2110 ASSERT_TRUE(db == nullptr);
2111
2112 // Does exist, and error_if_exists == false: OK
2113 options.create_if_missing = true;
2114 options.error_if_exists = false;
2115 s = DB::Open(options, dbname, &db);
2116 ASSERT_OK(s);
2117 ASSERT_TRUE(db != nullptr);
2118
2119 delete db;
2120 db = nullptr;
2121}
2122
// Reopening a DB with fewer levels than data already occupies must fail with
// an InvalidArgument error rather than silently losing data.
TEST_F(DBTest, DBOpen_Change_NumLevels) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  DestroyAndReopen(options);
  ASSERT_TRUE(db_ != nullptr);
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "a", "123"));
  ASSERT_OK(Put(1, "b", "234"));
  Flush(1);
  // Push the data to level 3, which will not exist after num_levels = 2.
  MoveFilesToLevel(3, 1);
  Close();

  options.create_if_missing = false;
  options.num_levels = 2;
  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "Invalid argument") != nullptr);
  ASSERT_TRUE(db_ == nullptr);
}
2142
2143TEST_F(DBTest, DestroyDBMetaDatabase) {
11fdf7f2 2144 std::string dbname = test::PerThreadDBPath("db_meta");
7c673cae
FG
2145 ASSERT_OK(env_->CreateDirIfMissing(dbname));
2146 std::string metadbname = MetaDatabaseName(dbname, 0);
2147 ASSERT_OK(env_->CreateDirIfMissing(metadbname));
2148 std::string metametadbname = MetaDatabaseName(metadbname, 0);
2149 ASSERT_OK(env_->CreateDirIfMissing(metametadbname));
2150
2151 // Destroy previous versions if they exist. Using the long way.
2152 Options options = CurrentOptions();
2153 ASSERT_OK(DestroyDB(metametadbname, options));
2154 ASSERT_OK(DestroyDB(metadbname, options));
2155 ASSERT_OK(DestroyDB(dbname, options));
2156
2157 // Setup databases
2158 DB* db = nullptr;
2159 ASSERT_OK(DB::Open(options, dbname, &db));
2160 delete db;
2161 db = nullptr;
2162 ASSERT_OK(DB::Open(options, metadbname, &db));
2163 delete db;
2164 db = nullptr;
2165 ASSERT_OK(DB::Open(options, metametadbname, &db));
2166 delete db;
2167 db = nullptr;
2168
2169 // Delete databases
2170 ASSERT_OK(DestroyDB(dbname, options));
2171
2172 // Check if deletion worked.
2173 options.create_if_missing = false;
2174 ASSERT_TRUE(!(DB::Open(options, dbname, &db)).ok());
2175 ASSERT_TRUE(!(DB::Open(options, metadbname, &db)).ok());
2176 ASSERT_TRUE(!(DB::Open(options, metametadbname, &db)).ok());
2177}
2178
2179#ifndef ROCKSDB_LITE
2180TEST_F(DBTest, SnapshotFiles) {
2181 do {
2182 Options options = CurrentOptions();
2183 options.write_buffer_size = 100000000; // Large write buffer
2184 CreateAndReopenWithCF({"pikachu"}, options);
2185
2186 Random rnd(301);
2187
2188 // Write 8MB (80 values, each 100K)
2189 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
2190 std::vector<std::string> values;
2191 for (int i = 0; i < 80; i++) {
20effc67 2192 values.push_back(rnd.RandomString(100000));
7c673cae
FG
2193 ASSERT_OK(Put((i < 40), Key(i), values[i]));
2194 }
2195
2196 // assert that nothing makes it to disk yet.
2197 ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
2198
2199 // get a file snapshot
2200 uint64_t manifest_number = 0;
2201 uint64_t manifest_size = 0;
2202 std::vector<std::string> files;
2203 dbfull()->DisableFileDeletions();
2204 dbfull()->GetLiveFiles(files, &manifest_size);
2205
2206 // CURRENT, MANIFEST, OPTIONS, *.sst files (one for each CF)
2207 ASSERT_EQ(files.size(), 5U);
2208
2209 uint64_t number = 0;
2210 FileType type;
2211
2212 // copy these files to a new snapshot directory
2213 std::string snapdir = dbname_ + ".snapdir/";
20effc67
TL
2214 if (env_->FileExists(snapdir).ok()) {
2215 ASSERT_OK(DestroyDir(env_, snapdir));
2216 }
2217 ASSERT_OK(env_->CreateDir(snapdir));
7c673cae
FG
2218
2219 for (size_t i = 0; i < files.size(); i++) {
2220 // our clients require that GetLiveFiles returns
2221 // files with "/" as first character!
2222 ASSERT_EQ(files[i][0], '/');
2223 std::string src = dbname_ + files[i];
2224 std::string dest = snapdir + files[i];
2225
2226 uint64_t size;
2227 ASSERT_OK(env_->GetFileSize(src, &size));
2228
2229 // record the number and the size of the
2230 // latest manifest file
2231 if (ParseFileName(files[i].substr(1), &number, &type)) {
2232 if (type == kDescriptorFile) {
2233 if (number > manifest_number) {
2234 manifest_number = number;
2235 ASSERT_GE(size, manifest_size);
2236 size = manifest_size; // copy only valid MANIFEST data
2237 }
2238 }
2239 }
2240 CopyFile(src, dest, size);
2241 }
2242
2243 // release file snapshot
2244 dbfull()->DisableFileDeletions();
2245 // overwrite one key, this key should not appear in the snapshot
2246 std::vector<std::string> extras;
2247 for (unsigned int i = 0; i < 1; i++) {
20effc67 2248 extras.push_back(rnd.RandomString(100000));
7c673cae
FG
2249 ASSERT_OK(Put(0, Key(i), extras[i]));
2250 }
2251
2252 // verify that data in the snapshot are correct
2253 std::vector<ColumnFamilyDescriptor> column_families;
2254 column_families.emplace_back("default", ColumnFamilyOptions());
2255 column_families.emplace_back("pikachu", ColumnFamilyOptions());
2256 std::vector<ColumnFamilyHandle*> cf_handles;
2257 DB* snapdb;
2258 DBOptions opts;
2259 opts.env = env_;
2260 opts.create_if_missing = false;
2261 Status stat =
2262 DB::Open(opts, snapdir, column_families, &cf_handles, &snapdb);
2263 ASSERT_OK(stat);
2264
2265 ReadOptions roptions;
2266 std::string val;
2267 for (unsigned int i = 0; i < 80; i++) {
2268 stat = snapdb->Get(roptions, cf_handles[i < 40], Key(i), &val);
2269 ASSERT_EQ(values[i].compare(val), 0);
2270 }
2271 for (auto cfh : cf_handles) {
2272 delete cfh;
2273 }
2274 delete snapdb;
2275
2276 // look at the new live files after we added an 'extra' key
2277 // and after we took the first snapshot.
2278 uint64_t new_manifest_number = 0;
2279 uint64_t new_manifest_size = 0;
2280 std::vector<std::string> newfiles;
2281 dbfull()->DisableFileDeletions();
2282 dbfull()->GetLiveFiles(newfiles, &new_manifest_size);
2283
2284 // find the new manifest file. assert that this manifest file is
2285 // the same one as in the previous snapshot. But its size should be
2286 // larger because we added an extra key after taking the
2287 // previous shapshot.
2288 for (size_t i = 0; i < newfiles.size(); i++) {
2289 std::string src = dbname_ + "/" + newfiles[i];
2290 // record the lognumber and the size of the
2291 // latest manifest file
2292 if (ParseFileName(newfiles[i].substr(1), &number, &type)) {
2293 if (type == kDescriptorFile) {
2294 if (number > new_manifest_number) {
2295 uint64_t size;
2296 new_manifest_number = number;
2297 ASSERT_OK(env_->GetFileSize(src, &size));
2298 ASSERT_GE(size, new_manifest_size);
2299 }
2300 }
2301 }
2302 }
2303 ASSERT_EQ(manifest_number, new_manifest_number);
2304 ASSERT_GT(new_manifest_size, manifest_size);
2305
2306 // release file snapshot
2307 dbfull()->DisableFileDeletions();
2308 } while (ChangeCompactOptions());
2309}
f67539c2
TL
2310
2311TEST_F(DBTest, ReadonlyDBGetLiveManifestSize) {
2312 do {
2313 Options options = CurrentOptions();
2314 options.level0_file_num_compaction_trigger = 2;
2315 DestroyAndReopen(options);
2316
2317 ASSERT_OK(Put("foo", "bar"));
2318 ASSERT_OK(Flush());
2319 ASSERT_OK(Put("foo", "bar"));
2320 ASSERT_OK(Flush());
2321 ASSERT_OK(dbfull()->TEST_WaitForCompact());
2322
2323 Close();
2324 ASSERT_OK(ReadOnlyReopen(options));
2325
2326 uint64_t manifest_size = 0;
2327 std::vector<std::string> files;
2328 dbfull()->GetLiveFiles(files, &manifest_size);
2329
2330 for (const std::string& f : files) {
2331 uint64_t number = 0;
2332 FileType type;
2333 if (ParseFileName(f.substr(1), &number, &type)) {
2334 if (type == kDescriptorFile) {
2335 uint64_t size_on_disk;
2336 env_->GetFileSize(dbname_ + "/" + f, &size_on_disk);
2337 ASSERT_EQ(manifest_size, size_on_disk);
2338 break;
2339 }
2340 }
2341 }
2342 Close();
2343 } while (ChangeCompactOptions());
2344}
20effc67
TL
2345
// A blob file registered in the current version's storage info must be
// reported (first) by GetLiveFiles().
TEST_F(DBTest, GetLiveBlobFiles) {
  VersionSet* const versions = dbfull()->TEST_GetVersionSet();
  assert(versions);
  assert(versions->GetColumnFamilySet());

  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
  assert(cfd);

  Version* const version = cfd->current();
  assert(version);

  VersionStorageInfo* const storage_info = version->storage_info();
  assert(storage_info);

  // Add a live blob file.
  constexpr uint64_t blob_file_number = 234;
  constexpr uint64_t total_blob_count = 555;
  constexpr uint64_t total_blob_bytes = 66666;
  constexpr char checksum_method[] = "CRC32";
  constexpr char checksum_value[] = "3d87ff57";

  auto shared_meta = SharedBlobFileMetaData::Create(
      blob_file_number, total_blob_count, total_blob_bytes, checksum_method,
      checksum_value);

  // No garbage: the file is fully live.
  constexpr uint64_t garbage_blob_count = 0;
  constexpr uint64_t garbage_blob_bytes = 0;

  auto meta = BlobFileMetaData::Create(std::move(shared_meta),
                                       BlobFileMetaData::LinkedSsts(),
                                       garbage_blob_count, garbage_blob_bytes);

  storage_info->AddBlobFile(std::move(meta));

  // Make sure it appears in the results returned by GetLiveFiles.
  uint64_t manifest_size = 0;
  std::vector<std::string> files;
  ASSERT_OK(dbfull()->GetLiveFiles(files, &manifest_size));

  ASSERT_FALSE(files.empty());
  ASSERT_EQ(files[0], BlobFileName("", blob_file_number));
}
7c673cae
FG
2388#endif
2389
// keep_log_file_num must cap the number of retained info logs, both when the
// logs live in the DB directory (mode 0) and in a separate db_log_dir
// (mode 1).
TEST_F(DBTest, PurgeInfoLogs) {
  Options options = CurrentOptions();
  options.keep_log_file_num = 5;
  options.create_if_missing = true;
  options.env = env_;
  for (int mode = 0; mode <= 1; mode++) {
    if (mode == 1) {
      options.db_log_dir = dbname_ + "_logs";
      env_->CreateDirIfMissing(options.db_log_dir);
    } else {
      options.db_log_dir = "";
    }
    // Each reopen rolls a new info log; 8 reopens > keep_log_file_num.
    for (int i = 0; i < 8; i++) {
      Reopen(options);
    }

    std::vector<std::string> files;
    env_->GetChildren(options.db_log_dir.empty() ? dbname_ : options.db_log_dir,
                      &files);
    int info_log_count = 0;
    for (std::string file : files) {
      if (file.find("LOG") != std::string::npos) {
        info_log_count++;
      }
    }
    // Only keep_log_file_num (5) info logs may survive.
    ASSERT_EQ(5, info_log_count);

    Destroy(options);
    // For mode 0, DestroyDB() must have deleted all the logs under DB dir.
    // For mode 1, no info log file should have been put under DB dir at all.
    std::vector<std::string> db_files;
    env_->GetChildren(dbname_, &db_files);
    for (std::string file : db_files) {
      ASSERT_TRUE(file.find("LOG") == std::string::npos);
    }

    if (mode == 1) {
      // Cleaning up
      env_->GetChildren(options.db_log_dir, &files);
      for (std::string file : files) {
        env_->DeleteFile(options.db_log_dir + "/" + file);
      }
      env_->DeleteDir(options.db_log_dir);
    }
  }
}
2436
2437#ifndef ROCKSDB_LITE
2438// Multi-threaded test:
namespace {

// Knobs for the multi-threaded reader/writer test below.
static const int kColumnFamilies = 10;
static const int kNumThreads = 10;
static const int kTestSeconds = 10;
static const int kNumKeys = 1000;

// State shared between the test body and all worker threads.
struct MTState {
  DBTest* test;                            // owning test fixture (for db_/handles_)
  std::atomic<bool> stop;                  // set by the test to stop workers
  std::atomic<int> counter[kNumThreads];   // ops completed, per writer thread
  std::atomic<bool> thread_done[kNumThreads];  // set by each worker on exit
};

// Per-thread argument handed to MTThreadBody.
struct MTThread {
  MTState* state;
  int id;
  bool multiget_batched;  // true: use the batched MultiGet API
};

// Worker loop: on each iteration either writes the same value pattern to a
// random key in every column family (atomically, via one WriteBatch), or
// reads that key from all CFs and verifies the writes were atomic.
static void MTThreadBody(void* arg) {
  MTThread* t = reinterpret_cast<MTThread*>(arg);
  int id = t->id;
  DB* db = t->state->test->db_;
  int counter = 0;
  fprintf(stderr, "... starting thread %d\n", id);
  Random rnd(1000 + id);
  char valbuf[1500];
  while (t->state->stop.load(std::memory_order_acquire) == false) {
    t->state->counter[id].store(counter, std::memory_order_release);

    int key = rnd.Uniform(kNumKeys);
    char keybuf[20];
    snprintf(keybuf, sizeof(keybuf), "%016d", key);

    if (rnd.OneIn(2)) {
      // Write values of the form <key, my id, counter, cf, unique_id>.
      // into each of the CFs
      // We add some padding for force compactions.
      int unique_id = rnd.Uniform(1000000);

      // Half of the time directly use WriteBatch. Half of the time use
      // WriteBatchWithIndex.
      if (rnd.OneIn(2)) {
        WriteBatch batch;
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
                   static_cast<int>(counter), cf, unique_id);
          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
        }
        ASSERT_OK(db->Write(WriteOptions(), &batch));
      } else {
        WriteBatchWithIndex batch(db->GetOptions().comparator);
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
                   static_cast<int>(counter), cf, unique_id);
          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
        }
        ASSERT_OK(db->Write(WriteOptions(), batch.GetWriteBatch()));
      }
    } else {
      // Read a value and verify that it matches the pattern written above
      // and that writes to all column families were atomic (unique_id is the
      // same)
      std::vector<Slice> keys(kColumnFamilies, Slice(keybuf));
      std::vector<std::string> values;
      std::vector<Status> statuses;
      if (!t->multiget_batched) {
        statuses = db->MultiGet(ReadOptions(), t->state->test->handles_, keys,
                                &values);
      } else {
        // Batched MultiGet is one-CF-at-a-time, so pin a snapshot to make the
        // per-CF reads observe one consistent state.
        std::vector<PinnableSlice> pin_values(keys.size());
        statuses.resize(keys.size());
        const Snapshot* snapshot = db->GetSnapshot();
        ReadOptions ro;
        ro.snapshot = snapshot;
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
          db->MultiGet(ro, t->state->test->handles_[cf], 1, &keys[cf],
                       &pin_values[cf], &statuses[cf]);
        }
        db->ReleaseSnapshot(snapshot);
        values.resize(keys.size());
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
          if (statuses[cf].ok()) {
            values[cf].assign(pin_values[cf].data(), pin_values[cf].size());
          }
        }
      }
      Status s = statuses[0];
      // all statuses have to be the same
      for (size_t i = 1; i < statuses.size(); ++i) {
        // they are either both ok or both not-found
        ASSERT_TRUE((s.ok() && statuses[i].ok()) ||
                    (s.IsNotFound() && statuses[i].IsNotFound()));
      }
      if (s.IsNotFound()) {
        // Key has not yet been written
      } else {
        // Check that the writer thread counter is >= the counter in the value
        ASSERT_OK(s);
        int unique_id = -1;
        for (int i = 0; i < kColumnFamilies; ++i) {
          int k, w, c, cf, u;
          ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w, &c,
                              &cf, &u))
              << values[i];
          ASSERT_EQ(k, key);
          ASSERT_GE(w, 0);
          ASSERT_LT(w, kNumThreads);
          ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire));
          ASSERT_EQ(cf, i);
          if (i == 0) {
            unique_id = u;
          } else {
            // this checks that updates across column families happened
            // atomically -- all unique ids are the same
            ASSERT_EQ(u, unique_id);
          }
        }
      }
    }
    counter++;
  }
  t->state->thread_done[id].store(true, std::memory_order_release);
  fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter));
}

}  // namespace
2567
f67539c2
TL
2568class MultiThreadedDBTest
2569 : public DBTest,
2570 public ::testing::WithParamInterface<std::tuple<int, bool>> {
7c673cae 2571 public:
f67539c2
TL
2572 void SetUp() override {
2573 std::tie(option_config_, multiget_batched_) = GetParam();
2574 }
7c673cae
FG
2575
2576 static std::vector<int> GenerateOptionConfigs() {
2577 std::vector<int> optionConfigs;
2578 for (int optionConfig = kDefault; optionConfig < kEnd; ++optionConfig) {
494da23a 2579 optionConfigs.push_back(optionConfig);
7c673cae
FG
2580 }
2581 return optionConfigs;
2582 }
f67539c2
TL
2583
2584 bool multiget_batched_;
7c673cae
FG
2585};
2586
// Runs kNumThreads concurrent reader/writer threads (MTThreadBody) against
// kColumnFamilies column families for kTestSeconds and relies on the worker
// assertions to detect non-atomic cross-CF writes.
TEST_P(MultiThreadedDBTest, MultiThreaded) {
  // Pipelined write is not expected to uphold the cross-CF atomicity this
  // test checks, so it is excluded.
  if (option_config_ == kPipelinedWrite) return;
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  Options options = CurrentOptions(options_override);
  std::vector<std::string> cfs;
  for (int i = 1; i < kColumnFamilies; ++i) {
    cfs.push_back(ToString(i));
  }
  Reopen(options);
  CreateAndReopenWithCF(cfs, options);
  // Initialize state
  MTState mt;
  mt.test = this;
  mt.stop.store(false, std::memory_order_release);
  for (int id = 0; id < kNumThreads; id++) {
    mt.counter[id].store(0, std::memory_order_release);
    mt.thread_done[id].store(false, std::memory_order_release);
  }

  // Start threads
  MTThread thread[kNumThreads];
  for (int id = 0; id < kNumThreads; id++) {
    thread[id].state = &mt;
    thread[id].id = id;
    thread[id].multiget_batched = multiget_batched_;
    env_->StartThread(MTThreadBody, &thread[id]);
  }

  // Let them run for a while
  env_->SleepForMicroseconds(kTestSeconds * 1000000);

  // Stop the threads and wait for them to finish
  mt.stop.store(true, std::memory_order_release);
  for (int id = 0; id < kNumThreads; id++) {
    while (mt.thread_done[id].load(std::memory_order_acquire) == false) {
      env_->SleepForMicroseconds(100000);
    }
  }
}
2627
// Instantiate MultiThreaded over every option config, crossed with both the
// non-batched and batched MultiGet code paths.
INSTANTIATE_TEST_CASE_P(
    MultiThreaded, MultiThreadedDBTest,
    ::testing::Combine(
        ::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs()),
        ::testing::Bool()));
7c673cae
FG
2633#endif // ROCKSDB_LITE
2634
2635// Group commit test:
494da23a
TL
2636#if !defined(TRAVIS) && !defined(OS_WIN)
2637// Disable this test temporarily on Travis and appveyor as it fails
2638// intermittently. Github issue: #4151
7c673cae
FG
namespace {

// Knobs for the group-commit test below.
static const int kGCNumThreads = 4;
static const int kGCNumKeys = 1000;

// Per-thread argument/state for GCThreadBody.
struct GCThread {
  DB* db;
  int id;
  std::atomic<bool> done;  // set once this writer has finished all its Puts
};

// Writer loop: each thread Put()s its own disjoint key range
// [id * kGCNumKeys, (id + 1) * kGCNumKeys), using key == value.
static void GCThreadBody(void* arg) {
  GCThread* t = reinterpret_cast<GCThread*>(arg);
  int id = t->id;
  DB* db = t->db;
  WriteOptions wo;

  for (int i = 0; i < kGCNumKeys; ++i) {
    std::string kv(ToString(i + id * kGCNumKeys));
    ASSERT_OK(db->Put(wo, kv, kv));
  }
  t->done = true;
}

}  // namespace
2664
// Forces concurrent writers into one write group via sync points and checks
// that (a) some writes were committed by another thread's leader
// (WRITE_DONE_BY_OTHER > 0) and (b) every written key/value is present.
TEST_F(DBTest, GroupCommitTest) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    Reopen(options);

    // Order the writer threads so that followers are already waiting when the
    // leader forms the batch group, guaranteeing a real group commit.
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
        {{"WriteThread::JoinBatchGroup:BeganWaiting",
          "DBImpl::WriteImpl:BeforeLeaderEnters"},
         {"WriteThread::AwaitState:BlockingWaiting",
          "WriteThread::EnterAsBatchGroupLeader:End"}});
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

    // Start threads
    GCThread thread[kGCNumThreads];
    for (int id = 0; id < kGCNumThreads; id++) {
      thread[id].id = id;
      thread[id].db = db_;
      thread[id].done = false;
      env_->StartThread(GCThreadBody, &thread[id]);
    }
    env_->WaitForJoin();

    ASSERT_GT(TestGetTickerCount(options, WRITE_DONE_BY_OTHER), 0);

    // Build the full expected key set (string-sorted, matching iterator
    // order under the bytewise comparator).
    std::vector<std::string> expected_db;
    for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) {
      expected_db.push_back(ToString(i));
    }
    std::sort(expected_db.begin(), expected_db.end());

    Iterator* itr = db_->NewIterator(ReadOptions());
    itr->SeekToFirst();
    for (auto x : expected_db) {
      ASSERT_TRUE(itr->Valid());
      ASSERT_EQ(itr->key().ToString(), x);
      ASSERT_EQ(itr->value().ToString(), x);
      itr->Next();
    }
    ASSERT_TRUE(!itr->Valid());
    delete itr;

    HistogramData hist_data;
    options.statistics->histogramData(DB_WRITE, &hist_data);
    ASSERT_GT(hist_data.average, 0.0);
  } while (ChangeOptions(kSkipNoSeekToLast));
}
11fdf7f2 2713#endif // TRAVIS
7c673cae
FG
2714
namespace {
// Ordered key/value mapping used by ModelDB below to mirror the expected
// database contents. C++11 alias declaration instead of typedef, consistent
// with the modern constructs (override, nullptr) used elsewhere in the file.
using KVMap = std::map<std::string, std::string>;
}  // namespace
2718
// In-memory reference ("model") implementation of the DB interface backed by
// a single std::map<std::string, std::string>. The Randomized test below
// applies the same operation stream to a ModelDB and a real DB and compares
// their iterator contents. Only the write path (Put/Delete/SingleDelete/
// Write via WriteBatch), snapshots, and NewIterator are functional; point
// reads (Get/MultiGet/GetMergeOperands) and admin operations return
// NotSupported or fixed defaults.
class ModelDB : public DB {
 public:
  // A snapshot is simply a full copy of the map taken at GetSnapshot() time.
  class ModelSnapshot : public Snapshot {
   public:
    KVMap map_;

    SequenceNumber GetSequenceNumber() const override {
      // no need to call this
      assert(false);
      return 0;
    }
  };

  explicit ModelDB(const Options& options) : options_(options) {}
  using DB::Put;
  // All single-key mutations are routed through Write() via a one-entry
  // WriteBatch so that batched and unbatched writes share one code path.
  Status Put(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& k,
             const Slice& v) override {
    WriteBatch batch;
    batch.Put(cf, k, v);
    return Write(o, &batch);
  }
  using DB::Close;
  Status Close() override { return Status::OK(); }
  using DB::Delete;
  Status Delete(const WriteOptions& o, ColumnFamilyHandle* cf,
                const Slice& key) override {
    WriteBatch batch;
    batch.Delete(cf, key);
    return Write(o, &batch);
  }
  using DB::SingleDelete;
  Status SingleDelete(const WriteOptions& o, ColumnFamilyHandle* cf,
                      const Slice& key) override {
    WriteBatch batch;
    batch.SingleDelete(cf, key);
    return Write(o, &batch);
  }
  using DB::Merge;
  Status Merge(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& k,
               const Slice& v) override {
    WriteBatch batch;
    batch.Merge(cf, k, v);
    return Write(o, &batch);
  }
  using DB::Get;
  // Point reads are not modeled; callers compare via iterators instead.
  Status Get(const ReadOptions& /*options*/, ColumnFamilyHandle* /*cf*/,
             const Slice& key, PinnableSlice* /*value*/) override {
    return Status::NotSupported(key);
  }

  using DB::GetMergeOperands;
  virtual Status GetMergeOperands(
      const ReadOptions& /*options*/, ColumnFamilyHandle* /*column_family*/,
      const Slice& key, PinnableSlice* /*slice*/,
      GetMergeOperandsOptions* /*merge_operands_options*/,
      int* /*number_of_operands*/) override {
    return Status::NotSupported(key);
  }

  using DB::MultiGet;
  std::vector<Status> MultiGet(
      const ReadOptions& /*options*/,
      const std::vector<ColumnFamilyHandle*>& /*column_family*/,
      const std::vector<Slice>& keys,
      std::vector<std::string>* /*values*/) override {
    // One NotSupported status per requested key.
    std::vector<Status> s(keys.size(),
                          Status::NotSupported("Not implemented."));
    return s;
  }

#ifndef ROCKSDB_LITE
  using DB::IngestExternalFile;
  Status IngestExternalFile(
      ColumnFamilyHandle* /*column_family*/,
      const std::vector<std::string>& /*external_files*/,
      const IngestExternalFileOptions& /*options*/) override {
    return Status::NotSupported("Not implemented.");
  }

  using DB::IngestExternalFiles;
  Status IngestExternalFiles(
      const std::vector<IngestExternalFileArg>& /*args*/) override {
    return Status::NotSupported("Not implemented");
  }

  using DB::CreateColumnFamilyWithImport;
  virtual Status CreateColumnFamilyWithImport(
      const ColumnFamilyOptions& /*options*/,
      const std::string& /*column_family_name*/,
      const ImportColumnFamilyOptions& /*import_options*/,
      const ExportImportFilesMetaData& /*metadata*/,
      ColumnFamilyHandle** /*handle*/) override {
    return Status::NotSupported("Not implemented.");
  }

  using DB::VerifyChecksum;
  Status VerifyChecksum(const ReadOptions&) override {
    return Status::NotSupported("Not implemented.");
  }

  using DB::GetPropertiesOfAllTables;
  Status GetPropertiesOfAllTables(
      ColumnFamilyHandle* /*column_family*/,
      TablePropertiesCollection* /*props*/) override {
    return Status();
  }

  Status GetPropertiesOfTablesInRange(
      ColumnFamilyHandle* /*column_family*/, const Range* /*range*/,
      std::size_t /*n*/, TablePropertiesCollection* /*props*/) override {
    return Status();
  }
#endif  // ROCKSDB_LITE

  using DB::KeyMayExist;
  bool KeyMayExist(const ReadOptions& /*options*/,
                   ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/,
                   std::string* /*value*/,
                   bool* value_found = nullptr) override {
    if (value_found != nullptr) {
      *value_found = false;
    }
    return true;  // Not Supported directly
  }
  using DB::NewIterator;
  Iterator* NewIterator(const ReadOptions& options,
                        ColumnFamilyHandle* /*column_family*/) override {
    if (options.snapshot == nullptr) {
      // No snapshot: iterate over a privately owned copy of the current map
      // (the iterator deletes it on destruction).
      KVMap* saved = new KVMap;
      *saved = map_;
      return new ModelIter(saved, true);
    } else {
      // Snapshot read: iterate (non-owning) over the snapshot's map.
      const KVMap* snapshot_state =
          &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
      return new ModelIter(snapshot_state, false);
    }
  }
  Status NewIterators(const ReadOptions& /*options*/,
                      const std::vector<ColumnFamilyHandle*>& /*column_family*/,
                      std::vector<Iterator*>* /*iterators*/) override {
    return Status::NotSupported("Not supported yet");
  }
  const Snapshot* GetSnapshot() override {
    ModelSnapshot* snapshot = new ModelSnapshot;
    snapshot->map_ = map_;
    return snapshot;
  }

  void ReleaseSnapshot(const Snapshot* snapshot) override {
    delete reinterpret_cast<const ModelSnapshot*>(snapshot);
  }

  // Applies the batch to map_ through a WriteBatch::Handler. Merge entries
  // are deliberately ignored: the model does not implement merge semantics.
  Status Write(const WriteOptions& /*options*/, WriteBatch* batch) override {
    class Handler : public WriteBatch::Handler {
     public:
      KVMap* map_;
      void Put(const Slice& key, const Slice& value) override {
        (*map_)[key.ToString()] = value.ToString();
      }
      void Merge(const Slice& /*key*/, const Slice& /*value*/) override {
        // ignore merge for now
        // (*map_)[key.ToString()] = value.ToString();
      }
      void Delete(const Slice& key) override { map_->erase(key.ToString()); }
    };
    Handler handler;
    handler.map_ = &map_;
    return batch->Iterate(&handler);
  }

  // Property queries are unsupported; return false so callers fall back.
  using DB::GetProperty;
  bool GetProperty(ColumnFamilyHandle* /*column_family*/,
                   const Slice& /*property*/, std::string* /*value*/) override {
    return false;
  }
  using DB::GetIntProperty;
  bool GetIntProperty(ColumnFamilyHandle* /*column_family*/,
                      const Slice& /*property*/, uint64_t* /*value*/) override {
    return false;
  }
  using DB::GetMapProperty;
  bool GetMapProperty(ColumnFamilyHandle* /*column_family*/,
                      const Slice& /*property*/,
                      std::map<std::string, std::string>* /*value*/) override {
    return false;
  }
  using DB::GetAggregatedIntProperty;
  bool GetAggregatedIntProperty(const Slice& /*property*/,
                                uint64_t* /*value*/) override {
    return false;
  }
  using DB::GetApproximateSizes;
  // The model has no on-disk footprint; report zero for every range.
  Status GetApproximateSizes(const SizeApproximationOptions& /*options*/,
                             ColumnFamilyHandle* /*column_family*/,
                             const Range* /*range*/, int n,
                             uint64_t* sizes) override {
    for (int i = 0; i < n; i++) {
      sizes[i] = 0;
    }
    return Status::OK();
  }
  using DB::GetApproximateMemTableStats;
  void GetApproximateMemTableStats(ColumnFamilyHandle* /*column_family*/,
                                   const Range& /*range*/,
                                   uint64_t* const count,
                                   uint64_t* const size) override {
    *count = 0;
    *size = 0;
  }
  using DB::CompactRange;
  Status CompactRange(const CompactRangeOptions& /*options*/,
                      ColumnFamilyHandle* /*column_family*/,
                      const Slice* /*start*/, const Slice* /*end*/) override {
    return Status::NotSupported("Not supported operation.");
  }

  Status SetDBOptions(
      const std::unordered_map<std::string, std::string>& /*new_options*/)
      override {
    return Status::NotSupported("Not supported operation.");
  }

  using DB::CompactFiles;
  Status CompactFiles(
      const CompactionOptions& /*compact_options*/,
      ColumnFamilyHandle* /*column_family*/,
      const std::vector<std::string>& /*input_file_names*/,
      const int /*output_level*/, const int /*output_path_id*/ = -1,
      std::vector<std::string>* const /*output_file_names*/ = nullptr,
      CompactionJobInfo* /*compaction_job_info*/ = nullptr) override {
    return Status::NotSupported("Not supported operation.");
  }

  Status PauseBackgroundWork() override {
    return Status::NotSupported("Not supported operation.");
  }

  Status ContinueBackgroundWork() override {
    return Status::NotSupported("Not supported operation.");
  }

  Status EnableAutoCompaction(
      const std::vector<ColumnFamilyHandle*>& /*column_family_handles*/)
      override {
    return Status::NotSupported("Not supported operation.");
  }

  void EnableManualCompaction() override { return; }

  void DisableManualCompaction() override { return; }

  // The model behaves like a single-level DB.
  using DB::NumberLevels;
  int NumberLevels(ColumnFamilyHandle* /*column_family*/) override { return 1; }

  using DB::MaxMemCompactionLevel;
  int MaxMemCompactionLevel(ColumnFamilyHandle* /*column_family*/) override {
    return 1;
  }

  using DB::Level0StopWriteTrigger;
  int Level0StopWriteTrigger(ColumnFamilyHandle* /*column_family*/) override {
    return -1;
  }

  const std::string& GetName() const override { return name_; }

  Env* GetEnv() const override { return nullptr; }

  using DB::GetOptions;
  Options GetOptions(ColumnFamilyHandle* /*column_family*/) const override {
    return options_;
  }

  using DB::GetDBOptions;
  DBOptions GetDBOptions() const override { return options_; }

  // Flush variants are no-ops that report success: there is nothing to
  // persist.
  using DB::Flush;
  Status Flush(const ROCKSDB_NAMESPACE::FlushOptions& /*options*/,
               ColumnFamilyHandle* /*column_family*/) override {
    Status ret;
    return ret;
  }
  Status Flush(
      const ROCKSDB_NAMESPACE::FlushOptions& /*options*/,
      const std::vector<ColumnFamilyHandle*>& /*column_families*/) override {
    return Status::OK();
  }

  Status SyncWAL() override { return Status::OK(); }

  Status DisableFileDeletions() override { return Status::OK(); }

  Status EnableFileDeletions(bool /*force*/) override { return Status::OK(); }
#ifndef ROCKSDB_LITE

  Status GetLiveFiles(std::vector<std::string>&, uint64_t* /*size*/,
                      bool /*flush_memtable*/ = true) override {
    return Status::OK();
  }

  Status GetLiveFilesChecksumInfo(
      FileChecksumList* /*checksum_list*/) override {
    return Status::OK();
  }

  Status GetSortedWalFiles(VectorLogPtr& /*files*/) override {
    return Status::OK();
  }

  Status GetCurrentWalFile(
      std::unique_ptr<LogFile>* /*current_log_file*/) override {
    return Status::OK();
  }

  virtual Status GetCreationTimeOfOldestFile(
      uint64_t* /*creation_time*/) override {
    return Status::NotSupported();
  }

  Status DeleteFile(std::string /*name*/) override { return Status::OK(); }

  Status GetUpdatesSince(
      ROCKSDB_NAMESPACE::SequenceNumber,
      std::unique_ptr<ROCKSDB_NAMESPACE::TransactionLogIterator>*,
      const TransactionLogIterator::ReadOptions& /*read_options*/ =
          TransactionLogIterator::ReadOptions()) override {
    return Status::NotSupported("Not supported in Model DB");
  }

  void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
                               ColumnFamilyMetaData* /*metadata*/) override {}
#endif  // ROCKSDB_LITE

  Status GetDbIdentity(std::string& /*identity*/) const override {
    return Status::OK();
  }

  Status GetDbSessionId(std::string& /*session_id*/) const override {
    return Status::OK();
  }

  SequenceNumber GetLatestSequenceNumber() const override { return 0; }

  bool SetPreserveDeletesSequenceNumber(SequenceNumber /*seqnum*/) override {
    return true;
  }

  ColumnFamilyHandle* DefaultColumnFamily() const override { return nullptr; }

 private:
  // Iterator over a KVMap. If `owned` is true the map is a private copy and
  // is deleted with the iterator; otherwise it aliases a snapshot's map.
  class ModelIter : public Iterator {
   public:
    ModelIter(const KVMap* map, bool owned)
        : map_(map), owned_(owned), iter_(map_->end()) {}
    ~ModelIter() override {
      if (owned_) delete map_;
    }
    bool Valid() const override { return iter_ != map_->end(); }
    void SeekToFirst() override { iter_ = map_->begin(); }
    void SeekToLast() override {
      if (map_->empty()) {
        iter_ = map_->end();
      } else {
        // Position on the largest key (rbegin's key looked up forward).
        iter_ = map_->find(map_->rbegin()->first);
      }
    }
    void Seek(const Slice& k) override {
      iter_ = map_->lower_bound(k.ToString());
    }
    void SeekForPrev(const Slice& k) override {
      // First entry strictly greater than k, then step back to <= k.
      iter_ = map_->upper_bound(k.ToString());
      Prev();
    }
    void Next() override { ++iter_; }
    void Prev() override {
      // Prev() on the first key invalidates the iterator (maps to end()),
      // matching DB iterator semantics.
      if (iter_ == map_->begin()) {
        iter_ = map_->end();
        return;
      }
      --iter_;
    }

    Slice key() const override { return iter_->first; }
    Slice value() const override { return iter_->second; }
    Status status() const override { return Status::OK(); }

   private:
    const KVMap* const map_;
    const bool owned_;  // Do we own map_
    KVMap::const_iterator iter_;
  };
  const Options options_;
  KVMap map_;  // current (non-snapshot) state
  std::string name_ = "";
};
3114
11fdf7f2 3115#ifndef ROCKSDB_VALGRIND_RUN
7c673cae
FG
3116static std::string RandomKey(Random* rnd, int minimum = 0) {
3117 int len;
3118 do {
3119 len = (rnd->OneIn(3)
3120 ? 1 // Short sometimes to encourage collisions
3121 : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
3122 } while (len < minimum);
3123 return test::RandomKey(rnd, len);
3124}
3125
3126static bool CompareIterators(int step, DB* model, DB* db,
3127 const Snapshot* model_snap,
3128 const Snapshot* db_snap) {
3129 ReadOptions options;
3130 options.snapshot = model_snap;
3131 Iterator* miter = model->NewIterator(options);
3132 options.snapshot = db_snap;
3133 Iterator* dbiter = db->NewIterator(options);
3134 bool ok = true;
3135 int count = 0;
3136 for (miter->SeekToFirst(), dbiter->SeekToFirst();
3137 ok && miter->Valid() && dbiter->Valid(); miter->Next(), dbiter->Next()) {
3138 count++;
3139 if (miter->key().compare(dbiter->key()) != 0) {
3140 fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", step,
3141 EscapeString(miter->key()).c_str(),
3142 EscapeString(dbiter->key()).c_str());
3143 ok = false;
3144 break;
3145 }
3146
3147 if (miter->value().compare(dbiter->value()) != 0) {
3148 fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
3149 step, EscapeString(miter->key()).c_str(),
3150 EscapeString(miter->value()).c_str(),
20effc67 3151 EscapeString(dbiter->value()).c_str());
7c673cae
FG
3152 ok = false;
3153 }
3154 }
3155
3156 if (ok) {
3157 if (miter->Valid() != dbiter->Valid()) {
3158 fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
3159 step, miter->Valid(), dbiter->Valid());
3160 ok = false;
3161 }
3162 }
3163 delete miter;
3164 delete dbiter;
3165 return ok;
3166}
3167
3168class DBTestRandomized : public DBTest,
3169 public ::testing::WithParamInterface<int> {
3170 public:
494da23a 3171 void SetUp() override { option_config_ = GetParam(); }
7c673cae
FG
3172
3173 static std::vector<int> GenerateOptionConfigs() {
3174 std::vector<int> option_configs;
3175 // skip cuckoo hash as it does not support snapshot.
3176 for (int option_config = kDefault; option_config < kEnd; ++option_config) {
494da23a
TL
3177 if (!ShouldSkipOptions(option_config,
3178 kSkipDeletesFilterFirst | kSkipNoSeekToLast)) {
7c673cae
FG
3179 option_configs.push_back(option_config);
3180 }
3181 }
3182 option_configs.push_back(kBlockBasedTableWithIndexRestartInterval);
3183 return option_configs;
3184 }
3185};
3186
// Instantiate the parameterized suite once per option configuration produced
// by GenerateOptionConfigs(); GetParam() selects the config in SetUp().
INSTANTIATE_TEST_CASE_P(
    DBTestRandomized, DBTestRandomized,
    ::testing::ValuesIn(DBTestRandomized::GenerateOptionConfigs()));
3190
// Randomized differential test: applies an identical random stream of
// Put/Delete/WriteBatch operations to a ModelDB (in-memory map) and a real
// DB, and every 100 steps compares their complete iterator contents --
// with and without snapshots, and again after a Reopen().
TEST_P(DBTestRandomized, Randomized) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  Options options = CurrentOptions(options_override);
  DestroyAndReopen(options);

  Random rnd(test::RandomSeed() + GetParam());
  ModelDB model(options);
  const int N = 10000;
  const Snapshot* model_snap = nullptr;
  const Snapshot* db_snap = nullptr;
  std::string k, v;
  for (int step = 0; step < N; step++) {
    // TODO(sanjay): Test Get() works
    int p = rnd.Uniform(100);
    // Hash/prefix-based configurations require non-empty keys here
    // (presumably so a prefix can always be extracted -- see RandomKey).
    int minimum = 0;
    if (option_config_ == kHashSkipList || option_config_ == kHashLinkList ||
        option_config_ == kPlainTableFirstBytePrefix ||
        option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
        option_config_ == kBlockBasedTableWithPrefixHashIndex) {
      minimum = 1;
    }
    // Operation mix: 45% Put, 45% Delete, 10% multi-element batch.
    if (p < 45) {  // Put
      k = RandomKey(&rnd, minimum);
      // Values are usually tiny (< 8 bytes) but occasionally 100-200 bytes.
      v = rnd.RandomString(rnd.OneIn(20) ? 100 + rnd.Uniform(100)
                                         : rnd.Uniform(8));
      ASSERT_OK(model.Put(WriteOptions(), k, v));
      ASSERT_OK(db_->Put(WriteOptions(), k, v));
    } else if (p < 90) {  // Delete
      k = RandomKey(&rnd, minimum);
      ASSERT_OK(model.Delete(WriteOptions(), k));
      ASSERT_OK(db_->Delete(WriteOptions(), k));
    } else {  // Multi-element batch
      WriteBatch b;
      const int num = rnd.Uniform(8);
      for (int i = 0; i < num; i++) {
        if (i == 0 || !rnd.OneIn(10)) {
          k = RandomKey(&rnd, minimum);
        } else {
          // Periodically re-use the same key from the previous iter, so
          // we have multiple entries in the write batch for the same key
        }
        if (rnd.OneIn(2)) {
          v = rnd.RandomString(rnd.Uniform(10));
          b.Put(k, v);
        } else {
          b.Delete(k);
        }
      }
      ASSERT_OK(model.Write(WriteOptions(), &b));
      ASSERT_OK(db_->Write(WriteOptions(), &b));
    }

    if ((step % 100) == 0) {
      // For DB instances that use the hash index + block-based table, the
      // iterator will be invalid right when seeking a non-existent key,
      // rather than return a key that is close to it.
      if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex &&
          option_config_ != kBlockBasedTableWithPrefixHashIndex) {
        ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
        ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
      }

      // Save a snapshot from each DB this time that we'll use next
      // time we compare things, to make sure the current state is
      // preserved with the snapshot
      if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
      if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);

      // Also verify the state survives a close/reopen of the real DB.
      Reopen(options);
      ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));

      model_snap = model.GetSnapshot();
      db_snap = db_->GetSnapshot();
    }
  }
  if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
  if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
}
11fdf7f2 3270#endif // ROCKSDB_VALGRIND_RUN
7c673cae
FG
3271
// A DB whose tables were written with a hash-search (prefix) index must stay
// fully readable after reopening with a binary-search index and no prefix
// extractor.
TEST_F(DBTest, BlockBasedTablePrefixIndexTest) {
  // create a DB with block prefix index
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();
  table_options.index_type = BlockBasedTableOptions::kHashSearch;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));

  Reopen(options);
  ASSERT_OK(Put("k1", "v1"));
  // k1 is flushed into an SST built with the hash index; k2 is written
  // afterwards so it is recovered on the next reopen.
  Flush();
  ASSERT_OK(Put("k2", "v2"));

  // Reopen it without prefix extractor, make sure everything still works.
  // RocksDB should just fall back to the binary index.
  table_options.index_type = BlockBasedTableOptions::kBinarySearch;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.prefix_extractor.reset();

  Reopen(options);
  ASSERT_EQ("v1", Get("k1"));
  ASSERT_EQ("v2", Get("k2"));
}
3295
f67539c2
TL
// With a hash-search index and a tiny table cache, a total_order_seek
// iterator must work even when the table has to be re-opened on demand, and
// a subsequent ordinary (prefix) seek must still succeed afterwards.
TEST_F(DBTest, BlockBasedTablePrefixIndexTotalOrderSeek) {
  // create a DB with block prefix index
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();
  options.max_open_files = 10;
  table_options.index_type = BlockBasedTableOptions::kHashSearch;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));

  // RocksDB sanitize max open files to at least 20. Modify it back.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
        int* max_open_files = static_cast<int*>(arg);
        *max_open_files = 11;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Reopen(options);
  ASSERT_OK(Put("k1", "v1"));
  Flush();

  // Move the file to L1 so reads must go through the table cache.
  CompactRangeOptions cro;
  cro.change_level = true;
  cro.target_level = 1;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  // Force evict tables
  dbfull()->TEST_table_cache()->SetCapacity(0);
  // Make table cache to keep one entry.
  dbfull()->TEST_table_cache()->SetCapacity(1);

  ReadOptions read_options;
  read_options.total_order_seek = true;
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    iter->Seek("k1");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("k1", iter->key().ToString());
  }

  // After total order seek, prefix index should still be used.
  read_options.total_order_seek = false;
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    iter->Seek("k1");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("k1", iter->key().ToString());
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
3346
7c673cae
FG
// Writes tables under different checksum types (CRC32c, then xxHash) and
// verifies that reads of all of them succeed after reopening, regardless of
// the checksum type currently configured in the table options.
TEST_F(DBTest, ChecksumTest) {
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();

  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(Flush());  // table with crc checksum

  table_options.checksum = kxxHash;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_OK(Put("e", "f"));
  ASSERT_OK(Put("g", "h"));
  ASSERT_OK(Flush());  // table with xxhash checksum

  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_EQ("b", Get("a"));
  ASSERT_EQ("d", Get("c"));
  ASSERT_EQ("f", Get("e"));
  ASSERT_EQ("h", Get("g"));

  // NOTE(review): this second verification block repeats kCRC32c, so the
  // read path is exercised twice under the same setting; some upstream
  // versions of this test use kxxHash here -- confirm which was intended.
  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_EQ("b", Get("a"));
  ASSERT_EQ("d", Get("c"));
  ASSERT_EQ("f", Get("e"));
  ASSERT_EQ("h", Get("g"));
}
3381
3382#ifndef ROCKSDB_LITE
// FIFO compaction drops the oldest L0 files once total table size exceeds
// max_table_files_size. Iteration 0 relies on automatic compaction,
// iteration 1 triggers it manually via CompactRange().
TEST_P(DBTestWithParam, FIFOCompactionTest) {
  for (int iter = 0; iter < 2; ++iter) {
    // first iteration -- auto compaction
    // second iteration -- manual compaction
    Options options;
    options.compaction_style = kCompactionStyleFIFO;
    options.write_buffer_size = 100 << 10;  // 100KB
    options.arena_block_size = 4096;
    options.compaction_options_fifo.max_table_files_size = 500 << 10;  // 500KB
    options.compression = kNoCompression;
    options.create_if_missing = true;
    options.max_subcompactions = max_subcompactions_;
    if (iter == 1) {
      options.disable_auto_compactions = true;
    }
    options = CurrentOptions(options);
    DestroyAndReopen(options);

    Random rnd(301);
    // Fill six ~100KB memtables (110 values of ~1KB each), each becoming
    // its own L0 file.
    for (int i = 0; i < 6; ++i) {
      for (int j = 0; j < 110; ++j) {
        ASSERT_OK(Put(ToString(i * 100 + j), rnd.RandomString(980)));
      }
      // flush should happen here
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    }
    if (iter == 0) {
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    } else {
      CompactRangeOptions cro;
      cro.exclusive_manual_compaction = exclusive_manual_compaction_;
      ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
    }
    // only 5 files should survive
    ASSERT_EQ(NumTableFilesAtLevel(0), 5);
    for (int i = 0; i < 50; ++i) {
      // these keys should be deleted in previous compaction
      ASSERT_EQ("NOT_FOUND", Get(ToString(i)));
    }
  }
}
11fdf7f2
TL
3424
// FIFO compaction with allow_compaction=true: small L0 files are merged
// (driven by level0_file_num_compaction_trigger) while the total-size bound
// is still enforced.
TEST_F(DBTest, FIFOCompactionTestWithCompaction) {
  Options options;
  options.compaction_style = kCompactionStyleFIFO;
  options.write_buffer_size = 20 << 10;  // 20K
  options.arena_block_size = 4096;
  options.compaction_options_fifo.max_table_files_size = 1500 << 10;  // 1MB
  options.compaction_options_fifo.allow_compaction = true;
  options.level0_file_num_compaction_trigger = 6;
  options.compression = kNoCompression;
  options.create_if_missing = true;
  options = CurrentOptions(options);
  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < 60; i++) {
    // Generate and flush a file about 20KB.
    for (int j = 0; j < 20; j++) {
      ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
    }
    Flush();
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }
  // It should be compacted to 10 files.
  ASSERT_EQ(NumTableFilesAtLevel(0), 10);

  for (int i = 0; i < 60; i++) {
    // Generate and flush a file about 20KB.
    for (int j = 0; j < 20; j++) {
      ASSERT_OK(Put(ToString(i * 20 + j + 2000), rnd.RandomString(980)));
    }
    Flush();
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }

  // It should be compacted to no more than 20 files.
  ASSERT_GT(NumTableFilesAtLevel(0), 10);
  ASSERT_LT(NumTableFilesAtLevel(0), 18);
  // Size limit is still guaranteed.
  ASSERT_LE(SizeAtLevel(0),
            options.compaction_options_fifo.max_table_files_size);
}
3466
// FIFO with intra-L0 compaction must honor both Put("") (key present with an
// empty value) and Delete (key absent -> NOT_FOUND) once files are merged.
TEST_F(DBTest, FIFOCompactionStyleWithCompactionAndDelete) {
  Options options;
  options.compaction_style = kCompactionStyleFIFO;
  options.write_buffer_size = 20 << 10;  // 20K
  options.arena_block_size = 4096;
  options.compaction_options_fifo.max_table_files_size = 1500 << 10;  // 1MB
  options.compaction_options_fifo.allow_compaction = true;
  options.level0_file_num_compaction_trigger = 3;
  options.compression = kNoCompression;
  options.create_if_missing = true;
  options = CurrentOptions(options);
  DestroyAndReopen(options);

  Random rnd(301);
  // Three files, each carrying an empty-valued Put for its own "key<i>".
  for (int i = 0; i < 3; i++) {
    // Each file contains a different key which will be dropped later.
    ASSERT_OK(Put("a" + ToString(i), rnd.RandomString(500)));
    ASSERT_OK(Put("key" + ToString(i), ""));
    ASSERT_OK(Put("z" + ToString(i), rnd.RandomString(500)));
    Flush();
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }
  // The trigger (3 files) compacts them into one; empty values survive.
  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  for (int i = 0; i < 3; i++) {
    ASSERT_EQ("", Get("key" + ToString(i)));
  }
  // Three more files, this time deleting each "key<i>".
  for (int i = 0; i < 3; i++) {
    // Each file contains a different key which will be dropped later.
    ASSERT_OK(Put("a" + ToString(i), rnd.RandomString(500)));
    ASSERT_OK(Delete("key" + ToString(i)));
    ASSERT_OK(Put("z" + ToString(i), rnd.RandomString(500)));
    Flush();
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }
  ASSERT_EQ(NumTableFilesAtLevel(0), 2);
  for (int i = 0; i < 3; i++) {
    ASSERT_EQ("NOT_FOUND", Get("key" + ToString(i)));
  }
}
3506
// FIFO-with-TTL used to be rejected when max_open_files != -1; it is now
// accepted either way, so opening must succeed with both settings.
TEST_F(DBTest, FIFOCompactionWithTTLAndMaxOpenFilesTest) {
  Options options;
  options.compaction_style = kCompactionStyleFIFO;
  options.create_if_missing = true;
  options.ttl = 600;  // seconds

  // TTL is now supported with max_open_files != -1.
  options.max_open_files = 100;
  options = CurrentOptions(options);
  ASSERT_OK(TryReopen(options));

  options.max_open_files = -1;
  ASSERT_OK(TryReopen(options));
}
3522
// Check that FIFO-with-TTL is supported only with BlockBasedTableFactory:
// plain-table and adaptive-table factories must make Open fail with
// NotSupported.
TEST_F(DBTest, FIFOCompactionWithTTLAndVariousTableFormatsTest) {
  Options options;
  options.compaction_style = kCompactionStyleFIFO;
  options.create_if_missing = true;
  options.ttl = 600;  // seconds

  options = CurrentOptions(options);
  options.table_factory.reset(NewBlockBasedTableFactory());
  ASSERT_OK(TryReopen(options));

  Destroy(options);
  options.table_factory.reset(NewPlainTableFactory());
  ASSERT_TRUE(TryReopen(options).IsNotSupported());

  Destroy(options);
  options.table_factory.reset(NewAdaptiveTableFactory());
  ASSERT_TRUE(TryReopen(options).IsNotSupported());
}
3542
3543TEST_F(DBTest, FIFOCompactionWithTTLTest) {
3544 Options options;
3545 options.compaction_style = kCompactionStyleFIFO;
3546 options.write_buffer_size = 10 << 10; // 10KB
3547 options.arena_block_size = 4096;
3548 options.compression = kNoCompression;
3549 options.create_if_missing = true;
20effc67 3550 env_->SetMockSleep();
11fdf7f2
TL
3551 options.env = env_;
3552
3553 // Test to make sure that all files with expired ttl are deleted on next
3554 // manual compaction.
3555 {
20effc67
TL
3556 // NOTE: Presumed unnecessary and removed: resetting mock time in env
3557
11fdf7f2
TL
3558 options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB
3559 options.compaction_options_fifo.allow_compaction = false;
494da23a 3560 options.ttl = 1 * 60 * 60 ; // 1 hour
11fdf7f2
TL
3561 options = CurrentOptions(options);
3562 DestroyAndReopen(options);
3563
3564 Random rnd(301);
3565 for (int i = 0; i < 10; i++) {
3566 // Generate and flush a file about 10KB.
3567 for (int j = 0; j < 10; j++) {
20effc67 3568 ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
11fdf7f2
TL
3569 }
3570 Flush();
3571 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3572 }
3573 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3574
3575 // Sleep for 2 hours -- which is much greater than TTL.
20effc67 3576 env_->MockSleepForSeconds(2 * 60 * 60);
11fdf7f2
TL
3577
3578 // Since no flushes and compactions have run, the db should still be in
3579 // the same state even after considerable time has passed.
3580 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3581 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3582
3583 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
3584 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
3585 }
3586
3587 // Test to make sure that all files with expired ttl are deleted on next
3588 // automatic compaction.
3589 {
3590 options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB
3591 options.compaction_options_fifo.allow_compaction = false;
494da23a 3592 options.ttl = 1 * 60 * 60; // 1 hour
11fdf7f2
TL
3593 options = CurrentOptions(options);
3594 DestroyAndReopen(options);
3595
3596 Random rnd(301);
3597 for (int i = 0; i < 10; i++) {
3598 // Generate and flush a file about 10KB.
3599 for (int j = 0; j < 10; j++) {
20effc67 3600 ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
11fdf7f2
TL
3601 }
3602 Flush();
3603 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3604 }
3605 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3606
3607 // Sleep for 2 hours -- which is much greater than TTL.
20effc67 3608 env_->MockSleepForSeconds(2 * 60 * 60);
11fdf7f2
TL
3609 // Just to make sure that we are in the same state even after sleeping.
3610 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3611 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3612
3613 // Create 1 more file to trigger TTL compaction. The old files are dropped.
3614 for (int i = 0; i < 1; i++) {
3615 for (int j = 0; j < 10; j++) {
20effc67 3616 ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
11fdf7f2
TL
3617 }
3618 Flush();
3619 }
3620
3621 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3622 // Only the new 10 files remain.
3623 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
3624 ASSERT_LE(SizeAtLevel(0),
3625 options.compaction_options_fifo.max_table_files_size);
3626 }
3627
3628 // Test that shows the fall back to size-based FIFO compaction if TTL-based
3629 // deletion doesn't move the total size to be less than max_table_files_size.
3630 {
3631 options.write_buffer_size = 10 << 10; // 10KB
3632 options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB
3633 options.compaction_options_fifo.allow_compaction = false;
494da23a 3634 options.ttl = 1 * 60 * 60; // 1 hour
11fdf7f2
TL
3635 options = CurrentOptions(options);
3636 DestroyAndReopen(options);
3637
3638 Random rnd(301);
3639 for (int i = 0; i < 3; i++) {
3640 // Generate and flush a file about 10KB.
3641 for (int j = 0; j < 10; j++) {
20effc67 3642 ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
11fdf7f2
TL
3643 }
3644 Flush();
3645 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3646 }
3647 ASSERT_EQ(NumTableFilesAtLevel(0), 3);
3648
3649 // Sleep for 2 hours -- which is much greater than TTL.
20effc67 3650 env_->MockSleepForSeconds(2 * 60 * 60);
11fdf7f2
TL
3651 // Just to make sure that we are in the same state even after sleeping.
3652 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3653 ASSERT_EQ(NumTableFilesAtLevel(0), 3);
3654
3655 for (int i = 0; i < 5; i++) {
3656 for (int j = 0; j < 140; j++) {
20effc67 3657 ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
11fdf7f2
TL
3658 }
3659 Flush();
3660 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3661 }
3662 // Size limit is still guaranteed.
3663 ASSERT_LE(SizeAtLevel(0),
3664 options.compaction_options_fifo.max_table_files_size);
3665 }
3666
3667 // Test with TTL + Intra-L0 compactions.
3668 {
3669 options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB
3670 options.compaction_options_fifo.allow_compaction = true;
494da23a 3671 options.ttl = 1 * 60 * 60; // 1 hour
11fdf7f2
TL
3672 options.level0_file_num_compaction_trigger = 6;
3673 options = CurrentOptions(options);
3674 DestroyAndReopen(options);
3675
3676 Random rnd(301);
3677 for (int i = 0; i < 10; i++) {
3678 // Generate and flush a file about 10KB.
3679 for (int j = 0; j < 10; j++) {
20effc67 3680 ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
11fdf7f2
TL
3681 }
3682 Flush();
3683 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3684 }
3685 // With Intra-L0 compaction, out of 10 files, 6 files will be compacted to 1
3686 // (due to level0_file_num_compaction_trigger = 6).
3687 // So total files = 1 + remaining 4 = 5.
3688 ASSERT_EQ(NumTableFilesAtLevel(0), 5);
3689
3690 // Sleep for 2 hours -- which is much greater than TTL.
20effc67 3691 env_->MockSleepForSeconds(2 * 60 * 60);
11fdf7f2
TL
3692 // Just to make sure that we are in the same state even after sleeping.
3693 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3694 ASSERT_EQ(NumTableFilesAtLevel(0), 5);
3695
3696 // Create 10 more files. The old 5 files are dropped as their ttl expired.
3697 for (int i = 0; i < 10; i++) {
3698 for (int j = 0; j < 10; j++) {
20effc67 3699 ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
11fdf7f2
TL
3700 }
3701 Flush();
3702 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3703 }
3704 ASSERT_EQ(NumTableFilesAtLevel(0), 5);
3705 ASSERT_LE(SizeAtLevel(0),
3706 options.compaction_options_fifo.max_table_files_size);
3707 }
3708
3709 // Test with large TTL + Intra-L0 compactions.
3710 // Files dropped based on size, as ttl doesn't kick in.
3711 {
3712 options.write_buffer_size = 20 << 10; // 20K
3713 options.compaction_options_fifo.max_table_files_size = 1500 << 10; // 1.5MB
3714 options.compaction_options_fifo.allow_compaction = true;
494da23a 3715 options.ttl = 1 * 60 * 60; // 1 hour
11fdf7f2
TL
3716 options.level0_file_num_compaction_trigger = 6;
3717 options = CurrentOptions(options);
3718 DestroyAndReopen(options);
3719
3720 Random rnd(301);
3721 for (int i = 0; i < 60; i++) {
3722 // Generate and flush a file about 20KB.
3723 for (int j = 0; j < 20; j++) {
20effc67 3724 ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
11fdf7f2
TL
3725 }
3726 Flush();
3727 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3728 }
3729 // It should be compacted to 10 files.
3730 ASSERT_EQ(NumTableFilesAtLevel(0), 10);
3731
3732 for (int i = 0; i < 60; i++) {
3733 // Generate and flush a file about 20KB.
3734 for (int j = 0; j < 20; j++) {
20effc67 3735 ASSERT_OK(Put(ToString(i * 20 + j + 2000), rnd.RandomString(980)));
11fdf7f2
TL
3736 }
3737 Flush();
3738 ASSERT_OK(dbfull()->TEST_WaitForCompact());
3739 }
3740
3741 // It should be compacted to no more than 20 files.
3742 ASSERT_GT(NumTableFilesAtLevel(0), 10);
3743 ASSERT_LT(NumTableFilesAtLevel(0), 18);
3744 // Size limit is still guaranteed.
3745 ASSERT_LE(SizeAtLevel(0),
3746 options.compaction_options_fifo.max_table_files_size);
3747 }
3748}
7c673cae
FG
3749#endif // ROCKSDB_LITE
3750
3751#ifndef ROCKSDB_LITE
3752/*
3753 * This test is not reliable enough as it heavily depends on disk behavior.
3754 * Disable as it is flaky.
3755 */
3756TEST_F(DBTest, DISABLED_RateLimitingTest) {
3757 Options options = CurrentOptions();
3758 options.write_buffer_size = 1 << 20; // 1MB
3759 options.level0_file_num_compaction_trigger = 2;
3760 options.target_file_size_base = 1 << 20; // 1MB
3761 options.max_bytes_for_level_base = 4 << 20; // 4MB
3762 options.max_bytes_for_level_multiplier = 4;
3763 options.compression = kNoCompression;
3764 options.create_if_missing = true;
3765 options.env = env_;
f67539c2 3766 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
7c673cae
FG
3767 options.IncreaseParallelism(4);
3768 DestroyAndReopen(options);
3769
3770 WriteOptions wo;
3771 wo.disableWAL = true;
3772
3773 // # no rate limiting
3774 Random rnd(301);
3775 uint64_t start = env_->NowMicros();
3776 // Write ~96M data
3777 for (int64_t i = 0; i < (96 << 10); ++i) {
20effc67 3778 ASSERT_OK(Put(rnd.RandomString(32), rnd.RandomString((1 << 10) + 1), wo));
7c673cae
FG
3779 }
3780 uint64_t elapsed = env_->NowMicros() - start;
3781 double raw_rate = env_->bytes_written_ * 1000000.0 / elapsed;
3782 uint64_t rate_limiter_drains =
3783 TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS);
3784 ASSERT_EQ(0, rate_limiter_drains);
3785 Close();
3786
3787 // # rate limiting with 0.7 x threshold
3788 options.rate_limiter.reset(
3789 NewGenericRateLimiter(static_cast<int64_t>(0.7 * raw_rate)));
3790 env_->bytes_written_ = 0;
3791 DestroyAndReopen(options);
3792
3793 start = env_->NowMicros();
3794 // Write ~96M data
3795 for (int64_t i = 0; i < (96 << 10); ++i) {
20effc67 3796 ASSERT_OK(Put(rnd.RandomString(32), rnd.RandomString((1 << 10) + 1), wo));
7c673cae
FG
3797 }
3798 rate_limiter_drains =
3799 TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS) -
3800 rate_limiter_drains;
3801 elapsed = env_->NowMicros() - start;
3802 Close();
3803 ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
3804 // Most intervals should've been drained (interval time is 100ms, elapsed is
3805 // micros)
3806 ASSERT_GT(rate_limiter_drains, 0);
3807 ASSERT_LE(rate_limiter_drains, elapsed / 100000 + 1);
3808 double ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
3809 fprintf(stderr, "write rate ratio = %.2lf, expected 0.7\n", ratio);
3810 ASSERT_TRUE(ratio < 0.8);
3811
3812 // # rate limiting with half of the raw_rate
3813 options.rate_limiter.reset(
3814 NewGenericRateLimiter(static_cast<int64_t>(raw_rate / 2)));
3815 env_->bytes_written_ = 0;
3816 DestroyAndReopen(options);
3817
3818 start = env_->NowMicros();
3819 // Write ~96M data
3820 for (int64_t i = 0; i < (96 << 10); ++i) {
20effc67 3821 ASSERT_OK(Put(rnd.RandomString(32), rnd.RandomString((1 << 10) + 1), wo));
7c673cae
FG
3822 }
3823 elapsed = env_->NowMicros() - start;
3824 rate_limiter_drains =
3825 TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS) -
3826 rate_limiter_drains;
3827 Close();
3828 ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
3829 // Most intervals should've been drained (interval time is 100ms, elapsed is
3830 // micros)
3831 ASSERT_GT(rate_limiter_drains, elapsed / 100000 / 2);
3832 ASSERT_LE(rate_limiter_drains, elapsed / 100000 + 1);
3833 ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
3834 fprintf(stderr, "write rate ratio = %.2lf, expected 0.5\n", ratio);
3835 ASSERT_LT(ratio, 0.6);
3836}
3837
3838TEST_F(DBTest, TableOptionsSanitizeTest) {
3839 Options options = CurrentOptions();
3840 options.create_if_missing = true;
3841 DestroyAndReopen(options);
3842 ASSERT_EQ(db_->GetOptions().allow_mmap_reads, false);
3843
20effc67 3844 options.table_factory.reset(NewPlainTableFactory());
7c673cae
FG
3845 options.prefix_extractor.reset(NewNoopTransform());
3846 Destroy(options);
3847 ASSERT_TRUE(!TryReopen(options).IsNotSupported());
3848
3849 // Test for check of prefix_extractor when hash index is used for
3850 // block-based table
3851 BlockBasedTableOptions to;
3852 to.index_type = BlockBasedTableOptions::kHashSearch;
3853 options = CurrentOptions();
3854 options.create_if_missing = true;
3855 options.table_factory.reset(NewBlockBasedTableFactory(to));
3856 ASSERT_TRUE(TryReopen(options).IsInvalidArgument());
3857 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
3858 ASSERT_OK(TryReopen(options));
3859}
3860
3861TEST_F(DBTest, ConcurrentMemtableNotSupported) {
3862 Options options = CurrentOptions();
3863 options.allow_concurrent_memtable_write = true;
3864 options.soft_pending_compaction_bytes_limit = 0;
3865 options.hard_pending_compaction_bytes_limit = 100;
3866 options.create_if_missing = true;
3867
3868 DestroyDB(dbname_, options);
3869 options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4));
3870 ASSERT_NOK(TryReopen(options));
3871
3872 options.memtable_factory.reset(new SkipListFactory);
3873 ASSERT_OK(TryReopen(options));
3874
3875 ColumnFamilyOptions cf_options(options);
3876 cf_options.memtable_factory.reset(
3877 NewHashLinkListRepFactory(4, 0, 3, true, 4));
3878 ColumnFamilyHandle* handle;
3879 ASSERT_NOK(db_->CreateColumnFamily(cf_options, "name", &handle));
3880}
3881
3882#endif // ROCKSDB_LITE
3883
3884TEST_F(DBTest, SanitizeNumThreads) {
3885 for (int attempt = 0; attempt < 2; attempt++) {
3886 const size_t kTotalTasks = 8;
3887 test::SleepingBackgroundTask sleeping_tasks[kTotalTasks];
3888
3889 Options options = CurrentOptions();
3890 if (attempt == 0) {
3891 options.max_background_compactions = 3;
3892 options.max_background_flushes = 2;
3893 }
3894 options.create_if_missing = true;
3895 DestroyAndReopen(options);
3896
3897 for (size_t i = 0; i < kTotalTasks; i++) {
3898 // Insert 5 tasks to low priority queue and 5 tasks to high priority queue
3899 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
3900 &sleeping_tasks[i],
3901 (i < 4) ? Env::Priority::LOW : Env::Priority::HIGH);
3902 }
3903
494da23a
TL
3904 // Wait until 10s for they are scheduled.
3905 for (int i = 0; i < 10000; i++) {
3906 if (options.env->GetThreadPoolQueueLen(Env::Priority::LOW) <= 1 &&
3907 options.env->GetThreadPoolQueueLen(Env::Priority::HIGH) <= 2) {
3908 break;
3909 }
3910 env_->SleepForMicroseconds(1000);
3911 }
7c673cae
FG
3912
3913 // pool size 3, total task 4. Queue size should be 1.
3914 ASSERT_EQ(1U, options.env->GetThreadPoolQueueLen(Env::Priority::LOW));
3915 // pool size 2, total task 4. Queue size should be 2.
3916 ASSERT_EQ(2U, options.env->GetThreadPoolQueueLen(Env::Priority::HIGH));
3917
3918 for (size_t i = 0; i < kTotalTasks; i++) {
3919 sleeping_tasks[i].WakeUp();
3920 sleeping_tasks[i].WaitUntilDone();
3921 }
3922
3923 ASSERT_OK(Put("abc", "def"));
3924 ASSERT_EQ("def", Get("abc"));
3925 Flush();
3926 ASSERT_EQ("def", Get("abc"));
3927 }
3928}
3929
3930TEST_F(DBTest, WriteSingleThreadEntry) {
3931 std::vector<port::Thread> threads;
3932 dbfull()->TEST_LockMutex();
3933 auto w = dbfull()->TEST_BeginWrite();
3934 threads.emplace_back([&] { Put("a", "b"); });
3935 env_->SleepForMicroseconds(10000);
3936 threads.emplace_back([&] { Flush(); });
3937 env_->SleepForMicroseconds(10000);
3938 dbfull()->TEST_UnlockMutex();
3939 dbfull()->TEST_LockMutex();
3940 dbfull()->TEST_EndWrite(w);
3941 dbfull()->TEST_UnlockMutex();
3942
3943 for (auto& t : threads) {
3944 t.join();
3945 }
3946}
3947
11fdf7f2
TL
3948TEST_F(DBTest, ConcurrentFlushWAL) {
3949 const size_t cnt = 100;
3950 Options options;
20effc67 3951 options.env = env_;
11fdf7f2
TL
3952 WriteOptions wopt;
3953 ReadOptions ropt;
3954 for (bool two_write_queues : {false, true}) {
3955 for (bool manual_wal_flush : {false, true}) {
3956 options.two_write_queues = two_write_queues;
3957 options.manual_wal_flush = manual_wal_flush;
3958 options.create_if_missing = true;
3959 DestroyAndReopen(options);
3960 std::vector<port::Thread> threads;
3961 threads.emplace_back([&] {
3962 for (size_t i = 0; i < cnt; i++) {
3963 auto istr = ToString(i);
3964 db_->Put(wopt, db_->DefaultColumnFamily(), "a" + istr, "b" + istr);
3965 }
3966 });
3967 if (two_write_queues) {
3968 threads.emplace_back([&] {
3969 for (size_t i = cnt; i < 2 * cnt; i++) {
3970 auto istr = ToString(i);
3971 WriteBatch batch;
3972 batch.Put("a" + istr, "b" + istr);
3973 dbfull()->WriteImpl(wopt, &batch, nullptr, nullptr, 0, true);
3974 }
3975 });
3976 }
3977 threads.emplace_back([&] {
3978 for (size_t i = 0; i < cnt * 100; i++) { // FlushWAL is faster than Put
3979 db_->FlushWAL(false);
3980 }
3981 });
3982 for (auto& t : threads) {
3983 t.join();
3984 }
3985 options.create_if_missing = false;
3986 // Recover from the wal and make sure that it is not corrupted
3987 Reopen(options);
3988 for (size_t i = 0; i < cnt; i++) {
3989 PinnableSlice pval;
3990 auto istr = ToString(i);
3991 ASSERT_OK(
3992 db_->Get(ropt, db_->DefaultColumnFamily(), "a" + istr, &pval));
3993 ASSERT_TRUE(pval == ("b" + istr));
3994 }
3995 }
3996 }
3997}
3998
7c673cae
FG
3999#ifndef ROCKSDB_LITE
4000TEST_F(DBTest, DynamicMemtableOptions) {
4001 const uint64_t k64KB = 1 << 16;
4002 const uint64_t k128KB = 1 << 17;
4003 const uint64_t k5KB = 5 * 1024;
4004 Options options;
4005 options.env = env_;
4006 options.create_if_missing = true;
4007 options.compression = kNoCompression;
4008 options.max_background_compactions = 1;
4009 options.write_buffer_size = k64KB;
4010 options.arena_block_size = 16 * 1024;
4011 options.max_write_buffer_number = 2;
4012 // Don't trigger compact/slowdown/stop
4013 options.level0_file_num_compaction_trigger = 1024;
4014 options.level0_slowdown_writes_trigger = 1024;
4015 options.level0_stop_writes_trigger = 1024;
4016 DestroyAndReopen(options);
4017
4018 auto gen_l0_kb = [this](int size) {
4019 const int kNumPutsBeforeWaitForFlush = 64;
4020 Random rnd(301);
4021 for (int i = 0; i < size; i++) {
20effc67 4022 ASSERT_OK(Put(Key(i), rnd.RandomString(1024)));
7c673cae
FG
4023
4024 // The following condition prevents a race condition between flush jobs
4025 // acquiring work and this thread filling up multiple memtables. Without
4026 // this, the flush might produce less files than expected because
4027 // multiple memtables are flushed into a single L0 file. This race
4028 // condition affects assertion (A).
4029 if (i % kNumPutsBeforeWaitForFlush == kNumPutsBeforeWaitForFlush - 1) {
4030 dbfull()->TEST_WaitForFlushMemTable();
4031 }
4032 }
4033 dbfull()->TEST_WaitForFlushMemTable();
4034 };
4035
4036 // Test write_buffer_size
4037 gen_l0_kb(64);
4038 ASSERT_EQ(NumTableFilesAtLevel(0), 1);
4039 ASSERT_LT(SizeAtLevel(0), k64KB + k5KB);
4040 ASSERT_GT(SizeAtLevel(0), k64KB - k5KB * 2);
4041
4042 // Clean up L0
4043 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
4044 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
4045
4046 // Increase buffer size
4047 ASSERT_OK(dbfull()->SetOptions({
4048 {"write_buffer_size", "131072"},
4049 }));
4050
11fdf7f2
TL
4051 // The existing memtable inflated 64KB->128KB when we invoked SetOptions().
4052 // Write 192KB, we should have a 128KB L0 file and a memtable with 64KB data.
4053 gen_l0_kb(192);
4054 ASSERT_EQ(NumTableFilesAtLevel(0), 1); // (A)
4055 ASSERT_LT(SizeAtLevel(0), k128KB + 2 * k5KB);
4056 ASSERT_GT(SizeAtLevel(0), k128KB - 4 * k5KB);
4057
4058 // Decrease buffer size below current usage
4059 ASSERT_OK(dbfull()->SetOptions({
4060 {"write_buffer_size", "65536"},
4061 }));
4062 // The existing memtable became eligible for flush when we reduced its
4063 // capacity to 64KB. Two keys need to be added to trigger flush: first causes
4064 // memtable to be marked full, second schedules the flush. Then we should have
4065 // a 128KB L0 file, a 64KB L0 file, and a memtable with just one key.
4066 gen_l0_kb(2);
4067 ASSERT_EQ(NumTableFilesAtLevel(0), 2);
7c673cae
FG
4068 ASSERT_LT(SizeAtLevel(0), k128KB + k64KB + 2 * k5KB);
4069 ASSERT_GT(SizeAtLevel(0), k128KB + k64KB - 4 * k5KB);
4070
4071 // Test max_write_buffer_number
4072 // Block compaction thread, which will also block the flushes because
4073 // max_background_flushes == 0, so flushes are getting executed by the
4074 // compaction thread
4075 env_->SetBackgroundThreads(1, Env::LOW);
4076 test::SleepingBackgroundTask sleeping_task_low;
4077 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
4078 Env::Priority::LOW);
4079 // Start from scratch and disable compaction/flush. Flush can only happen
4080 // during compaction but trigger is pretty high
7c673cae
FG
4081 options.disable_auto_compactions = true;
4082 DestroyAndReopen(options);
11fdf7f2 4083 env_->SetBackgroundThreads(0, Env::HIGH);
7c673cae
FG
4084
4085 // Put until writes are stopped, bounded by 256 puts. We should see stop at
4086 // ~128KB
4087 int count = 0;
4088 Random rnd(301);
4089
f67539c2 4090 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
7c673cae 4091 "DBImpl::DelayWrite:Wait",
11fdf7f2 4092 [&](void* /*arg*/) { sleeping_task_low.WakeUp(); });
f67539c2 4093 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae
FG
4094
4095 while (!sleeping_task_low.WokenUp() && count < 256) {
20effc67 4096 ASSERT_OK(Put(Key(count), rnd.RandomString(1024), WriteOptions()));
7c673cae
FG
4097 count++;
4098 }
4099 ASSERT_GT(static_cast<double>(count), 128 * 0.8);
4100 ASSERT_LT(static_cast<double>(count), 128 * 1.2);
4101
4102 sleeping_task_low.WaitUntilDone();
4103
4104 // Increase
4105 ASSERT_OK(dbfull()->SetOptions({
4106 {"max_write_buffer_number", "8"},
4107 }));
4108 // Clean up memtable and L0
4109 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
4110
4111 sleeping_task_low.Reset();
4112 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
4113 Env::Priority::LOW);
4114 count = 0;
4115 while (!sleeping_task_low.WokenUp() && count < 1024) {
20effc67 4116 ASSERT_OK(Put(Key(count), rnd.RandomString(1024), WriteOptions()));
7c673cae
FG
4117 count++;
4118 }
4119// Windows fails this test. Will tune in the future and figure out
4120// approp number
4121#ifndef OS_WIN
4122 ASSERT_GT(static_cast<double>(count), 512 * 0.8);
4123 ASSERT_LT(static_cast<double>(count), 512 * 1.2);
4124#endif
4125 sleeping_task_low.WaitUntilDone();
4126
4127 // Decrease
4128 ASSERT_OK(dbfull()->SetOptions({
4129 {"max_write_buffer_number", "4"},
4130 }));
4131 // Clean up memtable and L0
4132 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
4133
4134 sleeping_task_low.Reset();
4135 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
4136 Env::Priority::LOW);
4137
4138 count = 0;
4139 while (!sleeping_task_low.WokenUp() && count < 1024) {
20effc67 4140 ASSERT_OK(Put(Key(count), rnd.RandomString(1024), WriteOptions()));
7c673cae
FG
4141 count++;
4142 }
4143// Windows fails this test. Will tune in the future and figure out
4144// approp number
4145#ifndef OS_WIN
4146 ASSERT_GT(static_cast<double>(count), 256 * 0.8);
4147 ASSERT_LT(static_cast<double>(count), 266 * 1.2);
4148#endif
4149 sleeping_task_low.WaitUntilDone();
4150
f67539c2 4151 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7c673cae
FG
4152}
4153#endif // ROCKSDB_LITE
4154
4155#ifdef ROCKSDB_USING_THREAD_STATUS
4156namespace {
4157void VerifyOperationCount(Env* env, ThreadStatus::OperationType op_type,
4158 int expected_count) {
4159 int op_count = 0;
4160 std::vector<ThreadStatus> thread_list;
4161 ASSERT_OK(env->GetThreadList(&thread_list));
4162 for (auto thread : thread_list) {
4163 if (thread.operation_type == op_type) {
4164 op_count++;
4165 }
4166 }
4167 ASSERT_EQ(op_count, expected_count);
4168}
4169} // namespace
4170
4171TEST_F(DBTest, GetThreadStatus) {
4172 Options options;
4173 options.env = env_;
4174 options.enable_thread_tracking = true;
4175 TryReopen(options);
4176
4177 std::vector<ThreadStatus> thread_list;
4178 Status s = env_->GetThreadList(&thread_list);
4179
4180 for (int i = 0; i < 2; ++i) {
4181 // repeat the test with differet number of high / low priority threads
4182 const int kTestCount = 3;
4183 const unsigned int kHighPriCounts[kTestCount] = {3, 2, 5};
4184 const unsigned int kLowPriCounts[kTestCount] = {10, 15, 3};
11fdf7f2 4185 const unsigned int kBottomPriCounts[kTestCount] = {2, 1, 4};
7c673cae
FG
4186 for (int test = 0; test < kTestCount; ++test) {
4187 // Change the number of threads in high / low priority pool.
4188 env_->SetBackgroundThreads(kHighPriCounts[test], Env::HIGH);
4189 env_->SetBackgroundThreads(kLowPriCounts[test], Env::LOW);
11fdf7f2 4190 env_->SetBackgroundThreads(kBottomPriCounts[test], Env::BOTTOM);
7c673cae
FG
4191 // Wait to ensure the all threads has been registered
4192 unsigned int thread_type_counts[ThreadStatus::NUM_THREAD_TYPES];
11fdf7f2
TL
4193 // TODO(ajkr): it'd be better if SetBackgroundThreads returned only after
4194 // all threads have been registered.
7c673cae
FG
4195 // Try up to 60 seconds.
4196 for (int num_try = 0; num_try < 60000; num_try++) {
4197 env_->SleepForMicroseconds(1000);
4198 thread_list.clear();
4199 s = env_->GetThreadList(&thread_list);
4200 ASSERT_OK(s);
4201 memset(thread_type_counts, 0, sizeof(thread_type_counts));
4202 for (auto thread : thread_list) {
4203 ASSERT_LT(thread.thread_type, ThreadStatus::NUM_THREAD_TYPES);
4204 thread_type_counts[thread.thread_type]++;
4205 }
4206 if (thread_type_counts[ThreadStatus::HIGH_PRIORITY] ==
4207 kHighPriCounts[test] &&
4208 thread_type_counts[ThreadStatus::LOW_PRIORITY] ==
11fdf7f2
TL
4209 kLowPriCounts[test] &&
4210 thread_type_counts[ThreadStatus::BOTTOM_PRIORITY] ==
4211 kBottomPriCounts[test]) {
7c673cae
FG
4212 break;
4213 }
4214 }
7c673cae
FG
4215 // Verify the number of high-priority threads
4216 ASSERT_EQ(thread_type_counts[ThreadStatus::HIGH_PRIORITY],
4217 kHighPriCounts[test]);
4218 // Verify the number of low-priority threads
4219 ASSERT_EQ(thread_type_counts[ThreadStatus::LOW_PRIORITY],
4220 kLowPriCounts[test]);
11fdf7f2
TL
4221 // Verify the number of bottom-priority threads
4222 ASSERT_EQ(thread_type_counts[ThreadStatus::BOTTOM_PRIORITY],
4223 kBottomPriCounts[test]);
7c673cae
FG
4224 }
4225 if (i == 0) {
4226 // repeat the test with multiple column families
4227 CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
4228 env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
4229 true);
4230 }
4231 }
4232 db_->DropColumnFamily(handles_[2]);
4233 delete handles_[2];
4234 handles_.erase(handles_.begin() + 2);
4235 env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
4236 true);
4237 Close();
4238 env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
4239 true);
4240}
4241
4242TEST_F(DBTest, DisableThreadStatus) {
4243 Options options;
4244 options.env = env_;
4245 options.enable_thread_tracking = false;
4246 TryReopen(options);
4247 CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
4248 // Verify non of the column family info exists
4249 env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
4250 false);
4251}
4252
4253TEST_F(DBTest, ThreadStatusFlush) {
4254 Options options;
4255 options.env = env_;
4256 options.write_buffer_size = 100000; // Small write buffer
4257 options.enable_thread_tracking = true;
4258 options = CurrentOptions(options);
4259
f67539c2 4260 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
7c673cae
FG
4261 {"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"},
4262 {"DBTest::ThreadStatusFlush:2", "FlushJob::WriteLevel0Table"},
4263 });
f67539c2 4264 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae
FG
4265
4266 CreateAndReopenWithCF({"pikachu"}, options);
4267 VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
4268
4269 ASSERT_OK(Put(1, "foo", "v1"));
4270 ASSERT_EQ("v1", Get(1, "foo"));
4271 VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
4272
4273 uint64_t num_running_flushes = 0;
4274 db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes);
4275 ASSERT_EQ(num_running_flushes, 0);
4276
4277 Put(1, "k1", std::string(100000, 'x')); // Fill memtable
4278 Put(1, "k2", std::string(100000, 'y')); // Trigger flush
4279
4280 // The first sync point is to make sure there's one flush job
4281 // running when we perform VerifyOperationCount().
4282 TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1");
4283 VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1);
4284 db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes);
4285 ASSERT_EQ(num_running_flushes, 1);
4286 // This second sync point is to ensure the flush job will not
4287 // be completed until we already perform VerifyOperationCount().
4288 TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2");
f67539c2 4289 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7c673cae
FG
4290}
4291
4292TEST_P(DBTestWithParam, ThreadStatusSingleCompaction) {
4293 const int kTestKeySize = 16;
4294 const int kTestValueSize = 984;
4295 const int kEntrySize = kTestKeySize + kTestValueSize;
4296 const int kEntriesPerBuffer = 100;
4297 Options options;
4298 options.create_if_missing = true;
4299 options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
4300 options.compaction_style = kCompactionStyleLevel;
4301 options.target_file_size_base = options.write_buffer_size;
4302 options.max_bytes_for_level_base = options.target_file_size_base * 2;
4303 options.max_bytes_for_level_multiplier = 2;
4304 options.compression = kNoCompression;
4305 options = CurrentOptions(options);
4306 options.env = env_;
4307 options.enable_thread_tracking = true;
4308 const int kNumL0Files = 4;
4309 options.level0_file_num_compaction_trigger = kNumL0Files;
4310 options.max_subcompactions = max_subcompactions_;
4311
f67539c2 4312 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
7c673cae
FG
4313 {"DBTest::ThreadStatusSingleCompaction:0", "DBImpl::BGWorkCompaction"},
4314 {"CompactionJob::Run():Start", "DBTest::ThreadStatusSingleCompaction:1"},
4315 {"DBTest::ThreadStatusSingleCompaction:2", "CompactionJob::Run():End"},
4316 });
4317 for (int tests = 0; tests < 2; ++tests) {
4318 DestroyAndReopen(options);
f67539c2
TL
4319 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
4320 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae
FG
4321
4322 Random rnd(301);
4323 // The Put Phase.
4324 for (int file = 0; file < kNumL0Files; ++file) {
4325 for (int key = 0; key < kEntriesPerBuffer; ++key) {
4326 ASSERT_OK(Put(ToString(key + file * kEntriesPerBuffer),
20effc67 4327 rnd.RandomString(kTestValueSize)));
7c673cae
FG
4328 }
4329 Flush();
4330 }
4331 // This makes sure a compaction won't be scheduled until
4332 // we have done with the above Put Phase.
4333 uint64_t num_running_compactions = 0;
4334 db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
4335 &num_running_compactions);
4336 ASSERT_EQ(num_running_compactions, 0);
4337 TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:0");
4338 ASSERT_GE(NumTableFilesAtLevel(0),
4339 options.level0_file_num_compaction_trigger);
4340
4341 // This makes sure at least one compaction is running.
4342 TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:1");
4343
4344 if (options.enable_thread_tracking) {
4345 // expecting one single L0 to L1 compaction
4346 VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 1);
4347 } else {
4348 // If thread tracking is not enabled, compaction count should be 0.
4349 VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 0);
4350 }
4351 db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
4352 &num_running_compactions);
4353 ASSERT_EQ(num_running_compactions, 1);
4354 // TODO(yhchiang): adding assert to verify each compaction stage.
4355 TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:2");
4356
4357 // repeat the test with disabling thread tracking.
4358 options.enable_thread_tracking = false;
f67539c2 4359 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7c673cae
FG
4360 }
4361}
4362
4363TEST_P(DBTestWithParam, PreShutdownManualCompaction) {
4364 Options options = CurrentOptions();
7c673cae
FG
4365 options.max_subcompactions = max_subcompactions_;
4366 CreateAndReopenWithCF({"pikachu"}, options);
4367
4368 // iter - 0 with 7 levels
4369 // iter - 1 with 3 levels
4370 for (int iter = 0; iter < 2; ++iter) {
4371 MakeTables(3, "p", "q", 1);
4372 ASSERT_EQ("1,1,1", FilesPerLevel(1));
4373
4374 // Compaction range falls before files
4375 Compact(1, "", "c");
4376 ASSERT_EQ("1,1,1", FilesPerLevel(1));
4377
4378 // Compaction range falls after files
4379 Compact(1, "r", "z");
4380 ASSERT_EQ("1,1,1", FilesPerLevel(1));
4381
4382 // Compaction range overlaps files
20effc67 4383 Compact(1, "p", "q");
7c673cae
FG
4384 ASSERT_EQ("0,0,1", FilesPerLevel(1));
4385
4386 // Populate a different range
4387 MakeTables(3, "c", "e", 1);
4388 ASSERT_EQ("1,1,2", FilesPerLevel(1));
4389
4390 // Compact just the new range
4391 Compact(1, "b", "f");
4392 ASSERT_EQ("0,0,2", FilesPerLevel(1));
4393
4394 // Compact all
4395 MakeTables(1, "a", "z", 1);
4396 ASSERT_EQ("1,0,2", FilesPerLevel(1));
4397 CancelAllBackgroundWork(db_);
4398 db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr);
4399 ASSERT_EQ("1,0,2", FilesPerLevel(1));
4400
4401 if (iter == 0) {
4402 options = CurrentOptions();
7c673cae
FG
4403 options.num_levels = 3;
4404 options.create_if_missing = true;
4405 DestroyAndReopen(options);
4406 CreateAndReopenWithCF({"pikachu"}, options);
4407 }
4408 }
4409}
4410
4411TEST_F(DBTest, PreShutdownFlush) {
4412 Options options = CurrentOptions();
7c673cae
FG
4413 CreateAndReopenWithCF({"pikachu"}, options);
4414 ASSERT_OK(Put(1, "key", "value"));
4415 CancelAllBackgroundWork(db_);
4416 Status s =
4417 db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr);
4418 ASSERT_TRUE(s.IsShutdownInProgress());
4419}
4420
// Starts several concurrent flushes/compactions (held in place by sync-point
// dependencies), cancels background work mid-flight, and then verifies via
// GetThreadList() that no thread is still reporting OP_COMPACTION.
TEST_P(DBTestWithParam, PreShutdownMultipleCompaction) {
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
  const int kEntriesPerBuffer = 40;
  const int kNumL0Files = 4;

  const int kHighPriCount = 3;
  const int kLowPriCount = 5;
  env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
  env_->SetBackgroundThreads(kLowPriCount, Env::LOW);

  Options options;
  options.create_if_missing = true;
  // Small buffers/files so that flushes and compactions trigger quickly.
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base =
      options.target_file_size_base * kNumL0Files;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  options.env = env_;
  options.enable_thread_tracking = true;  // required for GetThreadList()
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.max_bytes_for_level_multiplier = 2;
  options.max_background_compactions = kLowPriCount;
  // Effectively disable write stalls so the writer loop keeps generating
  // L0 files while compactions are blocked on sync points.
  options.level0_stop_writes_trigger = 1 << 10;
  options.level0_slowdown_writes_trigger = 1 << 10;
  options.max_subcompactions = max_subcompactions_;

  TryReopen(options);
  Random rnd(301);

  std::vector<ThreadStatus> thread_list;
  // Delay both flush and compaction: a compaction may not start before a
  // flush is in flight, may not end before the test hits "Preshutdown",
  // and "VerifyPreshutdown" may not run until the compaction job ends.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"FlushJob::FlushJob()", "CompactionJob::Run():Start"},
       {"CompactionJob::Run():Start",
        "DBTest::PreShutdownMultipleCompaction:Preshutdown"},
       {"CompactionJob::Run():Start",
        "DBTest::PreShutdownMultipleCompaction:VerifyCompaction"},
       {"DBTest::PreShutdownMultipleCompaction:Preshutdown",
        "CompactionJob::Run():End"},
       {"CompactionJob::Run():End",
        "DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Make rocksdb busy
  int key = 0;
  // check how many threads are doing compaction using GetThreadList
  int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
  for (int file = 0; file < 16 * kNumL0Files; ++file) {
    for (int k = 0; k < kEntriesPerBuffer; ++k) {
      ASSERT_OK(Put(ToString(key++), rnd.RandomString(kTestValueSize)));
    }

    Status s = env_->GetThreadList(&thread_list);
    for (auto thread : thread_list) {
      operation_count[thread.operation_type]++;
    }

    // Speed up the test: stop writing once enough concurrent flush and
    // compaction activity has been observed.
    if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
        operation_count[ThreadStatus::OP_COMPACTION] >
            0.6 * options.max_background_compactions) {
      break;
    }
    if (file == 15 * kNumL0Files) {
      // Last iteration: fire "Preshutdown" so blocked compactions can make
      // progress and the loop cannot hang on the activity threshold.
      TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
    }
  }

  TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
  ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
  CancelAllBackgroundWork(db_);
  TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown");
  dbfull()->TEST_WaitForCompact();
  // Record the number of compactions at a time.
  for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
    operation_count[i] = 0;
  }
  Status s = env_->GetThreadList(&thread_list);
  for (auto thread : thread_list) {
    operation_count[thread.operation_type]++;
  }
  // After cancellation, no thread may still be running a compaction.
  ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
}
4509
// Like PreShutdownMultipleCompaction, but the cancellation is injected while
// a compaction is paused in the MIDDLE of CompactionJob::Run() (between
// ":Start" and ":Inprogress"), verifying a mid-compaction shutdown also
// drains all OP_COMPACTION threads.
TEST_P(DBTestWithParam, PreShutdownCompactionMiddle) {
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
  const int kEntriesPerBuffer = 40;
  const int kNumL0Files = 4;

  const int kHighPriCount = 3;
  const int kLowPriCount = 5;
  env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
  env_->SetBackgroundThreads(kLowPriCount, Env::LOW);

  Options options;
  options.create_if_missing = true;
  // Small buffers/files so that flushes and compactions trigger quickly.
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base =
      options.target_file_size_base * kNumL0Files;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  options.env = env_;
  options.enable_thread_tracking = true;  // required for GetThreadList()
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.max_bytes_for_level_multiplier = 2;
  options.max_background_compactions = kLowPriCount;
  // Effectively disable write stalls so the writer loop keeps generating
  // L0 files while compactions are blocked on sync points.
  options.level0_stop_writes_trigger = 1 << 10;
  options.level0_slowdown_writes_trigger = 1 << 10;
  options.max_subcompactions = max_subcompactions_;

  TryReopen(options);
  Random rnd(301);

  std::vector<ThreadStatus> thread_list;
  // Delay both flush and compaction: the compaction cannot pass
  // ":Inprogress" until the test fires "Preshutdown", and
  // "VerifyPreshutdown" waits for the compaction job to end.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBTest::PreShutdownCompactionMiddle:Preshutdown",
        "CompactionJob::Run():Inprogress"},
       {"CompactionJob::Run():Start",
        "DBTest::PreShutdownCompactionMiddle:VerifyCompaction"},
       {"CompactionJob::Run():Inprogress", "CompactionJob::Run():End"},
       {"CompactionJob::Run():End",
        "DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Make rocksdb busy
  int key = 0;
  // check how many threads are doing compaction using GetThreadList
  int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
  for (int file = 0; file < 16 * kNumL0Files; ++file) {
    for (int k = 0; k < kEntriesPerBuffer; ++k) {
      ASSERT_OK(Put(ToString(key++), rnd.RandomString(kTestValueSize)));
    }

    Status s = env_->GetThreadList(&thread_list);
    for (auto thread : thread_list) {
      operation_count[thread.operation_type]++;
    }

    // Speed up the test: stop writing once enough concurrent flush and
    // compaction activity has been observed.
    if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
        operation_count[ThreadStatus::OP_COMPACTION] >
            0.6 * options.max_background_compactions) {
      break;
    }
    if (file == 15 * kNumL0Files) {
      // Last iteration: wait for a compaction to have started so the
      // cancellation below really happens mid-compaction.
      TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyCompaction");
    }
  }

  ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
  // Cancel while the compaction is still blocked before ":Inprogress",
  // then release it.
  CancelAllBackgroundWork(db_);
  TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:Preshutdown");
  TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown");
  dbfull()->TEST_WaitForCompact();
  // Record the number of compactions at a time.
  for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
    operation_count[i] = 0;
  }
  Status s = env_->GetThreadList(&thread_list);
  for (auto thread : thread_list) {
    operation_count[thread.operation_type]++;
  }
  // After cancellation, no thread may still be running a compaction.
  ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
}
4596
4597#endif // ROCKSDB_USING_THREAD_STATUS
4598
4599#ifndef ROCKSDB_LITE
4600TEST_F(DBTest, FlushOnDestroy) {
4601 WriteOptions wo;
4602 wo.disableWAL = true;
4603 ASSERT_OK(Put("foo", "v1", wo));
4604 CancelAllBackgroundWork(db_);
4605}
4606
// With level_compaction_dynamic_level_bytes and a 3-entry
// compression_per_level, verifies that the dynamic base level (L4 here, with
// num_levels = 5) maps onto the right compression slot: first two "levels"
// uncompressed, later levels Snappy. Sizes are checked indirectly because
// uncompressed data must stay large while compressed data shrinks.
TEST_F(DBTest, DynamicLevelCompressionPerLevel) {
  if (!Snappy_Supported()) {
    return;
  }
  const int kNKeys = 120;
  int keys[kNKeys];
  for (int i = 0; i < kNKeys; i++) {
    keys[i] = i;
  }
  // Insert keys in random order so flushed files overlap.
  RandomShuffle(std::begin(keys), std::end(keys));

  Random rnd(301);
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.db_write_buffer_size = 20480;
  options.write_buffer_size = 20480;
  options.max_write_buffer_number = 2;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 2;
  options.target_file_size_base = 20480;
  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = 102400;
  options.max_bytes_for_level_multiplier = 4;
  options.max_background_compactions = 1;
  options.num_levels = 5;

  options.compression_per_level.resize(3);
  options.compression_per_level[0] = kNoCompression;
  options.compression_per_level[1] = kNoCompression;
  options.compression_per_level[2] = kSnappyCompression;

  // Listener verifies DeleteFile() below removes exactly the expected files.
  OnFileDeletionListener* listener = new OnFileDeletionListener();
  options.listeners.emplace_back(listener);

  DestroyAndReopen(options);

  // Insert more than 80K. L4 should be base level. Neither L0 nor L4 should
  // be compressed, so total data size should be more than 80K.
  for (int i = 0; i < 20; i++) {
    ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
  }
  Flush();
  dbfull()->TEST_WaitForCompact();

  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  ASSERT_EQ(NumTableFilesAtLevel(3), 0);
  // Assuming each files' metadata is at least 50 bytes/
  ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(4), 20U * 4000U + 50U * 4);

  // Insert 400KB. Some data will be compressed
  for (int i = 21; i < 120; i++) {
    ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
  }
  Flush();
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  // Total on-disk size is below the raw data size because L3 is compressed.
  ASSERT_LT(SizeAtLevel(0) + SizeAtLevel(3) + SizeAtLevel(4),
            120U * 4000U + 50U * 24);
  // Make sure data in files in L3 is not compacted by removing all files
  // in L4 and calculate number of rows
  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "true"},
  }));
  ColumnFamilyMetaData cf_meta;
  db_->GetColumnFamilyMetaData(&cf_meta);
  for (auto file : cf_meta.levels[4].files) {
    listener->SetExpectedFileName(dbname_ + file.name);
    ASSERT_OK(dbfull()->DeleteFile(file.name));
  }
  listener->VerifyMatchedCount(cf_meta.levels[4].files.size());

  // Count the rows that survive in L0 + L3 only.
  int num_keys = 0;
  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    num_keys++;
  }
  ASSERT_OK(iter->status());
  // Remaining (uncompressed L0 + compressed L3) data still exceeds the raw
  // key/value payload, confirming L3 was not silently re-compacted.
  ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(3), num_keys * 4000U + num_keys * 10U);
}
4690
// With dynamic level bytes, verifies that per-level compression follows the
// base level as it moves: while L4 is the base level its output is LZ4; after
// enough data pushes the base to L3, L3 compactions use LZ4 and L3->L4
// compactions use Zlib. Flushes (L0) must always be uncompressed. The checks
// are done inside sync-point callbacks observing each picked compaction.
TEST_F(DBTest, DynamicLevelCompressionPerLevel2) {
  if (!Snappy_Supported() || !LZ4_Supported() || !Zlib_Supported()) {
    return;
  }
  const int kNKeys = 500;
  int keys[kNKeys];
  for (int i = 0; i < kNKeys; i++) {
    keys[i] = i;
  }
  // Random insertion order so flushed files overlap and compact.
  RandomShuffle(std::begin(keys), std::end(keys));

  Random rnd(301);
  Options options;
  options.create_if_missing = true;
  options.db_write_buffer_size = 6000000;
  options.write_buffer_size = 600000;
  options.max_write_buffer_number = 2;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 2;
  options.soft_pending_compaction_bytes_limit = 1024 * 1024;
  // Tiny target file size forces many small files and frequent compactions.
  options.target_file_size_base = 20;

  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = 200;
  options.max_bytes_for_level_multiplier = 8;
  options.max_background_compactions = 1;
  options.num_levels = 5;
  std::shared_ptr<mock::MockTableFactory> mtf(new mock::MockTableFactory);
  options.table_factory = mtf;

  options.compression_per_level.resize(3);
  options.compression_per_level[0] = kNoCompression;
  options.compression_per_level[1] = kLZ4Compression;
  options.compression_per_level[2] = kZlibCompression;

  DestroyAndReopen(options);
  // When base level is L4, L4 is LZ4.
  std::atomic<int> num_zlib(0);
  std::atomic<int> num_lz4(0);
  std::atomic<int> num_no(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        if (compaction->output_level() == 4) {
          ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
          num_lz4.fetch_add(1);
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
        auto* compression = reinterpret_cast<CompressionType*>(arg);
        // L0 output (flush) must always be uncompressed.
        ASSERT_TRUE(*compression == kNoCompression);
        num_no.fetch_add(1);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  for (int i = 0; i < 100; i++) {
    std::string value = rnd.RandomString(200);
    ASSERT_OK(Put(Key(keys[i]), value));
    if (i % 25 == 24) {
      Flush();
      dbfull()->TEST_WaitForCompact();
    }
  }

  Flush();
  dbfull()->TEST_WaitForFlushMemTable();
  dbfull()->TEST_WaitForCompact();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();

  // Base level must still be L4: L1-L3 empty, and both flush and L4
  // compaction callbacks must have fired.
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  ASSERT_EQ(NumTableFilesAtLevel(3), 0);
  ASSERT_GT(NumTableFilesAtLevel(4), 0);
  ASSERT_GT(num_no.load(), 2);
  ASSERT_GT(num_lz4.load(), 0);
  int prev_num_files_l4 = NumTableFilesAtLevel(4);

  // After base level turn L4->L3, L3 becomes LZ4 and L4 becomes Zlib
  num_lz4.store(0);
  num_no.store(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        if (compaction->output_level() == 4 && compaction->start_level() == 3) {
          // L3 -> L4 compaction writes the bottommost slot: Zlib.
          ASSERT_TRUE(compaction->output_compression() == kZlibCompression);
          num_zlib.fetch_add(1);
        } else {
          ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
          num_lz4.fetch_add(1);
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
        auto* compression = reinterpret_cast<CompressionType*>(arg);
        ASSERT_TRUE(*compression == kNoCompression);
        num_no.fetch_add(1);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  for (int i = 101; i < 500; i++) {
    std::string value = rnd.RandomString(200);
    ASSERT_OK(Put(Key(keys[i]), value));
    if (i % 100 == 99) {
      Flush();
      dbfull()->TEST_WaitForCompact();
    }
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  // Base level is now L3; L4 grew past its earlier file count, and all
  // three compression callbacks must have fired.
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  ASSERT_GT(NumTableFilesAtLevel(3), 0);
  ASSERT_GT(NumTableFilesAtLevel(4), prev_num_files_l4);
  ASSERT_GT(num_no.load(), 2);
  ASSERT_GT(num_lz4.load(), 0);
  ASSERT_GT(num_zlib.load(), 0);
}
4812
// Exercises SetOptions() for the core compaction knobs (file-count trigger,
// target file size, level base size/multiplier, stop trigger, auto-compaction
// toggle) and verifies each takes effect dynamically by observing the
// resulting file layout and write-controller state.
TEST_F(DBTest, DynamicCompactionOptions) {
  // minimum write buffer size is enforced at 64KB
  const uint64_t k32KB = 1 << 15;
  const uint64_t k64KB = 1 << 16;
  const uint64_t k128KB = 1 << 17;
  const uint64_t k1MB = 1 << 20;
  const uint64_t k4KB = 1 << 12;
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.compression = kNoCompression;
  options.soft_pending_compaction_bytes_limit = 1024 * 1024;
  options.write_buffer_size = k64KB;
  options.arena_block_size = 4 * k4KB;
  options.max_write_buffer_number = 2;
  // Compaction related options
  options.level0_file_num_compaction_trigger = 3;
  options.level0_slowdown_writes_trigger = 4;
  options.level0_stop_writes_trigger = 8;
  options.target_file_size_base = k64KB;
  options.max_compaction_bytes = options.target_file_size_base * 10;
  options.target_file_size_multiplier = 1;
  options.max_bytes_for_level_base = k128KB;
  options.max_bytes_for_level_multiplier = 4;

  // Block flush thread and disable compaction thread
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
  DestroyAndReopen(options);

  // Writes `size` KB of 1KB values starting at Key(start), spaced by
  // `stride`, then waits for the resulting flush to finish.
  auto gen_l0_kb = [this](int start, int size, int stride) {
    Random rnd(301);
    for (int i = 0; i < size; i++) {
      ASSERT_OK(Put(Key(start + stride * i), rnd.RandomString(1024)));
    }
    dbfull()->TEST_WaitForFlushMemTable();
  };

  // Write 3 files that have the same key range.
  // Since level0_file_num_compaction_trigger is 3, compaction should be
  // triggered. The compaction should result in one L1 file
  gen_l0_kb(0, 64, 1);
  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  gen_l0_kb(0, 64, 1);
  ASSERT_EQ(NumTableFilesAtLevel(0), 2);
  gen_l0_kb(0, 64, 1);
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ("0,1", FilesPerLevel());
  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(1U, metadata.size());
  // The single L1 file should be ~64KB (within one 4KB arena block).
  ASSERT_LE(metadata[0].size, k64KB + k4KB);
  ASSERT_GE(metadata[0].size, k64KB - k4KB);

  // Test compaction trigger and target_file_size_base
  // Reduce compaction trigger to 2, and reduce L1 file size to 32KB.
  // Writing to 64KB L0 files should trigger a compaction. Since these
  // 2 L0 files have the same key range, compaction merge them and should
  // result in 2 32KB L1 files.
  ASSERT_OK(dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"},
                                  {"target_file_size_base", ToString(k32KB)}}));

  gen_l0_kb(0, 64, 1);
  ASSERT_EQ("1,1", FilesPerLevel());
  gen_l0_kb(0, 64, 1);
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ("0,2", FilesPerLevel());
  metadata.clear();
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(2U, metadata.size());
  ASSERT_LE(metadata[0].size, k32KB + k4KB);
  ASSERT_GE(metadata[0].size, k32KB - k4KB);
  ASSERT_LE(metadata[1].size, k32KB + k4KB);
  ASSERT_GE(metadata[1].size, k32KB - k4KB);

  // Test max_bytes_for_level_base
  // Increase level base size to 256KB and write enough data that will
  // fill L1 and L2. L1 size should be around 256KB while L2 size should be
  // around 256KB x 4.
  ASSERT_OK(
      dbfull()->SetOptions({{"max_bytes_for_level_base", ToString(k1MB)}}));

  // writing 96 x 64KB => 6 * 1024KB
  // (L1 + L2) = (1 + 4) * 1024KB
  for (int i = 0; i < 96; ++i) {
    gen_l0_kb(i, 64, 96);
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_GT(SizeAtLevel(1), k1MB / 2);
  ASSERT_LT(SizeAtLevel(1), k1MB + k1MB / 2);

  // Within (0.5, 1.5) of 4MB.
  ASSERT_GT(SizeAtLevel(2), 2 * k1MB);
  ASSERT_LT(SizeAtLevel(2), 6 * k1MB);

  // Test max_bytes_for_level_multiplier and
  // max_bytes_for_level_base. Now, reduce both mulitplier and level base,
  // After filling enough data that can fit in L1 - L3, we should see L1 size
  // reduces to 128KB from 256KB which was asserted previously. Same for L2.
  ASSERT_OK(
      dbfull()->SetOptions({{"max_bytes_for_level_multiplier", "2"},
                            {"max_bytes_for_level_base", ToString(k128KB)}}));

  // writing 20 x 64KB = 10 x 128KB
  // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB
  for (int i = 0; i < 20; ++i) {
    gen_l0_kb(i, 64, 32);
  }
  dbfull()->TEST_WaitForCompact();
  uint64_t total_size = SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3);
  ASSERT_TRUE(total_size < k128KB * 7 * 1.5);

  // Test level0_stop_writes_trigger.
  // Clean up memtable and L0. Block compaction threads. If continue to write
  // and flush memtables. We should see put stop after 8 memtable flushes
  // since level0_stop_writes_trigger = 8
  dbfull()->TEST_FlushMemTable(true, true);
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  // Block compaction
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  int count = 0;
  Random rnd(301);
  WriteOptions wo;
  while (count < 64) {
    ASSERT_OK(Put(Key(count), rnd.RandomString(1024), wo));
    dbfull()->TEST_FlushMemTable(true, true);
    count++;
    if (dbfull()->TEST_write_controler().IsStopped()) {
      sleeping_task_low.WakeUp();
      break;
    }
  }
  // Stop trigger = 8
  ASSERT_EQ(count, 8);
  // Unblock
  sleeping_task_low.WaitUntilDone();

  // Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0.
  // Block compaction thread again. Perform the put and memtable flushes
  // until we see the stop after 6 memtable flushes.
  ASSERT_OK(dbfull()->SetOptions({{"level0_stop_writes_trigger", "6"}}));
  dbfull()->TEST_FlushMemTable(true);
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  // Block compaction again
  sleeping_task_low.Reset();
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();
  count = 0;
  while (count < 64) {
    ASSERT_OK(Put(Key(count), rnd.RandomString(1024), wo));
    dbfull()->TEST_FlushMemTable(true, true);
    count++;
    if (dbfull()->TEST_write_controler().IsStopped()) {
      sleeping_task_low.WakeUp();
      break;
    }
  }
  ASSERT_EQ(count, 6);
  // Unblock
  sleeping_task_low.WaitUntilDone();

  // Test disable_auto_compactions
  // Compaction thread is unblocked but auto compaction is disabled. Write
  // 4 L0 files and compaction should be triggered. If auto compaction is
  // disabled, then TEST_WaitForCompact will be waiting for nothing. Number of
  // L0 files do not change after the call.
  ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "true"}}));
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  for (int i = 0; i < 4; ++i) {
    ASSERT_OK(Put(Key(i), rnd.RandomString(1024)));
    // Wait for compaction so that put won't stop
    dbfull()->TEST_FlushMemTable(true);
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(NumTableFilesAtLevel(0), 4);

  // Enable auto compaction and perform the same test, # of L0 files should be
  // reduced after compaction.
  ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}}));
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  for (int i = 0; i < 4; ++i) {
    ASSERT_OK(Put(Key(i), rnd.RandomString(1024)));
    // Wait for compaction so that put won't stop
    dbfull()->TEST_FlushMemTable(true);
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_LT(NumTableFilesAtLevel(0), 4);
}
11fdf7f2 5012
494da23a 5013// Test dynamic FIFO compaction options.
11fdf7f2
TL
5014// This test covers just option parsing and makes sure that the options are
5015// correctly assigned. Also look at DBOptionsTest.SetFIFOCompactionOptions
5016// test which makes sure that the FIFO compaction funcionality is working
5017// as expected on dynamically changing the options.
5018// Even more FIFOCompactionTests are at DBTest.FIFOCompaction* .
5019TEST_F(DBTest, DynamicFIFOCompactionOptions) {
5020 Options options;
f67539c2 5021 options.ttl = 0;
11fdf7f2 5022 options.create_if_missing = true;
20effc67 5023 options.env = env_;
11fdf7f2
TL
5024 DestroyAndReopen(options);
5025
5026 // Initial defaults
5027 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5028 1024 * 1024 * 1024);
494da23a 5029 ASSERT_EQ(dbfull()->GetOptions().ttl, 0);
11fdf7f2
TL
5030 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5031 false);
5032
5033 ASSERT_OK(dbfull()->SetOptions(
5034 {{"compaction_options_fifo", "{max_table_files_size=23;}"}}));
5035 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5036 23);
494da23a 5037 ASSERT_EQ(dbfull()->GetOptions().ttl, 0);
11fdf7f2
TL
5038 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5039 false);
5040
494da23a 5041 ASSERT_OK(dbfull()->SetOptions({{"ttl", "97"}}));
11fdf7f2
TL
5042 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5043 23);
494da23a 5044 ASSERT_EQ(dbfull()->GetOptions().ttl, 97);
11fdf7f2
TL
5045 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5046 false);
5047
494da23a 5048 ASSERT_OK(dbfull()->SetOptions({{"ttl", "203"}}));
11fdf7f2
TL
5049 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5050 23);
494da23a 5051 ASSERT_EQ(dbfull()->GetOptions().ttl, 203);
11fdf7f2
TL
5052 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5053 false);
5054
5055 ASSERT_OK(dbfull()->SetOptions(
5056 {{"compaction_options_fifo", "{allow_compaction=true;}"}}));
5057 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5058 23);
494da23a 5059 ASSERT_EQ(dbfull()->GetOptions().ttl, 203);
11fdf7f2
TL
5060 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5061 true);
5062
5063 ASSERT_OK(dbfull()->SetOptions(
494da23a 5064 {{"compaction_options_fifo", "{max_table_files_size=31;}"}}));
11fdf7f2
TL
5065 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5066 31);
494da23a 5067 ASSERT_EQ(dbfull()->GetOptions().ttl, 203);
11fdf7f2
TL
5068 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5069 true);
5070
5071 ASSERT_OK(dbfull()->SetOptions(
5072 {{"compaction_options_fifo",
494da23a
TL
5073 "{max_table_files_size=51;allow_compaction=true;}"}}));
5074 ASSERT_OK(dbfull()->SetOptions({{"ttl", "49"}}));
11fdf7f2
TL
5075 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
5076 51);
494da23a 5077 ASSERT_EQ(dbfull()->GetOptions().ttl, 49);
11fdf7f2
TL
5078 ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
5079 true);
5080}
5081
5082TEST_F(DBTest, DynamicUniversalCompactionOptions) {
5083 Options options;
5084 options.create_if_missing = true;
20effc67 5085 options.env = env_;
11fdf7f2
TL
5086 DestroyAndReopen(options);
5087
5088 // Initial defaults
f67539c2 5089 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 1U);
11fdf7f2 5090 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
f67539c2 5091 2u);
11fdf7f2
TL
5092 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width,
5093 UINT_MAX);
5094 ASSERT_EQ(dbfull()
5095 ->GetOptions()
5096 .compaction_options_universal.max_size_amplification_percent,
f67539c2 5097 200u);
11fdf7f2
TL
5098 ASSERT_EQ(dbfull()
5099 ->GetOptions()
5100 .compaction_options_universal.compression_size_percent,
5101 -1);
5102 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style,
5103 kCompactionStopStyleTotalSize);
5104 ASSERT_EQ(
5105 dbfull()->GetOptions().compaction_options_universal.allow_trivial_move,
5106 false);
5107
5108 ASSERT_OK(dbfull()->SetOptions(
5109 {{"compaction_options_universal", "{size_ratio=7;}"}}));
f67539c2 5110 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 7u);
11fdf7f2 5111 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
f67539c2 5112 2u);
11fdf7f2
TL
5113 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width,
5114 UINT_MAX);
5115 ASSERT_EQ(dbfull()
5116 ->GetOptions()
5117 .compaction_options_universal.max_size_amplification_percent,
f67539c2 5118 200u);
11fdf7f2
TL
5119 ASSERT_EQ(dbfull()
5120 ->GetOptions()
5121 .compaction_options_universal.compression_size_percent,
5122 -1);
5123 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style,
5124 kCompactionStopStyleTotalSize);
5125 ASSERT_EQ(
5126 dbfull()->GetOptions().compaction_options_universal.allow_trivial_move,
5127 false);
5128
5129 ASSERT_OK(dbfull()->SetOptions(
5130 {{"compaction_options_universal", "{min_merge_width=11;}"}}));
f67539c2 5131 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 7u);
11fdf7f2 5132 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
f67539c2 5133 11u);
11fdf7f2
TL
5134 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width,
5135 UINT_MAX);
5136 ASSERT_EQ(dbfull()
5137 ->GetOptions()
5138 .compaction_options_universal.max_size_amplification_percent,
f67539c2 5139 200u);
11fdf7f2
TL
5140 ASSERT_EQ(dbfull()
5141 ->GetOptions()
5142 .compaction_options_universal.compression_size_percent,
5143 -1);
5144 ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style,
5145 kCompactionStopStyleTotalSize);
5146 ASSERT_EQ(
5147 dbfull()->GetOptions().compaction_options_universal.allow_trivial_move,
5148 false);
5149}
7c673cae
FG
5150#endif // ROCKSDB_LITE
5151
5152TEST_F(DBTest, FileCreationRandomFailure) {
5153 Options options;
5154 options.env = env_;
5155 options.create_if_missing = true;
5156 options.write_buffer_size = 100000; // Small write buffer
5157 options.target_file_size_base = 200000;
5158 options.max_bytes_for_level_base = 1000000;
5159 options.max_bytes_for_level_multiplier = 2;
5160
5161 DestroyAndReopen(options);
5162 Random rnd(301);
5163
20effc67
TL
5164 constexpr int kCDTKeysPerBuffer = 4;
5165 constexpr int kTestSize = kCDTKeysPerBuffer * 4096;
5166 constexpr int kTotalIteration = 20;
7c673cae
FG
5167 // the second half of the test involves in random failure
5168 // of file creation.
20effc67
TL
5169 constexpr int kRandomFailureTest = kTotalIteration / 2;
5170
7c673cae
FG
5171 std::vector<std::string> values;
5172 for (int i = 0; i < kTestSize; ++i) {
5173 values.push_back("NOT_FOUND");
5174 }
5175 for (int j = 0; j < kTotalIteration; ++j) {
5176 if (j == kRandomFailureTest) {
5177 env_->non_writeable_rate_.store(90);
5178 }
5179 for (int k = 0; k < kTestSize; ++k) {
5180 // here we expect some of the Put fails.
20effc67 5181 std::string value = rnd.RandomString(100);
7c673cae
FG
5182 Status s = Put(Key(k), Slice(value));
5183 if (s.ok()) {
5184 // update the latest successful put
5185 values[k] = value;
5186 }
5187 // But everything before we simulate the failure-test should succeed.
5188 if (j < kRandomFailureTest) {
5189 ASSERT_OK(s);
5190 }
5191 }
5192 }
5193
5194 // If rocksdb does not do the correct job, internal assert will fail here.
5195 dbfull()->TEST_WaitForFlushMemTable();
5196 dbfull()->TEST_WaitForCompact();
5197
5198 // verify we have the latest successful update
5199 for (int k = 0; k < kTestSize; ++k) {
5200 auto v = Get(Key(k));
5201 ASSERT_EQ(v, values[k]);
5202 }
5203
5204 // reopen and reverify we have the latest successful update
5205 env_->non_writeable_rate_.store(0);
5206 Reopen(options);
5207 for (int k = 0; k < kTestSize; ++k) {
5208 auto v = Get(Key(k));
5209 ASSERT_EQ(v, values[k]);
5210 }
5211}
5212
5213#ifndef ROCKSDB_LITE
11fdf7f2 5214
7c673cae
FG
5215TEST_F(DBTest, DynamicMiscOptions) {
5216 // Test max_sequential_skip_in_iterations
5217 Options options;
5218 options.env = env_;
5219 options.create_if_missing = true;
5220 options.max_sequential_skip_in_iterations = 16;
5221 options.compression = kNoCompression;
f67539c2 5222 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
7c673cae
FG
5223 DestroyAndReopen(options);
5224
5225 auto assert_reseek_count = [this, &options](int key_start, int num_reseek) {
5226 int key0 = key_start;
5227 int key1 = key_start + 1;
5228 int key2 = key_start + 2;
5229 Random rnd(301);
20effc67 5230 ASSERT_OK(Put(Key(key0), rnd.RandomString(8)));
7c673cae 5231 for (int i = 0; i < 10; ++i) {
20effc67 5232 ASSERT_OK(Put(Key(key1), rnd.RandomString(8)));
7c673cae 5233 }
20effc67 5234 ASSERT_OK(Put(Key(key2), rnd.RandomString(8)));
7c673cae
FG
5235 std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
5236 iter->Seek(Key(key1));
5237 ASSERT_TRUE(iter->Valid());
5238 ASSERT_EQ(iter->key().compare(Key(key1)), 0);
5239 iter->Next();
5240 ASSERT_TRUE(iter->Valid());
5241 ASSERT_EQ(iter->key().compare(Key(key2)), 0);
5242 ASSERT_EQ(num_reseek,
5243 TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
5244 };
5245 // No reseek
5246 assert_reseek_count(100, 0);
5247
5248 ASSERT_OK(dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "4"}}));
5249 // Clear memtable and make new option effective
5250 dbfull()->TEST_FlushMemTable(true);
5251 // Trigger reseek
5252 assert_reseek_count(200, 1);
5253
5254 ASSERT_OK(
5255 dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "16"}}));
5256 // Clear memtable and make new option effective
5257 dbfull()->TEST_FlushMemTable(true);
5258 // No reseek
5259 assert_reseek_count(300, 1);
5260
5261 MutableCFOptions mutable_cf_options;
5262 CreateAndReopenWithCF({"pikachu"}, options);
5263 // Test soft_pending_compaction_bytes_limit,
5264 // hard_pending_compaction_bytes_limit
5265 ASSERT_OK(dbfull()->SetOptions(
5266 handles_[1], {{"soft_pending_compaction_bytes_limit", "200"},
5267 {"hard_pending_compaction_bytes_limit", "300"}}));
5268 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
5269 &mutable_cf_options));
5270 ASSERT_EQ(200, mutable_cf_options.soft_pending_compaction_bytes_limit);
5271 ASSERT_EQ(300, mutable_cf_options.hard_pending_compaction_bytes_limit);
5272 // Test report_bg_io_stats
5273 ASSERT_OK(
5274 dbfull()->SetOptions(handles_[1], {{"report_bg_io_stats", "true"}}));
5275 // sanity check
5276 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
5277 &mutable_cf_options));
5278 ASSERT_TRUE(mutable_cf_options.report_bg_io_stats);
5279 // Test compression
5280 // sanity check
5281 ASSERT_OK(dbfull()->SetOptions({{"compression", "kNoCompression"}}));
5282 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[0],
5283 &mutable_cf_options));
5284 ASSERT_EQ(CompressionType::kNoCompression, mutable_cf_options.compression);
f67539c2
TL
5285
5286 if (Snappy_Supported()) {
5287 ASSERT_OK(dbfull()->SetOptions({{"compression", "kSnappyCompression"}}));
5288 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[0],
5289 &mutable_cf_options));
5290 ASSERT_EQ(CompressionType::kSnappyCompression,
5291 mutable_cf_options.compression);
5292 }
5293
7c673cae
FG
5294 // Test paranoid_file_checks already done in db_block_cache_test
5295 ASSERT_OK(
5296 dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "true"}}));
5297 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
5298 &mutable_cf_options));
5299 ASSERT_TRUE(mutable_cf_options.report_bg_io_stats);
20effc67
TL
5300 ASSERT_TRUE(mutable_cf_options.check_flush_compaction_key_order);
5301
5302 ASSERT_OK(dbfull()->SetOptions(
5303 handles_[1], {{"check_flush_compaction_key_order", "false"}}));
5304 ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
5305 &mutable_cf_options));
5306 ASSERT_FALSE(mutable_cf_options.check_flush_compaction_key_order);
7c673cae
FG
5307}
5308#endif // ROCKSDB_LITE
5309
5310TEST_F(DBTest, L0L1L2AndUpHitCounter) {
5311 Options options = CurrentOptions();
5312 options.write_buffer_size = 32 * 1024;
5313 options.target_file_size_base = 32 * 1024;
5314 options.level0_file_num_compaction_trigger = 2;
5315 options.level0_slowdown_writes_trigger = 2;
5316 options.level0_stop_writes_trigger = 4;
5317 options.max_bytes_for_level_base = 64 * 1024;
5318 options.max_write_buffer_number = 2;
5319 options.max_background_compactions = 8;
5320 options.max_background_flushes = 8;
f67539c2 5321 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
7c673cae
FG
5322 CreateAndReopenWithCF({"mypikachu"}, options);
5323
5324 int numkeys = 20000;
5325 for (int i = 0; i < numkeys; i++) {
5326 ASSERT_OK(Put(1, Key(i), "val"));
5327 }
5328 ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
5329 ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
5330 ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
5331
5332 ASSERT_OK(Flush(1));
5333 dbfull()->TEST_WaitForCompact();
5334
5335 for (int i = 0; i < numkeys; i++) {
5336 ASSERT_EQ(Get(1, Key(i)), "val");
5337 }
5338
5339 ASSERT_GT(TestGetTickerCount(options, GET_HIT_L0), 100);
5340 ASSERT_GT(TestGetTickerCount(options, GET_HIT_L1), 100);
5341 ASSERT_GT(TestGetTickerCount(options, GET_HIT_L2_AND_UP), 100);
5342
5343 ASSERT_EQ(numkeys, TestGetTickerCount(options, GET_HIT_L0) +
5344 TestGetTickerCount(options, GET_HIT_L1) +
5345 TestGetTickerCount(options, GET_HIT_L2_AND_UP));
5346}
5347
5348TEST_F(DBTest, EncodeDecompressedBlockSizeTest) {
5349 // iter 0 -- zlib
5350 // iter 1 -- bzip2
5351 // iter 2 -- lz4
5352 // iter 3 -- lz4HC
5353 // iter 4 -- xpress
5354 CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
5355 kLZ4Compression, kLZ4HCCompression,
5356 kXpressCompression};
5357 for (auto comp : compressions) {
5358 if (!CompressionTypeSupported(comp)) {
5359 continue;
5360 }
5361 // first_table_version 1 -- generate with table_version == 1, read with
5362 // table_version == 2
5363 // first_table_version 2 -- generate with table_version == 2, read with
5364 // table_version == 1
5365 for (int first_table_version = 1; first_table_version <= 2;
5366 ++first_table_version) {
5367 BlockBasedTableOptions table_options;
5368 table_options.format_version = first_table_version;
5369 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
5370 Options options = CurrentOptions();
5371 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
5372 options.create_if_missing = true;
5373 options.compression = comp;
5374 DestroyAndReopen(options);
5375
11fdf7f2 5376 int kNumKeysWritten = 1000;
7c673cae
FG
5377
5378 Random rnd(301);
5379 for (int i = 0; i < kNumKeysWritten; ++i) {
5380 // compressible string
20effc67 5381 ASSERT_OK(Put(Key(i), rnd.RandomString(128) + std::string(128, 'a')));
7c673cae
FG
5382 }
5383
5384 table_options.format_version = first_table_version == 1 ? 2 : 1;
5385 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
5386 Reopen(options);
5387 for (int i = 0; i < kNumKeysWritten; ++i) {
5388 auto r = Get(Key(i));
5389 ASSERT_EQ(r.substr(128), std::string(128, 'a'));
5390 }
5391 }
5392 }
5393}
5394
5395TEST_F(DBTest, CloseSpeedup) {
5396 Options options = CurrentOptions();
5397 options.compaction_style = kCompactionStyleLevel;
5398 options.write_buffer_size = 110 << 10; // 110KB
5399 options.arena_block_size = 4 << 10;
5400 options.level0_file_num_compaction_trigger = 2;
5401 options.num_levels = 4;
5402 options.max_bytes_for_level_base = 400 * 1024;
5403 options.max_write_buffer_number = 16;
5404
5405 // Block background threads
5406 env_->SetBackgroundThreads(1, Env::LOW);
5407 env_->SetBackgroundThreads(1, Env::HIGH);
5408 test::SleepingBackgroundTask sleeping_task_low;
5409 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
5410 Env::Priority::LOW);
5411 test::SleepingBackgroundTask sleeping_task_high;
5412 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
5413 &sleeping_task_high, Env::Priority::HIGH);
5414
5415 std::vector<std::string> filenames;
5416 env_->GetChildren(dbname_, &filenames);
5417 // Delete archival files.
5418 for (size_t i = 0; i < filenames.size(); ++i) {
5419 env_->DeleteFile(dbname_ + "/" + filenames[i]);
5420 }
5421 env_->DeleteDir(dbname_);
5422 DestroyAndReopen(options);
5423
f67539c2 5424 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae
FG
5425 env_->SetBackgroundThreads(1, Env::LOW);
5426 env_->SetBackgroundThreads(1, Env::HIGH);
5427 Random rnd(301);
5428 int key_idx = 0;
5429
5430 // First three 110KB files are not going to level 2
5431 // After that, (100K, 200K)
5432 for (int num = 0; num < 5; num++) {
5433 GenerateNewFile(&rnd, &key_idx, true);
5434 }
5435
5436 ASSERT_EQ(0, GetSstFileCount(dbname_));
5437
5438 Close();
5439 ASSERT_EQ(0, GetSstFileCount(dbname_));
5440
5441 // Unblock background threads
5442 sleeping_task_high.WakeUp();
5443 sleeping_task_high.WaitUntilDone();
5444 sleeping_task_low.WakeUp();
5445 sleeping_task_low.WaitUntilDone();
5446
5447 Destroy(options);
5448}
5449
5450class DelayedMergeOperator : public MergeOperator {
5451 private:
5452 DBTest* db_test_;
5453
5454 public:
5455 explicit DelayedMergeOperator(DBTest* d) : db_test_(d) {}
5456
20effc67 5457 bool FullMergeV2(const MergeOperationInput& merge_in,
494da23a 5458 MergeOperationOutput* merge_out) const override {
20effc67
TL
5459 db_test_->env_->MockSleepForMicroseconds(1000 *
5460 merge_in.operand_list.size());
7c673cae
FG
5461 merge_out->new_value = "";
5462 return true;
5463 }
5464
494da23a 5465 const char* Name() const override { return "DelayedMergeOperator"; }
7c673cae
FG
5466};
5467
5468TEST_F(DBTest, MergeTestTime) {
5469 std::string one, two, three;
5470 PutFixed64(&one, 1);
5471 PutFixed64(&two, 2);
5472 PutFixed64(&three, 3);
5473
5474 // Enable time profiling
5475 SetPerfLevel(kEnableTime);
7c673cae 5476 Options options = CurrentOptions();
f67539c2 5477 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
7c673cae 5478 options.merge_operator.reset(new DelayedMergeOperator(this));
20effc67 5479 SetTimeElapseOnlySleepOnReopen(&options);
7c673cae
FG
5480 DestroyAndReopen(options);
5481
20effc67
TL
5482 // NOTE: Presumed unnecessary and removed: resetting mock time in env
5483
7c673cae
FG
5484 ASSERT_EQ(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
5485 db_->Put(WriteOptions(), "foo", one);
5486 ASSERT_OK(Flush());
5487 ASSERT_OK(db_->Merge(WriteOptions(), "foo", two));
5488 ASSERT_OK(Flush());
5489 ASSERT_OK(db_->Merge(WriteOptions(), "foo", three));
5490 ASSERT_OK(Flush());
5491
5492 ReadOptions opt;
5493 opt.verify_checksums = true;
5494 opt.snapshot = nullptr;
5495 std::string result;
5496 db_->Get(opt, "foo", &result);
5497
20effc67 5498 ASSERT_EQ(2000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
7c673cae
FG
5499
5500 ReadOptions read_options;
5501 std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
5502 int count = 0;
5503 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
5504 ASSERT_OK(iter->status());
5505 ++count;
5506 }
5507
5508 ASSERT_EQ(1, count);
20effc67 5509 ASSERT_EQ(4000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
7c673cae
FG
5510#ifdef ROCKSDB_USING_THREAD_STATUS
5511 ASSERT_GT(TestGetTickerCount(options, FLUSH_WRITE_BYTES), 0);
5512#endif // ROCKSDB_USING_THREAD_STATUS
7c673cae
FG
5513}
5514
5515#ifndef ROCKSDB_LITE
5516TEST_P(DBTestWithParam, MergeCompactionTimeTest) {
5517 SetPerfLevel(kEnableTime);
5518 Options options = CurrentOptions();
5519 options.compaction_filter_factory = std::make_shared<KeepFilterFactory>();
f67539c2 5520 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
7c673cae 5521 options.merge_operator.reset(new DelayedMergeOperator(this));
20effc67 5522 options.disable_auto_compactions = true;
7c673cae 5523 options.max_subcompactions = max_subcompactions_;
20effc67 5524 SetTimeElapseOnlySleepOnReopen(&options);
7c673cae
FG
5525 DestroyAndReopen(options);
5526
20effc67
TL
5527 constexpr unsigned n = 1000;
5528 for (unsigned i = 0; i < n; i++) {
7c673cae
FG
5529 ASSERT_OK(db_->Merge(WriteOptions(), "foo", "TEST"));
5530 ASSERT_OK(Flush());
5531 }
5532 dbfull()->TEST_WaitForFlushMemTable();
7c673cae 5533
20effc67
TL
5534 CompactRangeOptions cro;
5535 cro.exclusive_manual_compaction = exclusive_manual_compaction_;
5536 ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
5537
5538 ASSERT_EQ(uint64_t{n} * 1000000U,
5539 TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
7c673cae
FG
5540}
5541
5542TEST_P(DBTestWithParam, FilterCompactionTimeTest) {
5543 Options options = CurrentOptions();
5544 options.compaction_filter_factory =
5545 std::make_shared<DelayFilterFactory>(this);
5546 options.disable_auto_compactions = true;
5547 options.create_if_missing = true;
f67539c2 5548 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
494da23a 5549 options.statistics->set_stats_level(kExceptTimeForMutex);
7c673cae 5550 options.max_subcompactions = max_subcompactions_;
20effc67 5551 SetTimeElapseOnlySleepOnReopen(&options);
7c673cae
FG
5552 DestroyAndReopen(options);
5553
20effc67 5554 unsigned n = 0;
7c673cae
FG
5555 // put some data
5556 for (int table = 0; table < 4; ++table) {
5557 for (int i = 0; i < 10 + table; ++i) {
5558 Put(ToString(table * 100 + i), "val");
20effc67 5559 ++n;
7c673cae
FG
5560 }
5561 Flush();
5562 }
5563
5564 CompactRangeOptions cro;
5565 cro.exclusive_manual_compaction = exclusive_manual_compaction_;
5566 ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
5567 ASSERT_EQ(0U, CountLiveFiles());
5568
5569 Reopen(options);
5570
5571 Iterator* itr = db_->NewIterator(ReadOptions());
5572 itr->SeekToFirst();
20effc67
TL
5573 ASSERT_EQ(uint64_t{n} * 1000000U,
5574 TestGetTickerCount(options, FILTER_OPERATION_TOTAL_TIME));
7c673cae
FG
5575 delete itr;
5576}
5577#endif // ROCKSDB_LITE
5578
5579TEST_F(DBTest, TestLogCleanup) {
5580 Options options = CurrentOptions();
5581 options.write_buffer_size = 64 * 1024; // very small
5582 // only two memtables allowed ==> only two log files
5583 options.max_write_buffer_number = 2;
5584 Reopen(options);
5585
5586 for (int i = 0; i < 100000; ++i) {
5587 Put(Key(i), "val");
5588 // only 2 memtables will be alive, so logs_to_free needs to always be below
5589 // 2
5590 ASSERT_LT(dbfull()->TEST_LogsToFreeSize(), static_cast<size_t>(3));
5591 }
5592}
5593
5594#ifndef ROCKSDB_LITE
5595TEST_F(DBTest, EmptyCompactedDB) {
5596 Options options = CurrentOptions();
5597 options.max_open_files = -1;
5598 Close();
5599 ASSERT_OK(ReadOnlyReopen(options));
5600 Status s = Put("new", "value");
5601 ASSERT_TRUE(s.IsNotSupported());
5602 Close();
5603}
5604#endif // ROCKSDB_LITE
5605
5606#ifndef ROCKSDB_LITE
20effc67 5607TEST_F(DBTest, DISABLED_SuggestCompactRangeTest) {
7c673cae
FG
5608 class CompactionFilterFactoryGetContext : public CompactionFilterFactory {
5609 public:
494da23a 5610 std::unique_ptr<CompactionFilter> CreateCompactionFilter(
7c673cae
FG
5611 const CompactionFilter::Context& context) override {
5612 saved_context = context;
5613 std::unique_ptr<CompactionFilter> empty_filter;
5614 return empty_filter;
5615 }
5616 const char* Name() const override {
5617 return "CompactionFilterFactoryGetContext";
5618 }
5619 static bool IsManual(CompactionFilterFactory* compaction_filter_factory) {
5620 return reinterpret_cast<CompactionFilterFactoryGetContext*>(
5621 compaction_filter_factory)
5622 ->saved_context.is_manual_compaction;
5623 }
5624 CompactionFilter::Context saved_context;
5625 };
5626
5627 Options options = CurrentOptions();
5628 options.memtable_factory.reset(
5629 new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile));
5630 options.compaction_style = kCompactionStyleLevel;
5631 options.compaction_filter_factory.reset(
5632 new CompactionFilterFactoryGetContext());
5633 options.write_buffer_size = 200 << 10;
5634 options.arena_block_size = 4 << 10;
5635 options.level0_file_num_compaction_trigger = 4;
5636 options.num_levels = 4;
5637 options.compression = kNoCompression;
5638 options.max_bytes_for_level_base = 450 << 10;
5639 options.target_file_size_base = 98 << 10;
5640 options.max_compaction_bytes = static_cast<uint64_t>(1) << 60; // inf
5641
5642 Reopen(options);
5643
5644 Random rnd(301);
5645
5646 for (int num = 0; num < 3; num++) {
5647 GenerateNewRandomFile(&rnd);
5648 }
5649
5650 GenerateNewRandomFile(&rnd);
5651 ASSERT_EQ("0,4", FilesPerLevel(0));
5652 ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual(
5653 options.compaction_filter_factory.get()));
5654
5655 GenerateNewRandomFile(&rnd);
5656 ASSERT_EQ("1,4", FilesPerLevel(0));
5657
5658 GenerateNewRandomFile(&rnd);
5659 ASSERT_EQ("2,4", FilesPerLevel(0));
5660
5661 GenerateNewRandomFile(&rnd);
5662 ASSERT_EQ("3,4", FilesPerLevel(0));
5663
5664 GenerateNewRandomFile(&rnd);
5665 ASSERT_EQ("0,4,4", FilesPerLevel(0));
5666
5667 GenerateNewRandomFile(&rnd);
5668 ASSERT_EQ("1,4,4", FilesPerLevel(0));
5669
5670 GenerateNewRandomFile(&rnd);
5671 ASSERT_EQ("2,4,4", FilesPerLevel(0));
5672
5673 GenerateNewRandomFile(&rnd);
5674 ASSERT_EQ("3,4,4", FilesPerLevel(0));
5675
5676 GenerateNewRandomFile(&rnd);
5677 ASSERT_EQ("0,4,8", FilesPerLevel(0));
5678
5679 GenerateNewRandomFile(&rnd);
5680 ASSERT_EQ("1,4,8", FilesPerLevel(0));
5681
5682 // compact it three times
5683 for (int i = 0; i < 3; ++i) {
5684 ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr));
5685 dbfull()->TEST_WaitForCompact();
5686 }
5687
5688 // All files are compacted
5689 ASSERT_EQ(0, NumTableFilesAtLevel(0));
5690 ASSERT_EQ(0, NumTableFilesAtLevel(1));
5691
5692 GenerateNewRandomFile(&rnd);
5693 ASSERT_EQ(1, NumTableFilesAtLevel(0));
5694
5695 // nonoverlapping with the file on level 0
5696 Slice start("a"), end("b");
5697 ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
5698 dbfull()->TEST_WaitForCompact();
5699
5700 // should not compact the level 0 file
5701 ASSERT_EQ(1, NumTableFilesAtLevel(0));
5702
5703 start = Slice("j");
5704 end = Slice("m");
5705 ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
5706 dbfull()->TEST_WaitForCompact();
5707 ASSERT_TRUE(CompactionFilterFactoryGetContext::IsManual(
5708 options.compaction_filter_factory.get()));
5709
5710 // now it should compact the level 0 file
5711 ASSERT_EQ(0, NumTableFilesAtLevel(0));
5712 ASSERT_EQ(1, NumTableFilesAtLevel(1));
5713}
5714
20effc67 5715
7c673cae
FG
5716TEST_F(DBTest, PromoteL0) {
5717 Options options = CurrentOptions();
5718 options.disable_auto_compactions = true;
5719 options.write_buffer_size = 10 * 1024 * 1024;
5720 DestroyAndReopen(options);
5721
5722 // non overlapping ranges
5723 std::vector<std::pair<int32_t, int32_t>> ranges = {
5724 {81, 160}, {0, 80}, {161, 240}, {241, 320}};
5725
5726 int32_t value_size = 10 * 1024; // 10 KB
5727
5728 Random rnd(301);
5729 std::map<int32_t, std::string> values;
5730 for (const auto& range : ranges) {
5731 for (int32_t j = range.first; j < range.second; j++) {
20effc67 5732 values[j] = rnd.RandomString(value_size);
7c673cae
FG
5733 ASSERT_OK(Put(Key(j), values[j]));
5734 }
5735 ASSERT_OK(Flush());
5736 }
5737
5738 int32_t level0_files = NumTableFilesAtLevel(0, 0);
5739 ASSERT_EQ(level0_files, ranges.size());
5740 ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // No files in L1
5741
5742 // Promote L0 level to L2.
5743 ASSERT_OK(experimental::PromoteL0(db_, db_->DefaultColumnFamily(), 2));
5744 // We expect that all the files were trivially moved from L0 to L2
5745 ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
5746 ASSERT_EQ(NumTableFilesAtLevel(2, 0), level0_files);
5747
5748 for (const auto& kv : values) {
5749 ASSERT_EQ(Get(Key(kv.first)), kv.second);
5750 }
5751}
5752
5753TEST_F(DBTest, PromoteL0Failure) {
5754 Options options = CurrentOptions();
5755 options.disable_auto_compactions = true;
5756 options.write_buffer_size = 10 * 1024 * 1024;
5757 DestroyAndReopen(options);
5758
5759 // Produce two L0 files with overlapping ranges.
5760 ASSERT_OK(Put(Key(0), ""));
5761 ASSERT_OK(Put(Key(3), ""));
5762 ASSERT_OK(Flush());
5763 ASSERT_OK(Put(Key(1), ""));
5764 ASSERT_OK(Flush());
5765
5766 Status status;
5767 // Fails because L0 has overlapping files.
5768 status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
5769 ASSERT_TRUE(status.IsInvalidArgument());
5770
5771 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
5772 // Now there is a file in L1.
5773 ASSERT_GE(NumTableFilesAtLevel(1, 0), 1);
5774
5775 ASSERT_OK(Put(Key(5), ""));
5776 ASSERT_OK(Flush());
5777 // Fails because L1 is non-empty.
5778 status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
5779 ASSERT_TRUE(status.IsInvalidArgument());
5780}
7c673cae
FG
5781
5782// Github issue #596
11fdf7f2
TL
5783TEST_F(DBTest, CompactRangeWithEmptyBottomLevel) {
5784 const int kNumLevels = 2;
5785 const int kNumL0Files = 2;
7c673cae 5786 Options options = CurrentOptions();
11fdf7f2
TL
5787 options.disable_auto_compactions = true;
5788 options.num_levels = kNumLevels;
7c673cae
FG
5789 DestroyAndReopen(options);
5790
5791 Random rnd(301);
11fdf7f2 5792 for (int i = 0; i < kNumL0Files; ++i) {
20effc67 5793 ASSERT_OK(Put(Key(0), rnd.RandomString(1024)));
11fdf7f2 5794 Flush();
7c673cae 5795 }
11fdf7f2
TL
5796 ASSERT_EQ(NumTableFilesAtLevel(0), kNumL0Files);
5797 ASSERT_EQ(NumTableFilesAtLevel(1), 0);
7c673cae
FG
5798
5799 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
11fdf7f2
TL
5800 ASSERT_EQ(NumTableFilesAtLevel(0), 0);
5801 ASSERT_EQ(NumTableFilesAtLevel(1), kNumL0Files);
7c673cae 5802}
11fdf7f2 5803#endif // ROCKSDB_LITE
7c673cae
FG
5804
5805TEST_F(DBTest, AutomaticConflictsWithManualCompaction) {
11fdf7f2 5806 const int kNumL0Files = 50;
7c673cae 5807 Options options = CurrentOptions();
11fdf7f2
TL
5808 options.level0_file_num_compaction_trigger = 4;
5809 // never slowdown / stop
5810 options.level0_slowdown_writes_trigger = 999999;
5811 options.level0_stop_writes_trigger = 999999;
7c673cae 5812 options.max_background_compactions = 10;
7c673cae
FG
5813 DestroyAndReopen(options);
5814
11fdf7f2
TL
5815 // schedule automatic compactions after the manual one starts, but before it
5816 // finishes to ensure conflict.
f67539c2 5817 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
11fdf7f2
TL
5818 {{"DBImpl::BackgroundCompaction:Start",
5819 "DBTest::AutomaticConflictsWithManualCompaction:PrePuts"},
5820 {"DBTest::AutomaticConflictsWithManualCompaction:PostPuts",
5821 "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}});
7c673cae 5822 std::atomic<int> callback_count(0);
f67539c2 5823 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
11fdf7f2
TL
5824 "DBImpl::MaybeScheduleFlushOrCompaction:Conflict",
5825 [&](void* /*arg*/) { callback_count.fetch_add(1); });
f67539c2 5826 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
11fdf7f2
TL
5827
5828 Random rnd(301);
5829 for (int i = 0; i < 2; ++i) {
5830 // put two keys to ensure no trivial move
5831 for (int j = 0; j < 2; ++j) {
20effc67 5832 ASSERT_OK(Put(Key(j), rnd.RandomString(1024)));
11fdf7f2
TL
5833 }
5834 ASSERT_OK(Flush());
5835 }
5836 port::Thread manual_compaction_thread([this]() {
5837 CompactRangeOptions croptions;
5838 croptions.exclusive_manual_compaction = true;
5839 ASSERT_OK(db_->CompactRange(croptions, nullptr, nullptr));
5840 });
5841
5842 TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PrePuts");
5843 for (int i = 0; i < kNumL0Files; ++i) {
5844 // put two keys to ensure no trivial move
5845 for (int j = 0; j < 2; ++j) {
20effc67 5846 ASSERT_OK(Put(Key(j), rnd.RandomString(1024)));
11fdf7f2
TL
5847 }
5848 ASSERT_OK(Flush());
5849 }
5850 TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PostPuts");
5851
7c673cae 5852 ASSERT_GE(callback_count.load(), 1);
11fdf7f2 5853 for (int i = 0; i < 2; ++i) {
7c673cae
FG
5854 ASSERT_NE("NOT_FOUND", Get(Key(i)));
5855 }
f67539c2 5856 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
11fdf7f2
TL
5857 manual_compaction_thread.join();
5858 dbfull()->TEST_WaitForCompact();
7c673cae
FG
5859}
5860
494da23a
TL
5861#ifndef ROCKSDB_LITE
5862TEST_F(DBTest, CompactFilesShouldTriggerAutoCompaction) {
5863 Options options = CurrentOptions();
5864 options.max_background_compactions = 1;
5865 options.level0_file_num_compaction_trigger = 4;
5866 options.level0_slowdown_writes_trigger = 36;
5867 options.level0_stop_writes_trigger = 36;
5868 DestroyAndReopen(options);
5869
5870 // generate files for manual compaction
5871 Random rnd(301);
5872 for (int i = 0; i < 2; ++i) {
5873 // put two keys to ensure no trivial move
5874 for (int j = 0; j < 2; ++j) {
20effc67 5875 ASSERT_OK(Put(Key(j), rnd.RandomString(1024)));
494da23a
TL
5876 }
5877 ASSERT_OK(Flush());
5878 }
5879
f67539c2 5880 ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_meta_data;
494da23a
TL
5881 db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data);
5882
5883 std::vector<std::string> input_files;
5884 input_files.push_back(cf_meta_data.levels[0].files[0].name);
5885
5886 SyncPoint::GetInstance()->LoadDependency({
5887 {"CompactFilesImpl:0",
5888 "DBTest::CompactFilesShouldTriggerAutoCompaction:Begin"},
5889 {"DBTest::CompactFilesShouldTriggerAutoCompaction:End",
5890 "CompactFilesImpl:1"},
5891 });
5892
5893 SyncPoint::GetInstance()->EnableProcessing();
5894
5895 port::Thread manual_compaction_thread([&]() {
5896 auto s = db_->CompactFiles(CompactionOptions(),
5897 db_->DefaultColumnFamily(), input_files, 0);
5898 });
5899
5900 TEST_SYNC_POINT(
5901 "DBTest::CompactFilesShouldTriggerAutoCompaction:Begin");
5902 // generate enough files to trigger compaction
5903 for (int i = 0; i < 20; ++i) {
5904 for (int j = 0; j < 2; ++j) {
20effc67 5905 ASSERT_OK(Put(Key(j), rnd.RandomString(1024)));
494da23a
TL
5906 }
5907 ASSERT_OK(Flush());
5908 }
5909 db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data);
5910 ASSERT_GT(cf_meta_data.levels[0].files.size(),
5911 options.level0_file_num_compaction_trigger);
5912 TEST_SYNC_POINT(
5913 "DBTest::CompactFilesShouldTriggerAutoCompaction:End");
5914
5915 manual_compaction_thread.join();
5916 dbfull()->TEST_WaitForCompact();
5917
5918 db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data);
5919 ASSERT_LE(cf_meta_data.levels[0].files.size(),
5920 options.level0_file_num_compaction_trigger);
5921}
5922#endif // ROCKSDB_LITE
5923
7c673cae
FG
5924// Github issue #595
5925// Large write batch with column families
5926TEST_F(DBTest, LargeBatchWithColumnFamilies) {
5927 Options options = CurrentOptions();
5928 options.env = env_;
5929 options.write_buffer_size = 100000; // Small write buffer
5930 CreateAndReopenWithCF({"pikachu"}, options);
5931 int64_t j = 0;
5932 for (int i = 0; i < 5; i++) {
5933 for (int pass = 1; pass <= 3; pass++) {
5934 WriteBatch batch;
5935 size_t write_size = 1024 * 1024 * (5 + i);
5936 fprintf(stderr, "prepare: %" ROCKSDB_PRIszt " MB, pass:%d\n",
5937 (write_size / 1024 / 1024), pass);
5938 for (;;) {
5939 std::string data(3000, j++ % 127 + 20);
5940 data += ToString(j);
5941 batch.Put(handles_[0], Slice(data), Slice(data));
5942 if (batch.GetDataSize() > write_size) {
5943 break;
5944 }
5945 }
5946 fprintf(stderr, "write: %" ROCKSDB_PRIszt " MB\n",
5947 (batch.GetDataSize() / 1024 / 1024));
5948 ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
5949 fprintf(stderr, "done\n");
5950 }
5951 }
5952 // make sure we can re-open it.
5953 ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
5954}
5955
5956// Make sure that Flushes can proceed in parallel with CompactRange()
5957TEST_F(DBTest, FlushesInParallelWithCompactRange) {
5958 // iter == 0 -- leveled
5959 // iter == 1 -- leveled, but throw in a flush between two levels compacting
5960 // iter == 2 -- universal
5961 for (int iter = 0; iter < 3; ++iter) {
5962 Options options = CurrentOptions();
5963 if (iter < 2) {
5964 options.compaction_style = kCompactionStyleLevel;
5965 } else {
5966 options.compaction_style = kCompactionStyleUniversal;
5967 }
5968 options.write_buffer_size = 110 << 10;
5969 options.level0_file_num_compaction_trigger = 4;
5970 options.num_levels = 4;
5971 options.compression = kNoCompression;
5972 options.max_bytes_for_level_base = 450 << 10;
5973 options.target_file_size_base = 98 << 10;
5974 options.max_write_buffer_number = 2;
5975
5976 DestroyAndReopen(options);
5977
5978 Random rnd(301);
5979 for (int num = 0; num < 14; num++) {
5980 GenerateNewRandomFile(&rnd);
5981 }
5982
5983 if (iter == 1) {
f67539c2 5984 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
7c673cae
FG
5985 {{"DBImpl::RunManualCompaction()::1",
5986 "DBTest::FlushesInParallelWithCompactRange:1"},
5987 {"DBTest::FlushesInParallelWithCompactRange:2",
5988 "DBImpl::RunManualCompaction()::2"}});
5989 } else {
f67539c2 5990 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
7c673cae
FG
5991 {{"CompactionJob::Run():Start",
5992 "DBTest::FlushesInParallelWithCompactRange:1"},
5993 {"DBTest::FlushesInParallelWithCompactRange:2",
5994 "CompactionJob::Run():End"}});
5995 }
f67539c2 5996 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae
FG
5997
5998 std::vector<port::Thread> threads;
5999 threads.emplace_back([&]() { Compact("a", "z"); });
6000
6001 TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:1");
6002
6003 // this has to start a flush. if flushes are blocked, this will try to
6004 // create
6005 // 3 memtables, and that will fail because max_write_buffer_number is 2
6006 for (int num = 0; num < 3; num++) {
6007 GenerateNewRandomFile(&rnd, /* nowait */ true);
6008 }
6009
6010 TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:2");
6011
6012 for (auto& t : threads) {
6013 t.join();
6014 }
f67539c2 6015 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7c673cae
FG
6016 }
6017}
6018
// Verifies that once the L0 slowdown trigger fires, writes are throttled at
// (approximately) delayed_write_rate, and that the rate is further reduced
// after each memtable switch / flush.  The total time spent (as seen by the
// mock clock) must land within a factor of two of the analytic estimate.
TEST_F(DBTest, DelayedWriteRate) {
  const int kEntriesPerMemTable = 100;
  const int kTotalFlushes = 12;

  Options options = CurrentOptions();
  env_->SetBackgroundThreads(1, Env::LOW);
  options.env = env_;
  options.write_buffer_size = 100000000;
  options.max_write_buffer_number = 256;
  options.max_background_compactions = 1;
  // Slowdown kicks in at 3 L0 files; the stop trigger is effectively
  // disabled so the test only exercises the delayed (not stopped) path.
  options.level0_file_num_compaction_trigger = 3;
  options.level0_slowdown_writes_trigger = 3;
  options.level0_stop_writes_trigger = 999999;
  options.delayed_write_rate = 20000000;  // Start with 200MB/s
  options.memtable_factory.reset(
      new SpecialSkipListFactory(kEntriesPerMemTable));

  // Presumably makes env_->NowMicros() advance only via SleepForMicroseconds
  // so elapsed "time" is exactly the throttling sleeps -- TODO confirm.
  SetTimeElapseOnlySleepOnReopen(&options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Block compactions
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  // Create 3 L0 files to reach the slowdown trigger.
  for (int i = 0; i < 3; i++) {
    Put(Key(i), std::string(10000, 'x'));
    Flush();
  }

  // These writes will be slowed down to 1KB/s
  uint64_t estimated_sleep_time = 0;
  Random rnd(301);
  Put("", "");
  uint64_t cur_rate = options.delayed_write_rate;
  for (int i = 0; i < kTotalFlushes; i++) {
    uint64_t size_memtable = 0;
    for (int j = 0; j < kEntriesPerMemTable; j++) {
      auto rand_num = rnd.Uniform(20);
      // Spread the size range to more.
      size_t entry_size = rand_num * rand_num * rand_num;
      WriteOptions wo;
      Put(Key(i), std::string(entry_size, 'x'), wo);
      // +18 approximates per-entry overhead (key + internal metadata).
      size_memtable += entry_size + 18;
      // Occasionally sleep a while
      if (rnd.Uniform(20) == 6) {
        env_->SleepForMicroseconds(2666);
      }
    }
    dbfull()->TEST_WaitForFlushMemTable();
    // Expected sleep for this memtable: bytes / rate, in microseconds.
    estimated_sleep_time += size_memtable * 1000000u / cur_rate;
    // Slow down twice. One for memtable switch and one for flush finishes.
    cur_rate = static_cast<uint64_t>(static_cast<double>(cur_rate) *
                                     kIncSlowdownRatio * kIncSlowdownRatio);
  }
  // Estimate the total sleep time fall into the rough range.
  ASSERT_GT(env_->NowMicros(), estimated_sleep_time / 2);
  ASSERT_LT(env_->NowMicros(), estimated_sleep_time * 2);

  // NOTE(review): DisableProcessing() without a matching EnableProcessing()
  // in this test -- harmless, but looks like leftover cleanup boilerplate.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
}
6082
6083TEST_F(DBTest, HardLimit) {
6084 Options options = CurrentOptions();
6085 options.env = env_;
6086 env_->SetBackgroundThreads(1, Env::LOW);
6087 options.max_write_buffer_number = 256;
6088 options.write_buffer_size = 110 << 10; // 110KB
6089 options.arena_block_size = 4 * 1024;
6090 options.level0_file_num_compaction_trigger = 4;
6091 options.level0_slowdown_writes_trigger = 999999;
6092 options.level0_stop_writes_trigger = 999999;
6093 options.hard_pending_compaction_bytes_limit = 800 << 10;
6094 options.max_bytes_for_level_base = 10000000000u;
6095 options.max_background_compactions = 1;
6096 options.memtable_factory.reset(
6097 new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
6098
6099 env_->SetBackgroundThreads(1, Env::LOW);
6100 test::SleepingBackgroundTask sleeping_task_low;
6101 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
6102 Env::Priority::LOW);
6103
6104 CreateAndReopenWithCF({"pikachu"}, options);
6105
6106 std::atomic<int> callback_count(0);
f67539c2
TL
6107 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6108 "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) {
6109 callback_count.fetch_add(1);
6110 sleeping_task_low.WakeUp();
6111 });
6112 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
7c673cae
FG
6113
6114 Random rnd(301);
6115 int key_idx = 0;
6116 for (int num = 0; num < 5; num++) {
6117 GenerateNewFile(&rnd, &key_idx, true);
6118 dbfull()->TEST_WaitForFlushMemTable();
6119 }
6120
6121 ASSERT_EQ(0, callback_count.load());
6122
6123 for (int num = 0; num < 5; num++) {
6124 GenerateNewFile(&rnd, &key_idx, true);
6125 dbfull()->TEST_WaitForFlushMemTable();
6126 }
6127 ASSERT_GE(callback_count.load(), 1);
6128
f67539c2 6129 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
7c673cae
FG
6130 sleeping_task_low.WaitUntilDone();
6131}
6132
11fdf7f2
TL
6133#if !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
6134class WriteStallListener : public EventListener {
6135 public:
494da23a 6136 WriteStallListener() : condition_(WriteStallCondition::kNormal) {}
11fdf7f2
TL
6137 void OnStallConditionsChanged(const WriteStallInfo& info) override {
6138 MutexLock l(&mutex_);
6139 condition_ = info.condition.cur;
11fdf7f2
TL
6140 }
6141 bool CheckCondition(WriteStallCondition expected) {
6142 MutexLock l(&mutex_);
494da23a 6143 return expected == condition_;
11fdf7f2
TL
6144 }
6145 private:
6146 port::Mutex mutex_;
11fdf7f2 6147 WriteStallCondition condition_;
11fdf7f2
TL
6148};
6149
7c673cae
FG
// Exercises the soft (delay, not stop) write-stall triggers:
// level0_slowdown_writes_trigger and soft_pending_compaction_bytes_limit.
// At each stage it checks both the write controller state and the condition
// reported to the WriteStallListener.
TEST_F(DBTest, SoftLimit) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;  // Small write buffer
  options.max_write_buffer_number = 256;
  options.level0_file_num_compaction_trigger = 1;
  options.level0_slowdown_writes_trigger = 3;
  options.level0_stop_writes_trigger = 999999;
  options.delayed_write_rate = 20000;  // About 200KB/s limited rate
  options.soft_pending_compaction_bytes_limit = 160000;
  options.target_file_size_base = 99999999;  // All into one file
  options.max_bytes_for_level_base = 50000;
  options.max_bytes_for_level_multiplier = 10;
  options.max_background_compactions = 1;
  options.compression = kNoCompression;
  // Raw pointer is kept for assertions; ownership is shared with the DB via
  // options.listeners.
  WriteStallListener* listener = new WriteStallListener();
  options.listeners.emplace_back(listener);

  // FlushMemtable with opt.wait=true does not wait for
  // `OnStallConditionsChanged` being called. The event listener is triggered
  // on `JobContext::Clean`, which happens after flush result is installed.
  // We use sync point to create a custom WaitForFlush that waits for
  // context cleanup.
  port::Mutex flush_mutex;
  port::CondVar flush_cv(&flush_mutex);
  bool flush_finished = false;
  // Arms a one-shot sync-point callback that signals flush_cv once the
  // flush's job context has been cleaned up (i.e. listeners have run).
  auto InstallFlushCallback = [&]() {
    {
      MutexLock l(&flush_mutex);
      flush_finished = false;
    }
    SyncPoint::GetInstance()->SetCallBack(
        "DBImpl::BackgroundCallFlush:ContextCleanedUp", [&](void*) {
          {
            MutexLock l(&flush_mutex);
            flush_finished = true;
          }
          flush_cv.SignalAll();
        });
  };
  // Blocks until the callback installed above has fired, then disarms it.
  auto WaitForFlush = [&]() {
    {
      MutexLock l(&flush_mutex);
      while (!flush_finished) {
        flush_cv.Wait();
      }
    }
    SyncPoint::GetInstance()->ClearCallBack(
        "DBImpl::BackgroundCallFlush:ContextCleanedUp");
  };

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Reopen(options);

  // Generating 360KB in Level 3
  for (int i = 0; i < 72; i++) {
    Put(Key(i), std::string(5000, 'x'));
    if (i % 10 == 0) {
      dbfull()->TEST_FlushMemTable(true, true);
    }
  }
  dbfull()->TEST_WaitForCompact();
  MoveFilesToLevel(3);

  // Generating 360KB in Level 2
  for (int i = 0; i < 72; i++) {
    Put(Key(i), std::string(5000, 'x'));
    if (i % 10 == 0) {
      dbfull()->TEST_FlushMemTable(true, true);
    }
  }
  dbfull()->TEST_WaitForCompact();
  MoveFilesToLevel(2);

  Put(Key(0), "");

  test::SleepingBackgroundTask sleeping_task_low;
  // Block compactions
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();

  // Create 3 L0 files, making score of L0 to be 3.
  for (int i = 0; i < 3; i++) {
    Put(Key(i), std::string(5000, 'x'));
    Put(Key(100 - i), std::string(5000, 'x'));
    // Flush the file. File size is around 30KB.
    InstallFlushCallback();
    dbfull()->TEST_FlushMemTable(true, true);
    WaitForFlush();
  }
  // Three L0 files == slowdown trigger: writes must now be delayed.
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed));

  // Unblock compaction so the L0 files get merged into L1.
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
  sleeping_task_low.Reset();
  dbfull()->TEST_WaitForCompact();

  // Now there is one L1 file but doesn't trigger soft_rate_limit
  // The L1 file size is around 30KB.
  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal));

  // Only allow one compaction going through: as soon as a compaction starts,
  // re-occupy the LOW thread so no second compaction can follow.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "BackgroundCallCompaction:0", [&](void* /*arg*/) {
        // Schedule a sleeping task.
        sleeping_task_low.Reset();
        env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                       &sleeping_task_low, Env::Priority::LOW);
      });

  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();
  // Create 3 L0 files, making score of L0 to be 3
  for (int i = 0; i < 3; i++) {
    Put(Key(10 + i), std::string(5000, 'x'));
    Put(Key(90 - i), std::string(5000, 'x'));
    // Flush the file. File size is around 30KB.
    InstallFlushCallback();
    dbfull()->TEST_FlushMemTable(true, true);
    WaitForFlush();
  }

  // Wake up sleep task to enable compaction to run and waits
  // for it to go to sleep state again to make sure one compaction
  // goes through.
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilSleeping();

  // Now there is one L1 file (around 60KB) which exceeds 50KB base by 10KB
  // Given level multiplier 10, estimated pending compaction is around 100KB
  // doesn't trigger soft_pending_compaction_bytes_limit
  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal));

  // Create 3 L0 files, making score of L0 to be 3, higher than L0.
  for (int i = 0; i < 3; i++) {
    Put(Key(20 + i), std::string(5000, 'x'));
    Put(Key(80 - i), std::string(5000, 'x'));
    // Flush the file. File size is around 30KB.
    InstallFlushCallback();
    dbfull()->TEST_FlushMemTable(true, true);
    WaitForFlush();
  }
  // Wake up sleep task to enable compaction to run and waits
  // for it to go to sleep state again to make sure one compaction
  // goes through.
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilSleeping();

  // Now there is one L1 file (around 90KB) which exceeds 50KB base by 40KB
  // L2 size is 360KB, so the estimated level fanout 4, estimated pending
  // compaction is around 200KB
  // triggerring soft_pending_compaction_bytes_limit
  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed));

  // Let the pending compaction run; the stall must clear afterwards.
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilSleeping();

  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal));

  // shrink level base so L2 will hit soft limit easier.
  ASSERT_OK(dbfull()->SetOptions({
      {"max_bytes_for_level_base", "5000"},
  }));

  Put("", "");
  Flush();
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed));

  sleeping_task_low.WaitUntilSleeping();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
}
6335
6336TEST_F(DBTest, LastWriteBufferDelay) {
6337 Options options = CurrentOptions();
6338 options.env = env_;
6339 options.write_buffer_size = 100000;
6340 options.max_write_buffer_number = 4;
6341 options.delayed_write_rate = 20000;
6342 options.compression = kNoCompression;
6343 options.disable_auto_compactions = true;
6344 int kNumKeysPerMemtable = 3;
6345 options.memtable_factory.reset(
6346 new SpecialSkipListFactory(kNumKeysPerMemtable));
6347
6348 Reopen(options);
6349 test::SleepingBackgroundTask sleeping_task;
6350 // Block flushes
6351 env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
6352 Env::Priority::HIGH);
6353 sleeping_task.WaitUntilSleeping();
6354
6355 // Create 3 L0 files, making score of L0 to be 3.
6356 for (int i = 0; i < 3; i++) {
6357 // Fill one mem table
6358 for (int j = 0; j < kNumKeysPerMemtable; j++) {
6359 Put(Key(j), "");
6360 }
6361 ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
6362 }
6363 // Inserting a new entry would create a new mem table, triggering slow down.
6364 Put(Key(0), "");
6365 ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
6366
6367 sleeping_task.WakeUp();
6368 sleeping_task.WaitUntilDone();
6369}
11fdf7f2 6370#endif // !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
7c673cae
FG
6371
6372TEST_F(DBTest, FailWhenCompressionNotSupportedTest) {
6373 CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
6374 kLZ4Compression, kLZ4HCCompression,
6375 kXpressCompression};
6376 for (auto comp : compressions) {
6377 if (!CompressionTypeSupported(comp)) {
6378 // not supported, we should fail the Open()
6379 Options options = CurrentOptions();
6380 options.compression = comp;
6381 ASSERT_TRUE(!TryReopen(options).ok());
6382 // Try if CreateColumnFamily also fails
6383 options.compression = kNoCompression;
6384 ASSERT_OK(TryReopen(options));
6385 ColumnFamilyOptions cf_options(options);
6386 cf_options.compression = comp;
6387 ColumnFamilyHandle* handle;
6388 ASSERT_TRUE(!db_->CreateColumnFamily(cf_options, "name", &handle).ok());
6389 }
6390 }
6391}
6392
f67539c2
TL
6393TEST_F(DBTest, CreateColumnFamilyShouldFailOnIncompatibleOptions) {
6394 Options options = CurrentOptions();
6395 options.max_open_files = 100;
6396 Reopen(options);
6397
6398 ColumnFamilyOptions cf_options(options);
6399 // ttl is now supported when max_open_files is -1.
6400 cf_options.ttl = 3600;
6401 ColumnFamilyHandle* handle;
6402 ASSERT_OK(db_->CreateColumnFamily(cf_options, "pikachu", &handle));
6403 delete handle;
6404}
6405
7c673cae
FG
6406#ifndef ROCKSDB_LITE
6407TEST_F(DBTest, RowCache) {
6408 Options options = CurrentOptions();
f67539c2 6409 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
7c673cae
FG
6410 options.row_cache = NewLRUCache(8192);
6411 DestroyAndReopen(options);
6412
6413 ASSERT_OK(Put("foo", "bar"));
6414 ASSERT_OK(Flush());
6415
6416 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
6417 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0);
6418 ASSERT_EQ(Get("foo"), "bar");
6419 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
6420 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
6421 ASSERT_EQ(Get("foo"), "bar");
6422 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
6423 ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
6424}
11fdf7f2
TL
6425
6426TEST_F(DBTest, PinnableSliceAndRowCache) {
6427 Options options = CurrentOptions();
f67539c2 6428 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
11fdf7f2
TL
6429 options.row_cache = NewLRUCache(8192);
6430 DestroyAndReopen(options);
6431
6432 ASSERT_OK(Put("foo", "bar"));
6433 ASSERT_OK(Flush());
6434
6435 ASSERT_EQ(Get("foo"), "bar");
6436 ASSERT_EQ(
6437 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
6438 1);
6439
6440 {
6441 PinnableSlice pin_slice;
6442 ASSERT_EQ(Get("foo", &pin_slice), Status::OK());
6443 ASSERT_EQ(pin_slice.ToString(), "bar");
6444 // Entry is already in cache, lookup will remove the element from lru
6445 ASSERT_EQ(
6446 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
6447 0);
6448 }
6449 // After PinnableSlice destruction element is added back in LRU
6450 ASSERT_EQ(
6451 reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
6452 1);
6453}
6454
7c673cae
FG
6455#endif // ROCKSDB_LITE
6456
// Regression test: after dropping the only column family referencing the
// oldest WAL, subsequent writes to another CF must still roll to a new WAL
// (i.e. the dropped CF's pending flush must not pin the log number).
TEST_F(DBTest, DeletingOldWalAfterDrop) {
  // Ordering: flushes may not start until "Test:AllowFlushes" is reached,
  // and "Test:WaitForFlush" blocks until a background flush has completed.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"Test:AllowFlushes", "DBImpl::BGWorkFlush"},
       {"DBImpl::BGWorkFlush:done", "Test:WaitForFlush"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();

  // Keep sync points disabled during setup so DestroyAndReopen's own
  // flushes don't trip the dependency above.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  Options options = CurrentOptions();
  options.max_total_wal_size = 8192;
  options.compression = kNoCompression;
  options.write_buffer_size = 1 << 20;
  // Effectively disable compaction-related triggers for this test.
  options.level0_file_num_compaction_trigger = (1 << 30);
  options.level0_slowdown_writes_trigger = (1 << 30);
  options.level0_stop_writes_trigger = (1 << 30);
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  CreateColumnFamilies({"cf1", "cf2"}, options);
  // Two 8KB writes exceed max_total_wal_size and schedule a WAL-switch flush.
  ASSERT_OK(Put(0, "key1", DummyString(8192)));
  ASSERT_OK(Put(0, "key2", DummyString(8192)));
  // the oldest wal should now be getting_flushed
  ASSERT_OK(db_->DropColumnFamily(handles_[0]));
  // all flushes should now do nothing because their CF is dropped
  TEST_SYNC_POINT("Test:AllowFlushes");
  TEST_SYNC_POINT("Test:WaitForFlush");
  uint64_t lognum1 = dbfull()->TEST_LogfileNumber();
  ASSERT_OK(Put(1, "key3", DummyString(8192)));
  ASSERT_OK(Put(1, "key4", DummyString(8192)));
  // new wal should have been created
  uint64_t lognum2 = dbfull()->TEST_LogfileNumber();
  EXPECT_GT(lognum2, lognum1);
}
6490
6491TEST_F(DBTest, UnsupportedManualSync) {
6492 DestroyAndReopen(CurrentOptions());
6493 env_->is_wal_sync_thread_safe_.store(false);
6494 Status s = db_->SyncWAL();
6495 ASSERT_TRUE(s.IsNotSupported());
6496}
6497
// Instantiate the parameterized DBTestWithParam suite over the cartesian
// product of {1, 4} and {false, true}.  (The suite is defined elsewhere in
// this file; presumably the int is max_subcompactions and the bool selects
// exclusive manual compaction -- TODO confirm against the fixture.)
INSTANTIATE_TEST_CASE_P(DBTestWithParam, DBTestWithParam,
                        ::testing::Combine(::testing::Values(1, 4),
                                           ::testing::Bool()));
6501
6502TEST_F(DBTest, PauseBackgroundWorkTest) {
6503 Options options = CurrentOptions();
6504 options.write_buffer_size = 100000; // Small write buffer
6505 Reopen(options);
6506
6507 std::vector<port::Thread> threads;
6508 std::atomic<bool> done(false);
6509 db_->PauseBackgroundWork();
6510 threads.emplace_back([&]() {
6511 Random rnd(301);
6512 for (int i = 0; i < 10000; ++i) {
20effc67 6513 Put(rnd.RandomString(10), rnd.RandomString(10));
7c673cae
FG
6514 }
6515 done.store(true);
6516 });
6517 env_->SleepForMicroseconds(200000);
6518 // make sure the thread is not done
6519 ASSERT_FALSE(done.load());
6520 db_->ContinueBackgroundWork();
6521 for (auto& t : threads) {
6522 t.join();
6523 }
6524 // now it's done
6525 ASSERT_TRUE(done.load());
6526}
6527
11fdf7f2
TL
// Keep spawning short-living threads that create an iterator and quit.
// Meanwhile in another thread keep flushing memtables.
// This used to cause a deadlock.
TEST_F(DBTest, ThreadLocalPtrDeadlock) {
  std::atomic<int> flushes_done{0};
  std::atomic<int> threads_destroyed{0};
  // Global stop condition for every thread in this test.
  auto done = [&] {
    return flushes_done.load() > 10;
  };

  // Flusher: one Put + Flush per iteration until done().
  port::Thread flushing_thread([&] {
    for (int i = 0; !done(); ++i) {
      ASSERT_OK(db_->Put(WriteOptions(), Slice("hi"),
                         Slice(std::to_string(i).c_str())));
      ASSERT_OK(db_->Flush(FlushOptions()));
      int cnt = ++flushes_done;
      fprintf(stderr, "Flushed %d times\n", cnt);
    }
  });

  // 10 spawner threads, each repeatedly launching a short-lived thread whose
  // only job is to create and destroy an iterator, exercising thread-local
  // storage construction/teardown concurrently with flushes.
  std::vector<port::Thread> thread_spawning_threads(10);
  for (auto& t: thread_spawning_threads) {
    t = port::Thread([&] {
      while (!done()) {
        {
          port::Thread tmp_thread([&] {
            auto it = db_->NewIterator(ReadOptions());
            delete it;
          });
          tmp_thread.join();
        }
        ++threads_destroyed;
      }
    });
  }

  for (auto& t: thread_spawning_threads) {
    t.join();
  }
  flushing_thread.join();
  fprintf(stderr, "Done. Flushed %d times, destroyed %d threads\n",
          flushes_done.load(), threads_destroyed.load());
}
f67539c2
TL
6571
6572TEST_F(DBTest, LargeBlockSizeTest) {
6573 Options options = CurrentOptions();
6574 CreateAndReopenWithCF({"pikachu"}, options);
6575 ASSERT_OK(Put(0, "foo", "bar"));
6576 BlockBasedTableOptions table_options;
6577 table_options.block_size = 8LL * 1024 * 1024 * 1024LL;
6578 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
6579 ASSERT_NOK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
6580}
6581
6582#ifndef ROCKSDB_LITE
6583
// Exercises DB::GetCreationTimeOfOldestFile():
//   1. If any live file has file_creation_time == 0 the API returns 0.
//   2. Otherwise it returns the smallest (oldest) creation time.
//   3. With max_open_files != -1 the API is NotSupported (table properties
//      may not be loaded for all files).
// Sync points are used to force specific file_creation_time values into the
// table properties, and to zero the value recorded in the manifest so the
// API must read it from the table properties.
TEST_F(DBTest, CreationTimeOfOldestFile) {
  const int kNumKeysPerFile = 32;
  const int kNumLevelFiles = 2;
  const int kValueSize = 100;

  Options options = CurrentOptions();
  options.max_open_files = -1;
  env_->SetMockSleep();
  options.env = env_;

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  DestroyAndReopen(options);

  // Controls which branch of the property-override callback below runs.
  bool set_file_creation_time_to_zero = true;
  // Index of the file being written within the current phase (0 or 1).
  int idx = 0;

  int64_t time_1 = 0;
  env_->GetCurrentTime(&time_1);
  const uint64_t uint_time_1 = static_cast<uint64_t>(time_1);

  // Add 50 hours
  env_->MockSleepForSeconds(50 * 60 * 60);

  int64_t time_2 = 0;
  env_->GetCurrentTime(&time_2);
  const uint64_t uint_time_2 = static_cast<uint64_t>(time_2);

  // Override file_creation_time in each file's table properties:
  // phase 1 writes {0, time_1}; phase 2 writes {time_1, time_2}.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) {
        TableProperties* props = reinterpret_cast<TableProperties*>(arg);
        if (set_file_creation_time_to_zero) {
          if (idx == 0) {
            props->file_creation_time = 0;
            idx++;
          } else if (idx == 1) {
            props->file_creation_time = uint_time_1;
            idx = 0;
          }
        } else {
          if (idx == 0) {
            props->file_creation_time = uint_time_1;
            idx++;
          } else if (idx == 1) {
            props->file_creation_time = uint_time_2;
          }
        }
      });
  // Set file creation time in manifest all to 0.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "FileMetaData::FileMetaData", [&](void* arg) {
        FileMetaData* meta = static_cast<FileMetaData*>(arg);
        meta->file_creation_time = 0;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);
  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
    }
    Flush();
  }

  // At this point there should be 2 files, one with file_creation_time = 0 and
  // the other non-zero. GetCreationTimeOfOldestFile API should return 0.
  uint64_t creation_time;
  Status s1 = dbfull()->GetCreationTimeOfOldestFile(&creation_time);
  ASSERT_EQ(0, creation_time);
  ASSERT_EQ(s1, Status::OK());

  // Testing with non-zero file creation time.
  set_file_creation_time_to_zero = false;
  options = CurrentOptions();
  options.max_open_files = -1;
  options.env = env_;

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  DestroyAndReopen(options);

  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
    }
    Flush();
  }

  // At this point there should be 2 files with non-zero file creation time.
  // GetCreationTimeOfOldestFile API should return non-zero value.
  uint64_t ctime;
  Status s2 = dbfull()->GetCreationTimeOfOldestFile(&ctime);
  ASSERT_EQ(uint_time_1, ctime);
  ASSERT_EQ(s2, Status::OK());

  // Testing with max_open_files != -1
  options = CurrentOptions();
  options.max_open_files = 10;
  DestroyAndReopen(options);
  Status s3 = dbfull()->GetCreationTimeOfOldestFile(&ctime);
  ASSERT_EQ(s3, Status::NotSupported());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
6690
20effc67
TL
6691TEST_F(DBTest, MemoryUsageWithMaxWriteBufferSizeToMaintain) {
6692 Options options = CurrentOptions();
6693 options.max_write_buffer_size_to_maintain = 10000;
6694 options.write_buffer_size = 160000;
6695 Reopen(options);
6696 Random rnd(301);
6697 bool memory_limit_exceeded = false;
6698 uint64_t size_all_mem_table = 0;
6699 uint64_t cur_active_mem = 0;
6700 for (int i = 0; i < 1000; i++) {
6701 std::string value = rnd.RandomString(1000);
6702 ASSERT_OK(Put("keykey_" + std::to_string(i), value));
6703
6704 dbfull()->TEST_WaitForFlushMemTable();
6705
6706 ASSERT_TRUE(db_->GetIntProperty(db_->DefaultColumnFamily(),
6707 DB::Properties::kSizeAllMemTables,
6708 &size_all_mem_table));
6709 ASSERT_TRUE(db_->GetIntProperty(db_->DefaultColumnFamily(),
6710 DB::Properties::kCurSizeActiveMemTable,
6711 &cur_active_mem));
6712
6713 // Errors out if memory usage keeps on increasing beyond the limit.
6714 // Once memory limit exceeds, memory_limit_exceeded is set and if
6715 // size_all_mem_table doesn't drop out in the next write then it errors out
6716 // (not expected behaviour). If memory usage drops then
6717 // memory_limit_exceeded is set to false.
6718 if ((size_all_mem_table > cur_active_mem) &&
6719 (cur_active_mem >=
6720 static_cast<uint64_t>(options.max_write_buffer_size_to_maintain)) &&
6721 (size_all_mem_table > options.max_write_buffer_size_to_maintain +
6722 options.write_buffer_size)) {
6723 ASSERT_FALSE(memory_limit_exceeded);
6724 memory_limit_exceeded = true;
6725 } else {
6726 memory_limit_exceeded = false;
6727 }
6728 }
6729}
6730
f67539c2
TL
6731#endif
6732
6733} // namespace ROCKSDB_NAMESPACE
6734
#ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
// Hook implemented by a statically linked library to register custom
// objects (e.g. plugin Envs or table factories) before tests run.
extern "C" {
void RegisterCustomObjects(int argc, char** argv);
}
#else
// Default no-op when no custom-object library is linked in.
void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {}
#endif  // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
7c673cae
FG
6742
// Test entry point: install the crash/stack-trace handler first so failures
// during test setup are reported with symbols, then run all gtest cases.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  RegisterCustomObjects(argc, argv);
  return RUN_ALL_TESTS();
}