// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#ifndef ROCKSDB_LITE

#include <atomic>
#include <functional>
#include <string>
#include <utility>
#include <vector>

#include "db/db_impl.h"
#include "db/write_callback.h"
#include "rocksdb/db.h"
#include "rocksdb/write_batch.h"
#include "port/port.h"
#include "util/random.h"
#include "util/sync_point.h"
#include "util/testharness.h"

using std::string;

namespace rocksdb {

class WriteCallbackTest : public testing::Test {
 public:
  string dbname;

  WriteCallbackTest() {
    dbname = test::PerThreadDBPath("write_callback_testdb");
  }
};

35
36 class WriteCallbackTestWriteCallback1 : public WriteCallback {
37 public:
38 bool was_called = false;
39
40 Status Callback(DB *db) override {
41 was_called = true;
42
43 // Make sure db is a DBImpl
44 DBImpl* db_impl = dynamic_cast<DBImpl*> (db);
45 if (db_impl == nullptr) {
46 return Status::InvalidArgument("");
47 }
48
49 return Status::OK();
50 }
51
52 bool AllowWriteBatching() override { return true; }
53 };

class WriteCallbackTestWriteCallback2 : public WriteCallback {
 public:
  Status Callback(DB* /*db*/) override { return Status::Busy(); }
  bool AllowWriteBatching() override { return true; }
};

class MockWriteCallback : public WriteCallback {
 public:
  bool should_fail_ = false;
  bool allow_batching_ = false;
  std::atomic<bool> was_called_{false};

  MockWriteCallback() {}

  MockWriteCallback(const MockWriteCallback& other) {
    should_fail_ = other.should_fail_;
    allow_batching_ = other.allow_batching_;
    was_called_.store(other.was_called_.load());
  }

  Status Callback(DB* /*db*/) override {
    was_called_.store(true);
    if (should_fail_) {
      return Status::Busy();
    } else {
      return Status::OK();
    }
  }

  bool AllowWriteBatching() override { return allow_batching_; }
};
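
// Illustrative sketch (comments only, not compiled): a WriteCallback handed
// to DBImpl::WriteWithCallback runs once the writer has joined a write
// group; if it returns a non-OK status (e.g. Status::Busy()), the batch is
// rejected and never becomes visible. Assuming `db_impl` is a DBImpl*
// obtained as in the tests below:
//
//   MockWriteCallback cb;
//   cb.should_fail_ = true;
//   WriteBatch batch;
//   batch.Put("key", "value");
//   Status s = db_impl->WriteWithCallback(WriteOptions(), &batch, &cb);
//   assert(s.IsBusy());  // the callback vetoed the write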

TEST_F(WriteCallbackTest, WriteWithCallbackTest) {
  struct WriteOP {
    WriteOP(bool should_fail = false) { callback_.should_fail_ = should_fail; }

    void Put(const string& key, const string& val) {
      kvs_.push_back(std::make_pair(key, val));
      write_batch_.Put(key, val);
    }

    void Clear() {
      kvs_.clear();
      write_batch_.Clear();
      callback_.was_called_.store(false);
    }

    MockWriteCallback callback_;
    WriteBatch write_batch_;
    std::vector<std::pair<string, string>> kvs_;
  };

  // In each scenario we launch multiple threads to write. The size of each
  // array equals the number of threads, and each boolean denotes whether the
  // callback of the corresponding thread should succeed (false) or fail
  // (true).
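  // For example, {true, false} launches two writers: thread 0's callback
  // returns Status::Busy() and its batch must not appear in the DB, while
  // thread 1's callback succeeds and its keys must be readable afterwards.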
  std::vector<std::vector<WriteOP>> write_scenarios = {
      {true},
      {false},
      {false, false},
      {true, true},
      {true, false},
      {false, true},
      {false, false, false},
      {true, true, true},
      {false, true, false},
      {true, false, true},
      {true, false, false, false, false},
      {false, false, false, false, true},
      {false, false, true, false, true},
  };

  for (auto& seq_per_batch : {true, false}) {
    for (auto& two_queues : {true, false}) {
      for (auto& allow_parallel : {true, false}) {
        for (auto& allow_batching : {true, false}) {
          for (auto& enable_WAL : {true, false}) {
            for (auto& enable_pipelined_write : {true, false}) {
              for (auto& write_group : write_scenarios) {
                Options options;
                options.create_if_missing = true;
                options.allow_concurrent_memtable_write = allow_parallel;
                options.enable_pipelined_write = enable_pipelined_write;
                options.two_write_queues = two_queues;
                if (options.enable_pipelined_write && seq_per_batch) {
                  // This combination is not supported.
                  continue;
                }
                if (options.enable_pipelined_write &&
                    options.two_write_queues) {
                  // This combination is not supported.
                  continue;
                }

                ReadOptions read_options;
                DB* db;
                DBImpl* db_impl;

                DestroyDB(dbname, options);

                DBOptions db_options(options);
                ColumnFamilyOptions cf_options(options);
                std::vector<ColumnFamilyDescriptor> column_families;
                column_families.push_back(
                    ColumnFamilyDescriptor(kDefaultColumnFamilyName,
                                           cf_options));
                std::vector<ColumnFamilyHandle*> handles;
                // Open through DBImpl::Open so the test can pass the internal
                // seq_per_batch and batch_per_txn arguments directly.
                auto open_s =
                    DBImpl::Open(db_options, dbname, column_families, &handles,
                                 &db, seq_per_batch, true /* batch_per_txn */);
                ASSERT_OK(open_s);
                assert(handles.size() == 1);
                delete handles[0];

                db_impl = dynamic_cast<DBImpl*>(db);
                ASSERT_TRUE(db_impl);

                // Writers that have called JoinBatchGroup.
                std::atomic<uint64_t> threads_joining(0);
                // Writers that have linked to the write queue.
                std::atomic<uint64_t> threads_linked(0);
                // Writers that have passed the
                // WriteThread::JoinBatchGroup:Wait sync-point.
                std::atomic<uint64_t> threads_verified(0);
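                // Together these counters sequence the writers: the
                // JoinBatchGroup:Start callback below links writers to the
                // queue one at a time, and the JoinBatchGroup:Wait callback
                // uses threads_linked to tell the leader from the followers.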

                std::atomic<uint64_t> seq(db_impl->GetLatestSequenceNumber());
                ASSERT_EQ(db_impl->GetLatestSequenceNumber(), 0);

                rocksdb::SyncPoint::GetInstance()->SetCallBack(
                    "WriteThread::JoinBatchGroup:Start", [&](void*) {
                      uint64_t cur_threads_joining = threads_joining.fetch_add(1);
                      // Wait for the last joined writer to link to the queue.
                      // In this way the writers link to the queue one by one.
                      // This allows us to confidently detect the first writer
                      // who increases threads_linked as the leader.
                      while (threads_linked.load() < cur_threads_joining) {
                      }
                    });

                // Verification once writers call JoinBatchGroup.
                rocksdb::SyncPoint::GetInstance()->SetCallBack(
                    "WriteThread::JoinBatchGroup:Wait", [&](void* arg) {
                      uint64_t cur_threads_linked = threads_linked.fetch_add(1);
                      bool is_leader = false;
                      bool is_last = false;

                      // Who am I?
                      is_leader = (cur_threads_linked == 0);
                      is_last = (cur_threads_linked == write_group.size() - 1);

                      // Check my state.
                      auto* writer = reinterpret_cast<WriteThread::Writer*>(arg);

                      if (is_leader) {
                        ASSERT_TRUE(writer->state ==
                                    WriteThread::State::STATE_GROUP_LEADER);
                      } else {
                        ASSERT_TRUE(writer->state ==
                                    WriteThread::State::STATE_INIT);
                      }

                      // (Meta test) the first WriteOP should indeed be the
                      // first and the last should be the last (all others can
                      // be out of order).
                      if (is_leader) {
                        ASSERT_TRUE(writer->callback->Callback(nullptr).ok() ==
                                    !write_group.front().callback_.should_fail_);
                      } else if (is_last) {
                        ASSERT_TRUE(writer->callback->Callback(nullptr).ok() ==
                                    !write_group.back().callback_.should_fail_);
                      }

                      threads_verified.fetch_add(1);
                      // Wait here until all verification in this sync-point
                      // callback finishes for all writers.
                      while (threads_verified.load() < write_group.size()) {
                      }
                    });

                rocksdb::SyncPoint::GetInstance()->SetCallBack(
                    "WriteThread::JoinBatchGroup:DoneWaiting", [&](void* arg) {
                      // Check my state.
                      auto* writer = reinterpret_cast<WriteThread::Writer*>(arg);

                      if (!allow_batching) {
                        // No batching, so everyone should be a leader.
                        ASSERT_TRUE(writer->state ==
                                    WriteThread::State::STATE_GROUP_LEADER);
                      } else if (!allow_parallel) {
                        ASSERT_TRUE(writer->state ==
                                        WriteThread::State::STATE_COMPLETED ||
                                    (enable_pipelined_write &&
                                     writer->state ==
                                         WriteThread::State::
                                             STATE_MEMTABLE_WRITER_LEADER));
                      }
                    });

                std::atomic<uint32_t> thread_num(0);
                std::atomic<char> dummy_key(0);

                // Each write thread creates a random write batch and writes
                // it to the DB with a write callback.
                std::function<void()> write_with_callback_func = [&]() {
                  uint32_t i = thread_num.fetch_add(1);
                  Random rnd(i);

                  // Leaders gotta lead.
                  while (i > 0 && threads_verified.load() < 1) {
                  }

                  // The last writer has to be last.
                  while (i == write_group.size() - 1 &&
                         threads_verified.load() < write_group.size() - 1) {
                  }

                  auto& write_op = write_group.at(i);
                  write_op.Clear();
                  write_op.callback_.allow_batching_ = allow_batching;

                  // Insert some keys.
                  for (uint32_t j = 0; j < rnd.Next() % 50; j++) {
                    // Grab a unique key.
                    char my_key = dummy_key.fetch_add(1);

                    string skey(5, my_key);
                    string sval(10, my_key);
                    write_op.Put(skey, sval);

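                    // Track the expected visible sequence: without
                    // seq_per_batch every successful key consumes one
                    // sequence number; with seq_per_batch the whole batch
                    // consumes just one, counted once after this loop.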
                    if (!write_op.callback_.should_fail_ && !seq_per_batch) {
                      seq.fetch_add(1);
                    }
                  }
                  if (!write_op.callback_.should_fail_ && seq_per_batch) {
                    seq.fetch_add(1);
                  }

                  WriteOptions woptions;
                  woptions.disableWAL = !enable_WAL;
                  woptions.sync = enable_WAL;
                  Status s;
                  if (seq_per_batch) {
                    class PublishSeqCallback : public PreReleaseCallback {
                     public:
                      PublishSeqCallback(DBImpl* db_impl_in)
                          : db_impl_(db_impl_in) {}
                      Status Callback(SequenceNumber last_seq,
                                      bool /*not used*/) override {
                        db_impl_->SetLastPublishedSequence(last_seq);
                        return Status::OK();
                      }
                      DBImpl* db_impl_;
                    } publish_seq_callback(db_impl);
                    // seq_per_batch requires a natural batch separator or Noop.
                    WriteBatchInternal::InsertNoop(&write_op.write_batch_);
                    const size_t ONE_BATCH = 1;
                    s = db_impl->WriteImpl(
                        woptions, &write_op.write_batch_, &write_op.callback_,
                        nullptr /* log_used */, 0 /* log_ref */,
                        false /* disable_memtable */, nullptr /* seq_used */,
                        ONE_BATCH,
                        two_queues ? &publish_seq_callback : nullptr);
                  } else {
                    s = db_impl->WriteWithCallback(
                        woptions, &write_op.write_batch_, &write_op.callback_);
                  }

                  if (write_op.callback_.should_fail_) {
                    ASSERT_TRUE(s.IsBusy());
                  } else {
                    ASSERT_OK(s);
                  }
                };

                rocksdb::SyncPoint::GetInstance()->EnableProcessing();

                // Do all the writes.
                std::vector<port::Thread> threads;
                for (uint32_t i = 0; i < write_group.size(); i++) {
                  threads.emplace_back(write_with_callback_func);
                }
                for (auto& t : threads) {
                  t.join();
                }

                rocksdb::SyncPoint::GetInstance()->DisableProcessing();

                // Check for keys.
                string value;
                for (auto& w : write_group) {
                  ASSERT_TRUE(w.callback_.was_called_.load());
                  for (auto& kvp : w.kvs_) {
                    if (w.callback_.should_fail_) {
                      ASSERT_TRUE(
                          db->Get(read_options, kvp.first, &value).IsNotFound());
                    } else {
                      ASSERT_OK(db->Get(read_options, kvp.first, &value));
                      ASSERT_EQ(value, kvp.second);
                    }
                  }
                }

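                // Every successful write, and only those, advanced our seq
                // counter; the DB's last visible sequence must agree.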
                ASSERT_EQ(seq.load(), db_impl->TEST_GetLastVisibleSequence());

                delete db;
                DestroyDB(dbname, options);
              }
            }
          }
        }
      }
    }
  }
}

TEST_F(WriteCallbackTest, WriteCallBackTest) {
  Options options;
  WriteOptions write_options;
  ReadOptions read_options;
  string value;
  DB* db;
  DBImpl* db_impl;

  DestroyDB(dbname, options);

  options.create_if_missing = true;
  Status s = DB::Open(options, dbname, &db);
  ASSERT_OK(s);

  db_impl = dynamic_cast<DBImpl*>(db);
  ASSERT_TRUE(db_impl);

  WriteBatch wb;

  wb.Put("a", "value.a");
  wb.Delete("x");

  // Test a simple Write.
  s = db->Write(write_options, &wb);
  ASSERT_OK(s);

  s = db->Get(read_options, "a", &value);
  ASSERT_OK(s);
  ASSERT_EQ("value.a", value);

  // Test WriteWithCallback.
  WriteCallbackTestWriteCallback1 callback1;
  WriteBatch wb2;

  wb2.Put("a", "value.a2");

  s = db_impl->WriteWithCallback(write_options, &wb2, &callback1);
  ASSERT_OK(s);
  ASSERT_TRUE(callback1.was_called);

  s = db->Get(read_options, "a", &value);
  ASSERT_OK(s);
  ASSERT_EQ("value.a2", value);

  // Test WriteWithCallback for a callback that fails.
  WriteCallbackTestWriteCallback2 callback2;
  WriteBatch wb3;

  wb3.Put("a", "value.a3");

  s = db_impl->WriteWithCallback(write_options, &wb3, &callback2);
  ASSERT_NOK(s);

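  // The rejected batch must not be visible: "a" still holds the value from
  // the previous successful write.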
  s = db->Get(read_options, "a", &value);
  ASSERT_OK(s);
  ASSERT_EQ("value.a2", value);

  delete db;
  DestroyDB(dbname, options);
}

}  // namespace rocksdb

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

#else
#include <stdio.h>

int main(int /*argc*/, char** /*argv*/) {
  fprintf(stderr,
          "SKIPPED as WriteWithCallback is not supported in ROCKSDB_LITE\n");
  return 0;
}

#endif  // !ROCKSDB_LITE