]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/utilities/transactions/transaction_test.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / utilities / transactions / transaction_test.h
CommitLineData
11fdf7f2
TL
1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
5
6#pragma once
7
11fdf7f2 8#include <algorithm>
f67539c2 9#include <cinttypes>
11fdf7f2
TL
10#include <functional>
11#include <string>
12#include <thread>
13
f67539c2 14#include "db/db_impl/db_impl.h"
20effc67
TL
15#include "db/db_test_util.h"
16#include "port/port.h"
11fdf7f2
TL
17#include "rocksdb/db.h"
18#include "rocksdb/options.h"
19#include "rocksdb/utilities/transaction.h"
20#include "rocksdb/utilities/transaction_db.h"
21#include "table/mock_table.h"
f67539c2
TL
22#include "test_util/sync_point.h"
23#include "test_util/testharness.h"
24#include "test_util/testutil.h"
25#include "test_util/transaction_test_util.h"
11fdf7f2
TL
26#include "util/random.h"
27#include "util/string_util.h"
20effc67 28#include "utilities/fault_injection_env.h"
11fdf7f2
TL
29#include "utilities/merge_operators.h"
30#include "utilities/merge_operators/string_append/stringappend.h"
31#include "utilities/transactions/pessimistic_transaction_db.h"
f67539c2 32#include "utilities/transactions/write_unprepared_txn_db.h"
11fdf7f2 33
f67539c2 34namespace ROCKSDB_NAMESPACE {
11fdf7f2
TL
35
// Returns true if the i-th bit (0 = least significant) is set in the
// combination bitmask `comb`.
//
// Bit positions at or beyond the width of size_t can never be set, so they
// yield false rather than performing an out-of-range (undefined) shift.
// Declared inline so that including this header from more than one
// translation unit does not violate the one-definition rule.
inline bool IsInCombination(size_t i, size_t comb) {
  // Assumes 8-bit bytes, which holds on every platform this code targets.
  constexpr size_t kBitWidth = sizeof(size_t) * 8;
  return i < kBitWidth && ((comb >> i) & size_t(1)) != 0;
}
38
f67539c2
TL
// Test parameter selecting the value of Options::unordered_write:
// kUnorderedWrite enables it, kOrderedWrite leaves it disabled.
enum WriteOrdering : bool {
  kOrderedWrite = false,
  kUnorderedWrite = true,
};
40
11fdf7f2
TL
41class TransactionTestBase : public ::testing::Test {
42 public:
43 TransactionDB* db;
20effc67 44 SpecialEnv special_env;
11fdf7f2
TL
45 FaultInjectionTestEnv* env;
46 std::string dbname;
47 Options options;
48
49 TransactionDBOptions txn_db_options;
50 bool use_stackable_db_;
51
52 TransactionTestBase(bool use_stackable_db, bool two_write_queue,
f67539c2
TL
53 TxnDBWritePolicy write_policy,
54 WriteOrdering write_ordering)
20effc67
TL
55 : db(nullptr),
56 special_env(Env::Default()),
57 env(nullptr),
58 use_stackable_db_(use_stackable_db) {
11fdf7f2
TL
59 options.create_if_missing = true;
60 options.max_write_buffer_number = 2;
61 options.write_buffer_size = 4 * 1024;
f67539c2 62 options.unordered_write = write_ordering == kUnorderedWrite;
11fdf7f2
TL
63 options.level0_file_num_compaction_trigger = 2;
64 options.merge_operator = MergeOperators::CreateFromStringId("stringappend");
20effc67
TL
65 special_env.skip_fsync_ = true;
66 env = new FaultInjectionTestEnv(&special_env);
11fdf7f2
TL
67 options.env = env;
68 options.two_write_queues = two_write_queue;
69 dbname = test::PerThreadDBPath("transaction_testdb");
70
71 DestroyDB(dbname, options);
72 txn_db_options.transaction_lock_timeout = 0;
73 txn_db_options.default_lock_timeout = 0;
74 txn_db_options.write_policy = write_policy;
75 txn_db_options.rollback_merge_operands = true;
f67539c2
TL
76 // This will stress write unprepared, by forcing write batch flush on every
77 // write.
78 txn_db_options.default_write_batch_flush_threshold = 1;
79 // Write unprepared requires all transactions to be named. This setting
80 // autogenerates the name so that existing tests can pass.
81 txn_db_options.autogenerate_name = true;
11fdf7f2
TL
82 Status s;
83 if (use_stackable_db == false) {
84 s = TransactionDB::Open(options, txn_db_options, dbname, &db);
85 } else {
86 s = OpenWithStackableDB();
87 }
88 assert(s.ok());
89 }
90
91 ~TransactionTestBase() {
92 delete db;
93 db = nullptr;
94 // This is to skip the assert statement in FaultInjectionTestEnv. There
95 // seems to be a bug in btrfs that the makes readdir return recently
96 // unlink-ed files. By using the default fs we simply ignore errors resulted
97 // from attempting to delete such files in DestroyDB.
98 options.env = Env::Default();
99 DestroyDB(dbname, options);
100 delete env;
101 }
102
103 Status ReOpenNoDelete() {
104 delete db;
105 db = nullptr;
106 env->AssertNoOpenFile();
107 env->DropUnsyncedFileData();
108 env->ResetState();
109 Status s;
110 if (use_stackable_db_ == false) {
111 s = TransactionDB::Open(options, txn_db_options, dbname, &db);
112 } else {
113 s = OpenWithStackableDB();
114 }
115 assert(!s.ok() || db != nullptr);
116 return s;
117 }
118
119 Status ReOpenNoDelete(std::vector<ColumnFamilyDescriptor>& cfs,
120 std::vector<ColumnFamilyHandle*>* handles) {
121 for (auto h : *handles) {
122 delete h;
123 }
124 handles->clear();
125 delete db;
126 db = nullptr;
127 env->AssertNoOpenFile();
128 env->DropUnsyncedFileData();
129 env->ResetState();
130 Status s;
131 if (use_stackable_db_ == false) {
132 s = TransactionDB::Open(options, txn_db_options, dbname, cfs, handles,
133 &db);
134 } else {
135 s = OpenWithStackableDB(cfs, handles);
136 }
f67539c2 137 assert(!s.ok() || db != nullptr);
11fdf7f2
TL
138 return s;
139 }
140
141 Status ReOpen() {
142 delete db;
143 db = nullptr;
144 DestroyDB(dbname, options);
145 Status s;
146 if (use_stackable_db_ == false) {
147 s = TransactionDB::Open(options, txn_db_options, dbname, &db);
148 } else {
149 s = OpenWithStackableDB();
150 }
151 assert(db != nullptr);
152 return s;
153 }
154
155 Status OpenWithStackableDB(std::vector<ColumnFamilyDescriptor>& cfs,
156 std::vector<ColumnFamilyHandle*>* handles) {
157 std::vector<size_t> compaction_enabled_cf_indices;
158 TransactionDB::PrepareWrap(&options, &cfs, &compaction_enabled_cf_indices);
159 DB* root_db = nullptr;
160 Options options_copy(options);
161 const bool use_seq_per_batch =
162 txn_db_options.write_policy == WRITE_PREPARED ||
163 txn_db_options.write_policy == WRITE_UNPREPARED;
164 const bool use_batch_per_txn =
165 txn_db_options.write_policy == WRITE_COMMITTED ||
166 txn_db_options.write_policy == WRITE_PREPARED;
167 Status s = DBImpl::Open(options_copy, dbname, cfs, handles, &root_db,
168 use_seq_per_batch, use_batch_per_txn);
169 StackableDB* stackable_db = new StackableDB(root_db);
170 if (s.ok()) {
171 assert(root_db != nullptr);
172 s = TransactionDB::WrapStackableDB(stackable_db, txn_db_options,
173 compaction_enabled_cf_indices,
174 *handles, &db);
175 }
176 if (!s.ok()) {
177 delete stackable_db;
11fdf7f2
TL
178 }
179 return s;
180 }
181
182 Status OpenWithStackableDB() {
183 std::vector<size_t> compaction_enabled_cf_indices;
184 std::vector<ColumnFamilyDescriptor> column_families{ColumnFamilyDescriptor(
185 kDefaultColumnFamilyName, ColumnFamilyOptions(options))};
186
187 TransactionDB::PrepareWrap(&options, &column_families,
188 &compaction_enabled_cf_indices);
189 std::vector<ColumnFamilyHandle*> handles;
190 DB* root_db = nullptr;
191 Options options_copy(options);
192 const bool use_seq_per_batch =
193 txn_db_options.write_policy == WRITE_PREPARED ||
194 txn_db_options.write_policy == WRITE_UNPREPARED;
195 const bool use_batch_per_txn =
196 txn_db_options.write_policy == WRITE_COMMITTED ||
197 txn_db_options.write_policy == WRITE_PREPARED;
198 Status s = DBImpl::Open(options_copy, dbname, column_families, &handles,
199 &root_db, use_seq_per_batch, use_batch_per_txn);
200 if (!s.ok()) {
201 delete root_db;
202 return s;
203 }
204 StackableDB* stackable_db = new StackableDB(root_db);
205 assert(root_db != nullptr);
206 assert(handles.size() == 1);
207 s = TransactionDB::WrapStackableDB(stackable_db, txn_db_options,
208 compaction_enabled_cf_indices, handles,
209 &db);
210 delete handles[0];
211 if (!s.ok()) {
212 delete stackable_db;
11fdf7f2
TL
213 }
214 return s;
215 }
216
// Counters used by the txn_t* helpers; all start at zero.
// `linked` is not updated anywhere in this header.
std::atomic<size_t> linked{0};
// Number of sequence numbers the helpers expect to have been consumed.
std::atomic<size_t> exp_seq{0};
// Bumped by txn_t3/txn_t4 after a successful Prepare.
std::atomic<size_t> commit_writes{0};
// Bumped by txn_t3/txn_t4 right before Prepare.
std::atomic<size_t> expected_commits{0};
// Bumped by txn_t0/txn_t1/txn_t2.
// Without Prepare, the commit does not write to WAL
std::atomic<size_t> with_empty_commits{0};
11fdf7f2
TL
223 std::function<void(size_t, Status)> txn_t0_with_status = [&](size_t index,
224 Status exp_s) {
225 // Test DB's internal txn. It involves no prepare phase nor a commit marker.
226 WriteOptions wopts;
227 auto s = db->Put(wopts, "key" + std::to_string(index), "value");
228 ASSERT_EQ(exp_s, s);
229 if (txn_db_options.write_policy == TxnDBWritePolicy::WRITE_COMMITTED) {
230 // Consume one seq per key
231 exp_seq++;
232 } else {
233 // Consume one seq per batch
234 exp_seq++;
235 if (options.two_write_queues) {
236 // Consume one seq for commit
237 exp_seq++;
238 }
239 }
f67539c2 240 with_empty_commits++;
11fdf7f2
TL
241 };
242 std::function<void(size_t)> txn_t0 = [&](size_t index) {
243 return txn_t0_with_status(index, Status::OK());
244 };
245 std::function<void(size_t)> txn_t1 = [&](size_t index) {
246 // Testing directly writing a write batch. Functionality-wise it is
247 // equivalent to commit without prepare.
248 WriteBatch wb;
249 auto istr = std::to_string(index);
250 ASSERT_OK(wb.Put("k1" + istr, "v1"));
251 ASSERT_OK(wb.Put("k2" + istr, "v2"));
252 ASSERT_OK(wb.Put("k3" + istr, "v3"));
253 WriteOptions wopts;
254 auto s = db->Write(wopts, &wb);
255 if (txn_db_options.write_policy == TxnDBWritePolicy::WRITE_COMMITTED) {
256 // Consume one seq per key
257 exp_seq += 3;
258 } else {
259 // Consume one seq per batch
260 exp_seq++;
261 if (options.two_write_queues) {
262 // Consume one seq for commit
263 exp_seq++;
264 }
265 }
266 ASSERT_OK(s);
f67539c2 267 with_empty_commits++;
11fdf7f2
TL
268 };
269 std::function<void(size_t)> txn_t2 = [&](size_t index) {
270 // Commit without prepare. It should write to DB without a commit marker.
271 TransactionOptions txn_options;
272 WriteOptions write_options;
273 Transaction* txn = db->BeginTransaction(write_options, txn_options);
274 auto istr = std::to_string(index);
275 ASSERT_OK(txn->SetName("xid" + istr));
276 ASSERT_OK(txn->Put(Slice("foo" + istr), Slice("bar")));
277 ASSERT_OK(txn->Put(Slice("foo2" + istr), Slice("bar2")));
278 ASSERT_OK(txn->Put(Slice("foo3" + istr), Slice("bar3")));
279 ASSERT_OK(txn->Put(Slice("foo4" + istr), Slice("bar4")));
280 ASSERT_OK(txn->Commit());
281 if (txn_db_options.write_policy == TxnDBWritePolicy::WRITE_COMMITTED) {
282 // Consume one seq per key
283 exp_seq += 4;
f67539c2
TL
284 } else if (txn_db_options.write_policy ==
285 TxnDBWritePolicy::WRITE_PREPARED) {
11fdf7f2
TL
286 // Consume one seq per batch
287 exp_seq++;
288 if (options.two_write_queues) {
289 // Consume one seq for commit
290 exp_seq++;
291 }
f67539c2
TL
292 } else {
293 // Flushed after each key, consume one seq per flushed batch
294 exp_seq += 4;
295 // WriteUnprepared implements CommitWithoutPrepareInternal by simply
296 // calling Prepare then Commit. Consume one seq for the prepare.
297 exp_seq++;
11fdf7f2
TL
298 }
299 delete txn;
f67539c2 300 with_empty_commits++;
11fdf7f2
TL
301 };
302 std::function<void(size_t)> txn_t3 = [&](size_t index) {
303 // A full 2pc txn that also involves a commit marker.
304 TransactionOptions txn_options;
305 WriteOptions write_options;
306 Transaction* txn = db->BeginTransaction(write_options, txn_options);
307 auto istr = std::to_string(index);
308 ASSERT_OK(txn->SetName("xid" + istr));
309 ASSERT_OK(txn->Put(Slice("foo" + istr), Slice("bar")));
310 ASSERT_OK(txn->Put(Slice("foo2" + istr), Slice("bar2")));
311 ASSERT_OK(txn->Put(Slice("foo3" + istr), Slice("bar3")));
312 ASSERT_OK(txn->Put(Slice("foo4" + istr), Slice("bar4")));
313 ASSERT_OK(txn->Put(Slice("foo5" + istr), Slice("bar5")));
314 expected_commits++;
315 ASSERT_OK(txn->Prepare());
316 commit_writes++;
317 ASSERT_OK(txn->Commit());
318 if (txn_db_options.write_policy == TxnDBWritePolicy::WRITE_COMMITTED) {
319 // Consume one seq per key
320 exp_seq += 5;
f67539c2
TL
321 } else if (txn_db_options.write_policy ==
322 TxnDBWritePolicy::WRITE_PREPARED) {
11fdf7f2
TL
323 // Consume one seq per batch
324 exp_seq++;
325 // Consume one seq per commit marker
326 exp_seq++;
f67539c2
TL
327 } else {
328 // Flushed after each key, consume one seq per flushed batch
329 exp_seq += 5;
330 // Consume one seq per commit marker
331 exp_seq++;
11fdf7f2
TL
332 }
333 delete txn;
334 };
335 std::function<void(size_t)> txn_t4 = [&](size_t index) {
336 // A full 2pc txn that also involves a commit marker.
337 TransactionOptions txn_options;
338 WriteOptions write_options;
339 Transaction* txn = db->BeginTransaction(write_options, txn_options);
340 auto istr = std::to_string(index);
341 ASSERT_OK(txn->SetName("xid" + istr));
342 ASSERT_OK(txn->Put(Slice("foo" + istr), Slice("bar")));
343 ASSERT_OK(txn->Put(Slice("foo2" + istr), Slice("bar2")));
344 ASSERT_OK(txn->Put(Slice("foo3" + istr), Slice("bar3")));
345 ASSERT_OK(txn->Put(Slice("foo4" + istr), Slice("bar4")));
346 ASSERT_OK(txn->Put(Slice("foo5" + istr), Slice("bar5")));
347 expected_commits++;
348 ASSERT_OK(txn->Prepare());
349 commit_writes++;
350 ASSERT_OK(txn->Rollback());
351 if (txn_db_options.write_policy == TxnDBWritePolicy::WRITE_COMMITTED) {
352 // No seq is consumed for deleting the txn buffer
353 exp_seq += 0;
f67539c2
TL
354 } else if (txn_db_options.write_policy ==
355 TxnDBWritePolicy::WRITE_PREPARED) {
11fdf7f2
TL
356 // Consume one seq per batch
357 exp_seq++;
358 // Consume one seq per rollback batch
359 exp_seq++;
360 if (options.two_write_queues) {
361 // Consume one seq for rollback commit
362 exp_seq++;
363 }
f67539c2
TL
364 } else {
365 // Flushed after each key, consume one seq per flushed batch
366 exp_seq += 5;
367 // Consume one seq per rollback batch
368 exp_seq++;
369 if (options.two_write_queues) {
370 // Consume one seq for rollback commit
371 exp_seq++;
372 }
11fdf7f2
TL
373 }
374 delete txn;
375 };
376
377 // Test that we can change write policy after a clean shutdown (which would
378 // empty the WAL)
379 void CrossCompatibilityTest(TxnDBWritePolicy from_policy,
380 TxnDBWritePolicy to_policy, bool empty_wal) {
381 TransactionOptions txn_options;
382 ReadOptions read_options;
383 WriteOptions write_options;
384 uint32_t index = 0;
385 Random rnd(1103);
386 options.write_buffer_size = 1024; // To create more sst files
387 std::unordered_map<std::string, std::string> committed_kvs;
388 Transaction* txn;
389
390 txn_db_options.write_policy = from_policy;
f67539c2
TL
391 if (txn_db_options.write_policy == WRITE_COMMITTED) {
392 options.unordered_write = false;
393 }
11fdf7f2
TL
394 ReOpen();
395
396 for (int i = 0; i < 1024; i++) {
397 auto istr = std::to_string(index);
398 auto k = Slice("foo-" + istr).ToString();
399 auto v = Slice("bar-" + istr).ToString();
400 // For test the duplicate keys
401 auto v2 = Slice("bar2-" + istr).ToString();
402 auto type = rnd.Uniform(4);
403 switch (type) {
404 case 0:
405 committed_kvs[k] = v;
406 ASSERT_OK(db->Put(write_options, k, v));
407 committed_kvs[k] = v2;
408 ASSERT_OK(db->Put(write_options, k, v2));
409 break;
410 case 1: {
411 WriteBatch wb;
412 committed_kvs[k] = v;
413 wb.Put(k, v);
414 committed_kvs[k] = v2;
415 wb.Put(k, v2);
416 ASSERT_OK(db->Write(write_options, &wb));
417
418 } break;
419 case 2:
420 case 3:
421 txn = db->BeginTransaction(write_options, txn_options);
422 ASSERT_OK(txn->SetName("xid" + istr));
423 committed_kvs[k] = v;
424 ASSERT_OK(txn->Put(k, v));
425 committed_kvs[k] = v2;
426 ASSERT_OK(txn->Put(k, v2));
427
428 if (type == 3) {
429 ASSERT_OK(txn->Prepare());
430 }
431 ASSERT_OK(txn->Commit());
432 delete txn;
433 break;
434 default:
435 assert(0);
436 }
437
438 index++;
439 } // for i
440
441 txn_db_options.write_policy = to_policy;
f67539c2
TL
442 if (txn_db_options.write_policy == WRITE_COMMITTED) {
443 options.unordered_write = false;
444 }
20effc67 445 auto db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
11fdf7f2
TL
446 // Before upgrade/downgrade the WAL must be emptied
447 if (empty_wal) {
448 db_impl->TEST_FlushMemTable();
449 } else {
450 db_impl->FlushWAL(true);
451 }
452 auto s = ReOpenNoDelete();
453 if (empty_wal) {
454 ASSERT_OK(s);
455 } else {
456 // Test that we can detect the WAL that is produced by an incompatible
457 // WritePolicy and fail fast before mis-interpreting the WAL.
458 ASSERT_TRUE(s.IsNotSupported());
459 return;
460 }
20effc67 461 db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
11fdf7f2
TL
462 // Check that WAL is empty
463 VectorLogPtr log_files;
464 db_impl->GetSortedWalFiles(log_files);
465 ASSERT_EQ(0, log_files.size());
466
467 for (auto& kv : committed_kvs) {
468 std::string value;
469 s = db->Get(read_options, kv.first, &value);
470 if (s.IsNotFound()) {
471 printf("key = %s\n", kv.first.c_str());
472 }
473 ASSERT_OK(s);
474 if (kv.second != value) {
475 printf("key = %s\n", kv.first.c_str());
476 }
477 ASSERT_EQ(kv.second, value);
478 }
479 }
480};
481
f67539c2
TL
482class TransactionTest
483 : public TransactionTestBase,
484 virtual public ::testing::WithParamInterface<
485 std::tuple<bool, bool, TxnDBWritePolicy, WriteOrdering>> {
11fdf7f2
TL
486 public:
487 TransactionTest()
488 : TransactionTestBase(std::get<0>(GetParam()), std::get<1>(GetParam()),
f67539c2 489 std::get<2>(GetParam()), std::get<3>(GetParam())){};
11fdf7f2
TL
490};
491
492class TransactionStressTest : public TransactionTest {};
493
494da23a
TL
494class MySQLStyleTransactionTest
495 : public TransactionTestBase,
496 virtual public ::testing::WithParamInterface<
f67539c2 497 std::tuple<bool, bool, TxnDBWritePolicy, WriteOrdering, bool>> {
494da23a
TL
498 public:
499 MySQLStyleTransactionTest()
500 : TransactionTestBase(std::get<0>(GetParam()), std::get<1>(GetParam()),
f67539c2
TL
501 std::get<2>(GetParam()), std::get<3>(GetParam())),
502 with_slow_threads_(std::get<4>(GetParam())) {
494da23a
TL
503 if (with_slow_threads_ &&
504 (txn_db_options.write_policy == WRITE_PREPARED ||
505 txn_db_options.write_policy == WRITE_UNPREPARED)) {
506 // The corner case with slow threads involves the caches filling
507 // over which would not happen even with artifial delays. To help
508 // such cases to show up we lower the size of the cache-related data
509 // structures.
510 txn_db_options.wp_snapshot_cache_bits = 1;
511 txn_db_options.wp_commit_cache_bits = 10;
f67539c2 512 options.write_buffer_size = 1024;
494da23a
TL
513 EXPECT_OK(ReOpen());
514 }
515 };
516
517 protected:
518 // Also emulate slow threads by addin artiftial delays
519 const bool with_slow_threads_;
520};
11fdf7f2 521
f67539c2 522} // namespace ROCKSDB_NAMESPACE