1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
13 #include "rocksdb/comparator.h"
14 #include "rocksdb/db.h"
15 #include "rocksdb/utilities/stackable_db.h"
16 #include "rocksdb/utilities/transaction.h"
18 // Database with Transaction support.
20 // See transaction.h and examples/transaction_example.cc
// Forward declaration only; the factory type is referenced below through a
// std::shared_ptr and is not defined in this part of the file.
class TransactionDBMutexFactory;

// Database-wide settings supplied when a TransactionDB is opened.
struct TransactionDBOptions {
  // Upper bound on the number of keys that may be locked at the same time.
  // Once more than max_num_locks keys are locked, transaction writes (or
  // GetForUpdate) return an error. A non-positive value disables the limit.
  int64_t max_num_locks{-1};

  // Raising this value increases concurrency by splitting the lock table
  // (per column family) into a larger number of sub-tables.
  size_t num_stripes{16};

  // Default wait, in milliseconds, used when a transaction tries to lock a key
  // and TransactionOptions::lock_timeout does not specify one.
  //   > 0 : wait at most this many milliseconds.
  //   = 0 : do not wait if the lock cannot be acquired instantly.
  //   < 0 : no timeout. Not recommended, since it can lead to deadlocks and
  //         there is currently no deadlock-detection.
  int64_t transaction_lock_timeout{1000};  // 1 second

  // Wait, in milliseconds, used when a key is written OUTSIDE of a transaction
  // (ie by calling DB::Put(), Merge(), Delete(), Write()).
  //   > 0 : wait at most this many milliseconds.
  //   = 0 : do not wait if the lock cannot be acquired instantly.
  //   < 0 : no timeout; acquisition blocks indefinitely.
  //
  // Running without a timeout can lead to deadlocks, and there is currently no
  // deadlock-detection to recover from one. DB writes cannot deadlock with
  // other DB writes, but they can deadlock with a transaction.
  // NOTE(review): a negative timeout should only be used under a specific
  // condition on all transactions -- confirm the exact wording upstream.
  int64_t default_lock_timeout{1000};  // 1 second

  // When set, the TransactionDB uses this implementation of a mutex and
  // condition variable for all transaction locking instead of the default
  // mutex/condvar implementation.
  std::shared_ptr<TransactionDBMutexFactory> custom_mutex_factory;
};
// Per-transaction settings.
struct TransactionOptions {
  // set_snapshot=true has the same effect as calling
  // Transaction::SetSnapshot().
  bool set_snapshot{false};

  // When true, the transaction checks before acquiring locks whether doing so
  // would cause a deadlock. If it would, the call returns Status::Busy and the
  // user should retry the transaction.
  bool deadlock_detect{false};

  // TODO(agiardullo): TransactionDB does not yet support comparators that allow
  // two non-equal keys to be equivalent. Ie, cmp->Compare(a,b) should only
  // report equality when a.compare(b) returns 0.

  // Wait, in milliseconds, used when this transaction tries to lock a key.
  //   > 0 : wait at most this many milliseconds.
  //   = 0 : do not wait if the lock cannot be acquired instantly.
  //   < 0 : fall back to TransactionDBOptions::transaction_lock_timeout.
  int64_t lock_timeout{-1};

  // Expiration duration in milliseconds. When non-negative, a transaction that
  // lasts longer than this many milliseconds fails to commit. When not set, a
  // forgotten transaction that is never committed, rolled back, or deleted
  // never relinquishes the locks it holds, which could prevent other writers
  // from writing those keys.
  int64_t expiration{-1};

  // Number of traversals to make during deadlock detection.
  int64_t deadlock_detect_depth{50};

  // Maximum number of bytes used for the write batch. 0 means no limit.
  size_t max_write_batch_size{0};
};
110 std::vector
<TransactionID
> ids
;
114 class TransactionDB
: public StackableDB
{
116 // Open a TransactionDB similar to DB::Open().
117 // Internally call PrepareWrap() and WrapDB()
118 static Status
Open(const Options
& options
,
119 const TransactionDBOptions
& txn_db_options
,
120 const std::string
& dbname
, TransactionDB
** dbptr
);
122 static Status
Open(const DBOptions
& db_options
,
123 const TransactionDBOptions
& txn_db_options
,
124 const std::string
& dbname
,
125 const std::vector
<ColumnFamilyDescriptor
>& column_families
,
126 std::vector
<ColumnFamilyHandle
*>* handles
,
127 TransactionDB
** dbptr
);
128 // The following functions are used to open a TransactionDB internally using
129 // an opened DB or StackableDB.
130 // 1. Call prepareWrap(), passing an empty std::vector<size_t> to
131 // compaction_enabled_cf_indices.
132 // 2. Open DB or Stackable DB with db_options and column_families passed to
134 // Note: PrepareWrap() may change parameters, make copies before the
135 // invocation if needed.
136 // 3. Call Wrap*DB() with compaction_enabled_cf_indices in step 1 and handles
137 // of the opened DB/StackableDB in step 2
138 static void PrepareWrap(DBOptions
* db_options
,
139 std::vector
<ColumnFamilyDescriptor
>* column_families
,
140 std::vector
<size_t>* compaction_enabled_cf_indices
);
141 static Status
WrapDB(DB
* db
, const TransactionDBOptions
& txn_db_options
,
142 const std::vector
<size_t>& compaction_enabled_cf_indices
,
143 const std::vector
<ColumnFamilyHandle
*>& handles
,
144 TransactionDB
** dbptr
);
145 static Status
WrapStackableDB(
146 StackableDB
* db
, const TransactionDBOptions
& txn_db_options
,
147 const std::vector
<size_t>& compaction_enabled_cf_indices
,
148 const std::vector
<ColumnFamilyHandle
*>& handles
, TransactionDB
** dbptr
);
149 virtual ~TransactionDB() {}
151 // Starts a new Transaction.
153 // Caller is responsible for deleting the returned transaction when no
156 // If old_txn is not null, BeginTransaction will reuse this Transaction
157 // handle instead of allocating a new one. This is an optimization to avoid
158 // extra allocations when repeatedly creating transactions.
159 virtual Transaction
* BeginTransaction(
160 const WriteOptions
& write_options
,
161 const TransactionOptions
& txn_options
= TransactionOptions(),
162 Transaction
* old_txn
= nullptr) = 0;
164 virtual Transaction
* GetTransactionByName(const TransactionName
& name
) = 0;
165 virtual void GetAllPreparedTransactions(std::vector
<Transaction
*>* trans
) = 0;
167 // Returns set of all locks held.
169 // The mapping is column family id -> KeyLockInfo
170 virtual std::unordered_multimap
<uint32_t, KeyLockInfo
>
171 GetLockStatusData() = 0;
174 // To Create an TransactionDB, call Open()
175 explicit TransactionDB(DB
* db
) : StackableDB(db
) {}
178 // No copying allowed
179 TransactionDB(const TransactionDB
&);
180 void operator=(const TransactionDB
&);
183 } // namespace rocksdb
185 #endif // ROCKSDB_LITE