]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | // This source code is licensed under the BSD-style license found in the | |
3 | // LICENSE file in the root directory of this source tree. An additional grant | |
4 | // of patent rights can be found in the PATENTS file in the same directory. | |
5 | ||
6 | #pragma once | |
7 | #ifndef ROCKSDB_LITE | |
8 | ||
9 | #include <string> | |
10 | #include <utility> | |
11 | #include <vector> | |
12 | ||
13 | #include "rocksdb/comparator.h" | |
14 | #include "rocksdb/db.h" | |
15 | #include "rocksdb/utilities/stackable_db.h" | |
16 | #include "rocksdb/utilities/transaction.h" | |
17 | ||
18 | // Database with Transaction support. | |
19 | // | |
20 | // See transaction.h and examples/transaction_example.cc | |
21 | ||
22 | namespace rocksdb { | |
23 | ||
class TransactionDBMutexFactory;

// Database-wide settings that govern how a TransactionDB locks keys.
struct TransactionDBOptions {
  // Upper bound on the number of keys that may be locked simultaneously in
  // each column family. Once more than max_num_locks keys are locked,
  // transaction writes (and GetForUpdate) return an error.
  // A value that is not positive disables the limit.
  int64_t max_num_locks{-1};

  // The lock table of each column family is split into this many sub-tables,
  // each guarded by its own mutex. A larger value allows more concurrency.
  size_t num_stripes{16};

  // Default time, in milliseconds, a transaction waits when it tries to lock
  // a key; applies when TransactionOptions::lock_timeout does not specify one.
  //
  //   > 0 : wait at most this many milliseconds.
  //   = 0 : do not wait at all if the lock cannot be taken immediately.
  //   < 0 : no timeout.
  //
  // Running without a timeout is not recommended because it can lead to
  // deadlocks, and there is currently no deadlock detection that recovers
  // from one.
  int64_t transaction_lock_timeout{1000};  // 1 second

  // Time, in milliseconds, to wait for a lock when a key is written OUTSIDE
  // of a transaction (i.e. by calling DB::Put(), Merge(), Delete() or Write()
  // directly).
  //
  //   > 0 : wait at most this many milliseconds.
  //   = 0 : do not wait at all if the lock cannot be taken immediately.
  //   < 0 : no timeout; block indefinitely until the lock is acquired.
  //
  // Running without a timeout can lead to deadlocks, and there is currently
  // no deadlock detection that recovers from one. DB writes cannot deadlock
  // with other DB writes, but they can deadlock with a transaction, so a
  // negative timeout should only be used when every transaction has a small
  // expiration set.
  int64_t default_lock_timeout{1000};  // 1 second

  // When set, the TransactionDB uses this implementation of a mutex and
  // condition variable for all transaction locking, in place of the default
  // mutex/condvar implementation.
  std::shared_ptr<TransactionDBMutexFactory> custom_mutex_factory;
};
70 | ||
// Per-transaction settings supplied to TransactionDB::BeginTransaction().
struct TransactionOptions {
  // When true, has the same effect as calling Transaction::SetSnapshot().
  bool set_snapshot{false};

  // When true, the transaction checks before acquiring locks whether doing so
  // would cause a deadlock. If it would, the operation returns Status::Busy
  // and the user should retry the transaction.
  bool deadlock_detect{false};

  // TODO(agiardullo): TransactionDB does not yet support comparators that
  // allow two non-equal keys to be equivalent. I.e., cmp->Compare(a,b) should
  // only return 0 if a.compare(b) returns 0.
  // (This is a limitation of the TransactionDB as a whole rather than a
  // description of any single field here.)

  // Time, in milliseconds, this transaction waits when it tries to lock a
  // key.
  //
  //   > 0 : wait at most this many milliseconds.
  //   = 0 : do not wait at all if the lock cannot be taken immediately.
  //   < 0 : fall back to TransactionDBOptions::transaction_lock_timeout.
  int64_t lock_timeout{-1};

  // Expiration duration in milliseconds. When non-negative, a transaction
  // that lasts longer than this many milliseconds fails to commit. When left
  // unset, a forgotten transaction that is never committed, rolled back, or
  // deleted never relinquishes the locks it holds, which could prevent other
  // writers from writing those keys.
  int64_t expiration{-1};

  // Number of traversals to make during deadlock detection.
  int64_t deadlock_detect_depth{50};

  // Maximum number of bytes used for the write batch. 0 means no limit.
  size_t max_write_batch_size{0};
};
107 | ||
108 | struct KeyLockInfo { | |
109 | std::string key; | |
110 | std::vector<TransactionID> ids; | |
111 | bool exclusive; | |
112 | }; | |
113 | ||
114 | class TransactionDB : public StackableDB { | |
115 | public: | |
116 | // Open a TransactionDB similar to DB::Open(). | |
117 | // Internally call PrepareWrap() and WrapDB() | |
118 | static Status Open(const Options& options, | |
119 | const TransactionDBOptions& txn_db_options, | |
120 | const std::string& dbname, TransactionDB** dbptr); | |
121 | ||
122 | static Status Open(const DBOptions& db_options, | |
123 | const TransactionDBOptions& txn_db_options, | |
124 | const std::string& dbname, | |
125 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
126 | std::vector<ColumnFamilyHandle*>* handles, | |
127 | TransactionDB** dbptr); | |
128 | // The following functions are used to open a TransactionDB internally using | |
129 | // an opened DB or StackableDB. | |
130 | // 1. Call prepareWrap(), passing an empty std::vector<size_t> to | |
131 | // compaction_enabled_cf_indices. | |
132 | // 2. Open DB or Stackable DB with db_options and column_families passed to | |
133 | // prepareWrap() | |
134 | // Note: PrepareWrap() may change parameters, make copies before the | |
135 | // invocation if needed. | |
136 | // 3. Call Wrap*DB() with compaction_enabled_cf_indices in step 1 and handles | |
137 | // of the opened DB/StackableDB in step 2 | |
138 | static void PrepareWrap(DBOptions* db_options, | |
139 | std::vector<ColumnFamilyDescriptor>* column_families, | |
140 | std::vector<size_t>* compaction_enabled_cf_indices); | |
141 | static Status WrapDB(DB* db, const TransactionDBOptions& txn_db_options, | |
142 | const std::vector<size_t>& compaction_enabled_cf_indices, | |
143 | const std::vector<ColumnFamilyHandle*>& handles, | |
144 | TransactionDB** dbptr); | |
145 | static Status WrapStackableDB( | |
146 | StackableDB* db, const TransactionDBOptions& txn_db_options, | |
147 | const std::vector<size_t>& compaction_enabled_cf_indices, | |
148 | const std::vector<ColumnFamilyHandle*>& handles, TransactionDB** dbptr); | |
149 | virtual ~TransactionDB() {} | |
150 | ||
151 | // Starts a new Transaction. | |
152 | // | |
153 | // Caller is responsible for deleting the returned transaction when no | |
154 | // longer needed. | |
155 | // | |
156 | // If old_txn is not null, BeginTransaction will reuse this Transaction | |
157 | // handle instead of allocating a new one. This is an optimization to avoid | |
158 | // extra allocations when repeatedly creating transactions. | |
159 | virtual Transaction* BeginTransaction( | |
160 | const WriteOptions& write_options, | |
161 | const TransactionOptions& txn_options = TransactionOptions(), | |
162 | Transaction* old_txn = nullptr) = 0; | |
163 | ||
164 | virtual Transaction* GetTransactionByName(const TransactionName& name) = 0; | |
165 | virtual void GetAllPreparedTransactions(std::vector<Transaction*>* trans) = 0; | |
166 | ||
167 | // Returns set of all locks held. | |
168 | // | |
169 | // The mapping is column family id -> KeyLockInfo | |
170 | virtual std::unordered_multimap<uint32_t, KeyLockInfo> | |
171 | GetLockStatusData() = 0; | |
172 | ||
173 | protected: | |
174 | // To Create an TransactionDB, call Open() | |
175 | explicit TransactionDB(DB* db) : StackableDB(db) {} | |
176 | ||
177 | private: | |
178 | // No copying allowed | |
179 | TransactionDB(const TransactionDB&); | |
180 | void operator=(const TransactionDB&); | |
181 | }; | |
182 | ||
183 | } // namespace rocksdb | |
184 | ||
185 | #endif // ROCKSDB_LITE |