]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/include/rocksdb/utilities/transaction_db.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / rocksdb / include / rocksdb / utilities / transaction_db.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
5
6 #pragma once
7 #ifndef ROCKSDB_LITE
8
9 #include <string>
10 #include <utility>
11 #include <vector>
12
13 #include "rocksdb/comparator.h"
14 #include "rocksdb/db.h"
15 #include "rocksdb/utilities/stackable_db.h"
16 #include "rocksdb/utilities/transaction.h"
17
18 // Database with Transaction support.
19 //
20 // See transaction.h and examples/transaction_example.cc
21
22 namespace rocksdb {
23
// Supplies the mutex / condition-variable implementation used for transaction
// locking. Only forward-declared in this header.
class TransactionDBMutexFactory;

// Options that configure a TransactionDB as a whole.
struct TransactionDBOptions {
  // Upper bound, per column family, on the number of keys that may be locked
  // at the same time. When more keys than max_num_locks are locked,
  // transaction writes (or GetForUpdate) return an error.
  // A value that is not positive means no limit is enforced.
  int64_t max_num_locks{-1};

  // Number of sub-tables the (per column family) lock table is divided into.
  // Each sub-table has its own separate mutex, so raising this value raises
  // the achievable concurrency.
  size_t num_stripes{16};

  // Default wait timeout, in milliseconds, applied when a transaction tries
  // to lock a key and TransactionOptions::lock_timeout does not specify one.
  //
  //   positive : wait at most this many milliseconds.
  //   zero     : do not wait at all if the lock cannot be acquired instantly.
  //   negative : no timeout. Not recommended, as it can lead to deadlocks.
  //
  // NOTE(review): this comment used to state that there is no deadlock
  // detection to recover from a deadlock, yet
  // TransactionOptions::deadlock_detect exists -- confirm whether that remark
  // is still accurate.
  int64_t transaction_lock_timeout{1000};  // 1 second

  // Wait timeout, in milliseconds, used when a key is written OUTSIDE of a
  // transaction (i.e. by calling DB::Put(), Merge(), Delete() or Write()
  // directly).
  //
  //   positive : wait at most this many milliseconds.
  //   zero     : do not wait at all if the lock cannot be acquired instantly.
  //   negative : no timeout; the write blocks indefinitely while acquiring
  //              the lock.
  //
  // Running without a timeout can lead to deadlocks, and there is currently
  // no deadlock detection to recover from one. DB writes cannot deadlock with
  // other DB writes, but they can deadlock with a transaction. Use a negative
  // timeout only if all transactions have a small expiration set.
  int64_t default_lock_timeout{1000};  // 1 second

  // When set, the TransactionDB uses this implementation of a mutex and
  // condition variable for all transaction locking instead of the default
  // mutex/condvar implementation.
  std::shared_ptr<TransactionDBMutexFactory> custom_mutex_factory;
};
70
// Options that configure a single transaction.
//
// TODO(agiardullo): TransactionDB does not yet support comparators that allow
// two non-equal keys to be equivalent. I.e., cmp->Compare(a,b) should only
// return 0 if a.compare(b) returns 0.
struct TransactionOptions {
  // set_snapshot=true is equivalent to calling Transaction::SetSnapshot().
  bool set_snapshot{false};

  // When true, the transaction checks before acquiring locks whether doing so
  // would cause a deadlock. If it would, the operation returns Status::Busy
  // and the user should retry the transaction.
  bool deadlock_detect{false};

  // Wait timeout, in milliseconds, used when this transaction attempts to
  // lock a key.
  //
  //   positive : wait at most this many milliseconds.
  //   zero     : do not wait at all if the lock cannot be acquired instantly.
  //   negative : fall back to TransactionDBOptions::transaction_lock_timeout.
  int64_t lock_timeout{-1};

  // Expiration duration in milliseconds. If non-negative, a transaction that
  // lasts longer than this many milliseconds will fail to commit. If not set,
  // a forgotten transaction that is never committed, rolled back, or deleted
  // never relinquishes the locks it holds, which could prevent other writers
  // from writing those keys.
  int64_t expiration{-1};

  // Number of traversals to make during deadlock detection.
  int64_t deadlock_detect_depth{50};

  // Maximum number of bytes used for the write batch. 0 means no limit.
  size_t max_write_batch_size{0};
};
107
108 struct KeyLockInfo {
109 std::string key;
110 std::vector<TransactionID> ids;
111 bool exclusive;
112 };
113
114 class TransactionDB : public StackableDB {
115 public:
116 // Open a TransactionDB similar to DB::Open().
117 // Internally call PrepareWrap() and WrapDB()
118 static Status Open(const Options& options,
119 const TransactionDBOptions& txn_db_options,
120 const std::string& dbname, TransactionDB** dbptr);
121
122 static Status Open(const DBOptions& db_options,
123 const TransactionDBOptions& txn_db_options,
124 const std::string& dbname,
125 const std::vector<ColumnFamilyDescriptor>& column_families,
126 std::vector<ColumnFamilyHandle*>* handles,
127 TransactionDB** dbptr);
128 // The following functions are used to open a TransactionDB internally using
129 // an opened DB or StackableDB.
130 // 1. Call prepareWrap(), passing an empty std::vector<size_t> to
131 // compaction_enabled_cf_indices.
132 // 2. Open DB or Stackable DB with db_options and column_families passed to
133 // prepareWrap()
134 // Note: PrepareWrap() may change parameters, make copies before the
135 // invocation if needed.
136 // 3. Call Wrap*DB() with compaction_enabled_cf_indices in step 1 and handles
137 // of the opened DB/StackableDB in step 2
138 static void PrepareWrap(DBOptions* db_options,
139 std::vector<ColumnFamilyDescriptor>* column_families,
140 std::vector<size_t>* compaction_enabled_cf_indices);
141 static Status WrapDB(DB* db, const TransactionDBOptions& txn_db_options,
142 const std::vector<size_t>& compaction_enabled_cf_indices,
143 const std::vector<ColumnFamilyHandle*>& handles,
144 TransactionDB** dbptr);
145 static Status WrapStackableDB(
146 StackableDB* db, const TransactionDBOptions& txn_db_options,
147 const std::vector<size_t>& compaction_enabled_cf_indices,
148 const std::vector<ColumnFamilyHandle*>& handles, TransactionDB** dbptr);
149 virtual ~TransactionDB() {}
150
151 // Starts a new Transaction.
152 //
153 // Caller is responsible for deleting the returned transaction when no
154 // longer needed.
155 //
156 // If old_txn is not null, BeginTransaction will reuse this Transaction
157 // handle instead of allocating a new one. This is an optimization to avoid
158 // extra allocations when repeatedly creating transactions.
159 virtual Transaction* BeginTransaction(
160 const WriteOptions& write_options,
161 const TransactionOptions& txn_options = TransactionOptions(),
162 Transaction* old_txn = nullptr) = 0;
163
164 virtual Transaction* GetTransactionByName(const TransactionName& name) = 0;
165 virtual void GetAllPreparedTransactions(std::vector<Transaction*>* trans) = 0;
166
167 // Returns set of all locks held.
168 //
169 // The mapping is column family id -> KeyLockInfo
170 virtual std::unordered_multimap<uint32_t, KeyLockInfo>
171 GetLockStatusData() = 0;
172
173 protected:
174 // To Create an TransactionDB, call Open()
175 explicit TransactionDB(DB* db) : StackableDB(db) {}
176
177 private:
178 // No copying allowed
179 TransactionDB(const TransactionDB&);
180 void operator=(const TransactionDB&);
181 };
182
183 } // namespace rocksdb
184
185 #endif // ROCKSDB_LITE