]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #pragma once | |
7 | #ifndef ROCKSDB_LITE | |
8 | ||
9 | #include <string> | |
10 | #include <utility> | |
11 | #include <vector> | |
12 | ||
13 | #include "rocksdb/comparator.h" | |
14 | #include "rocksdb/db.h" | |
15 | #include "rocksdb/utilities/stackable_db.h" | |
16 | #include "rocksdb/utilities/transaction.h" | |
17 | ||
18 | // Database with Transaction support. | |
19 | // | |
20 | // See transaction.h and examples/transaction_example.cc | |
21 | ||
f67539c2 | 22 | namespace ROCKSDB_NAMESPACE { |
7c673cae FG |
23 | |
24 | class TransactionDBMutexFactory; | |
25 | ||
11fdf7f2 TL |
// Policy for when a transaction's data is written into the DB
// (see TransactionDBOptions::write_policy).
enum TxnDBWritePolicy {
  // Write only the committed data.
  WRITE_COMMITTED = 0,
  // Write data after the prepare phase of 2pc.
  WRITE_PREPARED = 1,
  // Write data before the prepare phase of 2pc.
  WRITE_UNPREPARED = 2
};
31 | ||
1e59de90 TL |
// Default used for TransactionDBOptions::max_num_deadlocks.
constexpr uint32_t kInitialMaxDeadlocks = 5;
33 | ||
class LockManager;
struct RangeLockInfo;

// Handle to a lock manager.
//
// Intended workflow:
//  * Create a lock manager through a factory function (for example
//    NewRangeLockManager()) and keep the handle it returns.
//  * The handle type for a particular kind of lock manager exposes the extra
//    methods and parameters needed to control that lock manager.
//  * Give the handle to RocksDB via TransactionDBOptions::lock_mgr_handle;
//    it will then be used to perform locking.
class LockManagerHandle {
 public:
  // Called by PessimisticTransactionDB to obtain the Lock Manager it is
  // going to use.
  virtual LockManager* getLockManager() = 0;

  virtual ~LockManagerHandle() = default;
};
53 | ||
// Same as class Endpoint, but uses std::string to manage the buffer
// allocation.
struct EndpointWithString {
  // Owned bytes of the endpoint.
  std::string slice;
  // Defaults to false so that a default-constructed endpoint never carries an
  // indeterminate flag.
  // NOTE(review): presumably mirrors the flag of the same name on class
  // Endpoint - confirm there.
  bool inf_suffix = false;
};
59 | ||
60 | struct RangeDeadlockInfo { | |
61 | TransactionID m_txn_id; | |
62 | uint32_t m_cf_id; | |
63 | bool m_exclusive; | |
64 | ||
65 | EndpointWithString m_start; | |
66 | EndpointWithString m_end; | |
67 | }; | |
68 | ||
69 | struct RangeDeadlockPath { | |
70 | std::vector<RangeDeadlockInfo> path; | |
71 | bool limit_exceeded; | |
72 | int64_t deadlock_time; | |
73 | ||
74 | explicit RangeDeadlockPath(std::vector<RangeDeadlockInfo> path_entry, | |
75 | const int64_t& dl_time) | |
76 | : path(path_entry), limit_exceeded(false), deadlock_time(dl_time) {} | |
77 | ||
78 | // empty path, limit exceeded constructor and default constructor | |
79 | explicit RangeDeadlockPath(const int64_t& dl_time = 0, bool limit = false) | |
80 | : path(0), limit_exceeded(limit), deadlock_time(dl_time) {} | |
81 | ||
82 | bool empty() { return path.empty() && !limit_exceeded; } | |
83 | }; | |
84 | ||
85 | // A handle to control RangeLockManager (Range-based lock manager) from outside | |
86 | // RocksDB | |
87 | class RangeLockManagerHandle : public LockManagerHandle { | |
88 | public: | |
89 | // Set total amount of lock memory to use. | |
90 | // | |
91 | // @return 0 Ok | |
92 | // @return EDOM Failed to set because currently using more memory than | |
93 | // specified | |
94 | virtual int SetMaxLockMemory(size_t max_lock_memory) = 0; | |
95 | virtual size_t GetMaxLockMemory() = 0; | |
96 | ||
97 | using RangeLockStatus = | |
98 | std::unordered_multimap<ColumnFamilyId, RangeLockInfo>; | |
99 | ||
100 | // Lock Escalation barrier check function. | |
101 | // It is called for a couple of endpoints A and B, such that A < B. | |
102 | // If escalation_barrier_check_func(A, B)==true, then there's a lock | |
103 | // escalation barrier between A and B, and lock escalation is not allowed | |
104 | // to bridge the gap between A and B. | |
105 | // | |
106 | // The function may be called from any thread that acquires or releases | |
107 | // locks. It should not throw exceptions. There is currently no way to return | |
108 | // an error. | |
109 | using EscalationBarrierFunc = | |
110 | std::function<bool(const Endpoint& a, const Endpoint& b)>; | |
111 | ||
112 | // Set the user-provided barrier check function | |
113 | virtual void SetEscalationBarrierFunc(EscalationBarrierFunc func) = 0; | |
114 | ||
115 | virtual RangeLockStatus GetRangeLockStatusData() = 0; | |
116 | ||
117 | class Counters { | |
118 | public: | |
119 | // Number of times lock escalation was triggered (for all column families) | |
120 | uint64_t escalation_count; | |
121 | ||
122 | // Number of times lock acquisition had to wait for a conflicting lock | |
123 | // to be released. This counts both successful waits (where the desired | |
124 | // lock was acquired) and waits that timed out or got other error. | |
125 | uint64_t lock_wait_count; | |
126 | ||
127 | // How much memory is currently used for locks (total for all column | |
128 | // families) | |
129 | uint64_t current_lock_memory; | |
130 | }; | |
131 | ||
132 | // Get the current counter values | |
133 | virtual Counters GetStatus() = 0; | |
134 | ||
135 | // Functions for range-based Deadlock reporting. | |
136 | virtual std::vector<RangeDeadlockPath> GetRangeDeadlockInfoBuffer() = 0; | |
137 | virtual void SetRangeDeadlockInfoBufferSize(uint32_t target_size) = 0; | |
138 | ||
139 | virtual ~RangeLockManagerHandle() {} | |
140 | }; | |
141 | ||
142 | // A factory function to create a Range Lock Manager. The created object should | |
143 | // be: | |
144 | // 1. Passed in TransactionDBOptions::lock_mgr_handle to open the database in | |
145 | // range-locking mode | |
146 | // 2. Used to control the lock manager when the DB is already open. | |
147 | RangeLockManagerHandle* NewRangeLockManager( | |
148 | std::shared_ptr<TransactionDBMutexFactory> mutex_factory); | |
11fdf7f2 | 149 | |
7c673cae FG |
150 | struct TransactionDBOptions { |
151 | // Specifies the maximum number of keys that can be locked at the same time | |
152 | // per column family. | |
153 | // If the number of locked keys is greater than max_num_locks, transaction | |
154 | // writes (or GetForUpdate) will return an error. | |
155 | // If this value is not positive, no limit will be enforced. | |
156 | int64_t max_num_locks = -1; | |
157 | ||
11fdf7f2 TL |
158 | // Stores the number of latest deadlocks to track |
159 | uint32_t max_num_deadlocks = kInitialMaxDeadlocks; | |
160 | ||
7c673cae FG |
161 | // Increasing this value will increase the concurrency by dividing the lock |
162 | // table (per column family) into more sub-tables, each with their own | |
1e59de90 | 163 | // separate mutex. |
7c673cae FG |
164 | size_t num_stripes = 16; |
165 | ||
166 | // If positive, specifies the default wait timeout in milliseconds when | |
167 | // a transaction attempts to lock a key if not specified by | |
168 | // TransactionOptions::lock_timeout. | |
169 | // | |
170 | // If 0, no waiting is done if a lock cannot instantly be acquired. | |
171 | // If negative, there is no timeout. Not using a timeout is not recommended | |
172 | // as it can lead to deadlocks. Currently, there is no deadlock-detection to | |
1e59de90 | 173 | // recover from a deadlock. |
7c673cae FG |
174 | int64_t transaction_lock_timeout = 1000; // 1 second |
175 | ||
176 | // If positive, specifies the wait timeout in milliseconds when writing a key | |
177 | // OUTSIDE of a transaction (ie by calling DB::Put(),Merge(),Delete(),Write() | |
178 | // directly). | |
179 | // If 0, no waiting is done if a lock cannot instantly be acquired. | |
180 | // If negative, there is no timeout and will block indefinitely when acquiring | |
181 | // a lock. | |
182 | // | |
183 | // Not using a timeout can lead to deadlocks. Currently, there | |
184 | // is no deadlock-detection to recover from a deadlock. While DB writes | |
185 | // cannot deadlock with other DB writes, they can deadlock with a transaction. | |
186 | // A negative timeout should only be used if all transactions have a small | |
187 | // expiration set. | |
188 | int64_t default_lock_timeout = 1000; // 1 second | |
189 | ||
11fdf7f2 | 190 | // If set, the TransactionDB will use this implementation of a mutex and |
7c673cae FG |
191 | // condition variable for all transaction locking instead of the default |
192 | // mutex/condvar implementation. | |
193 | std::shared_ptr<TransactionDBMutexFactory> custom_mutex_factory; | |
11fdf7f2 TL |
194 | |
195 | // The policy for when to write the data into the DB. The default policy is to | |
196 | // write only the committed data (WRITE_COMMITTED). The data could be written | |
197 | // before the commit phase. The DB then needs to provide the mechanisms to | |
198 | // tell apart committed from uncommitted data. | |
199 | TxnDBWritePolicy write_policy = TxnDBWritePolicy::WRITE_COMMITTED; | |
200 | ||
201 | // TODO(myabandeh): remove this option | |
202 | // Note: this is a temporary option as a hot fix in rollback of writeprepared | |
203 | // txns in myrocks. MyRocks uses merge operands for autoinc column id without | |
204 | // however obtaining locks. This breaks the assumption behind the rollback | |
205 | // logic in myrocks. This hack of simply not rolling back merge operands works | |
206 | // for the special way that myrocks uses this operands. | |
207 | bool rollback_merge_operands = false; | |
494da23a | 208 | |
1e59de90 TL |
209 | // nullptr means use default lock manager. |
210 | // Other value means the user provides a custom lock manager. | |
211 | std::shared_ptr<LockManagerHandle> lock_mgr_handle; | |
212 | ||
f67539c2 TL |
213 | // If true, the TransactionDB implementation might skip concurrency control |
214 | // unless it is overridden by TransactionOptions or | |
1e59de90 | 215 | // TransactionDBWriteOptimizations. This can be used in conjunction with |
f67539c2 TL |
216 | // DBOptions::unordered_write when the TransactionDB is used solely for write |
217 | // ordering rather than concurrency control. | |
218 | bool skip_concurrency_control = false; | |
219 | ||
220 | // This option is only valid for write unprepared. If a write batch exceeds | |
221 | // this threshold, then the transaction will implicitly flush the currently | |
222 | // pending writes into the database. A value of 0 or less means no limit. | |
223 | int64_t default_write_batch_flush_threshold = 0; | |
224 | ||
1e59de90 TL |
225 | // This option is valid only for write-prepared/write-unprepared. Transaction |
226 | // will rely on this callback to determine if a key should be rolled back | |
227 | // with Delete or SingleDelete when necessary. If the callback returns true, | |
228 | // then SingleDelete should be used. If the callback is not callable or the | |
229 | // callback returns false, then a Delete is used. | |
230 | // The application should ensure thread-safety of this callback. | |
231 | // The callback should not throw because RocksDB is not exception-safe. | |
232 | // The callback may be removed if we allow mixing Delete and SingleDelete in | |
233 | // the future. | |
234 | std::function<bool(TransactionDB* /*db*/, | |
235 | ColumnFamilyHandle* /*column_family*/, | |
236 | const Slice& /*key*/)> | |
237 | rollback_deletion_type_callback; | |
238 | ||
494da23a TL |
239 | private: |
240 | // 128 entries | |
1e59de90 TL |
241 | // Should the default value change, please also update wp_snapshot_cache_bits |
242 | // in db_stress_gflags.cc | |
494da23a TL |
243 | size_t wp_snapshot_cache_bits = static_cast<size_t>(7); |
244 | // 8m entry, 64MB size | |
1e59de90 TL |
245 | // Should the default value change, please also update wp_commit_cache_bits |
246 | // in db_stress_gflags.cc | |
494da23a TL |
247 | size_t wp_commit_cache_bits = static_cast<size_t>(23); |
248 | ||
f67539c2 TL |
249 | // For testing, whether transaction name should be auto-generated or not. This |
250 | // is useful for write unprepared which requires named transactions. | |
251 | bool autogenerate_name = false; | |
252 | ||
494da23a | 253 | friend class WritePreparedTxnDB; |
f67539c2 | 254 | friend class WriteUnpreparedTxn; |
494da23a | 255 | friend class WritePreparedTransactionTestBase; |
f67539c2 | 256 | friend class TransactionTestBase; |
494da23a | 257 | friend class MySQLStyleTransactionTest; |
1e59de90 | 258 | friend class StressTest; |
7c673cae FG |
259 | }; |
260 | ||
// Options that apply to a single transaction.
struct TransactionOptions {
  // set_snapshot=true has the same effect as calling
  // Transaction::SetSnapshot().
  bool set_snapshot = false;

  // When true, the transaction checks before acquiring locks whether doing so
  // would cause a deadlock. If it would, the call returns Status::Busy and
  // the user should retry the transaction.
  bool deadlock_detect = false;

  // If set, it states that the CommitTimeWriteBatch represents the latest
  // state of the application, has only one sub-batch, i.e., no duplicate
  // keys, and is meant to be used later during recovery. It enables an
  // optimization to postpone updating the memtable with CommitTimeWriteBatch
  // to only SwitchMemtable or recovery.
  // This option does not affect write-committed. Only
  // write-prepared/write-unprepared transactions will be affected.
  bool use_only_the_last_commit_time_batch_for_recovery = false;

  // TODO(agiardullo): TransactionDB does not yet support comparators that
  // allow two non-equal keys to be equivalent. Ie, cmp->Compare(a,b) should
  // only return 0 if a.compare(b) returns 0.

  // Wait timeout in milliseconds when this transaction attempts to lock a
  // key.
  //
  //  > 0: wait for at most this many milliseconds.
  //  = 0: no waiting is done if a lock cannot instantly be acquired.
  //  < 0: TransactionDBOptions::transaction_lock_timeout will be used.
  int64_t lock_timeout = -1;

  // Expiration duration in milliseconds. If non-negative, transactions that
  // last longer than this many milliseconds will fail to commit. If not set,
  // a forgotten transaction that is never committed, rolled back, or deleted
  // will never relinquish any locks it holds. This could prevent keys from
  // being written by other writers.
  int64_t expiration = -1;

  // The number of traversals to make during deadlock detection.
  int64_t deadlock_detect_depth = 50;

  // The maximum number of bytes used for the write batch. 0 means no limit.
  size_t max_write_batch_size = 0;

  // Skip Concurrency Control. This could be used as an optimization if the
  // application knows that the transaction would not have any conflict with
  // concurrent transactions. It could also be used during recovery if (i)
  // application guarantees no conflict between prepared transactions in the
  // WAL (ii) application guarantees that recovered transactions will be
  // rolled back/commit before new transactions start.
  // Default: false
  bool skip_concurrency_control = false;

  // In pessimistic transaction, if this is true, then you can skip Prepare
  // before Commit, otherwise, you must Prepare before Commit.
  bool skip_prepare = true;

  // See TransactionDBOptions::default_write_batch_flush_threshold for
  // description. If a negative value is specified, then the default value
  // from TransactionDBOptions is used.
  int64_t write_batch_flush_threshold = -1;
};
323 | ||
// Per-write optimizations that do not involve transactions. A TransactionDB
// implementation might or might not make use of the specified optimizations.
struct TransactionDBWriteOptimizations {
  // When true, the application guarantees that the key-set in the write batch
  // does not conflict with any concurrent transaction, and hence the
  // concurrency control mechanism could be skipped for this write.
  bool skip_concurrency_control = false;
  // When true, the application guarantees that there is no duplicate
  // <column family, key> in the write batch, and any employed mechanism to
  // handle duplicate keys could be skipped.
  bool skip_duplicate_key_check = false;
};
337 | ||
338 | struct KeyLockInfo { | |
339 | std::string key; | |
340 | std::vector<TransactionID> ids; | |
341 | bool exclusive; | |
342 | }; | |
343 | ||
20effc67 | 344 | struct RangeLockInfo { |
1e59de90 TL |
345 | EndpointWithString start; |
346 | EndpointWithString end; | |
20effc67 TL |
347 | std::vector<TransactionID> ids; |
348 | bool exclusive; | |
349 | }; | |
350 | ||
11fdf7f2 TL |
351 | struct DeadlockInfo { |
352 | TransactionID m_txn_id; | |
353 | uint32_t m_cf_id; | |
11fdf7f2 | 354 | bool m_exclusive; |
494da23a | 355 | std::string m_waiting_key; |
11fdf7f2 TL |
356 | }; |
357 | ||
358 | struct DeadlockPath { | |
359 | std::vector<DeadlockInfo> path; | |
360 | bool limit_exceeded; | |
361 | int64_t deadlock_time; | |
362 | ||
363 | explicit DeadlockPath(std::vector<DeadlockInfo> path_entry, | |
364 | const int64_t& dl_time) | |
365 | : path(path_entry), limit_exceeded(false), deadlock_time(dl_time) {} | |
366 | ||
367 | // empty path, limit exceeded constructor and default constructor | |
368 | explicit DeadlockPath(const int64_t& dl_time = 0, bool limit = false) | |
369 | : path(0), limit_exceeded(limit), deadlock_time(dl_time) {} | |
370 | ||
371 | bool empty() { return path.empty() && !limit_exceeded; } | |
372 | }; | |
373 | ||
7c673cae FG |
374 | class TransactionDB : public StackableDB { |
375 | public: | |
11fdf7f2 TL |
376 | // Optimized version of ::Write that receives more optimization request such |
377 | // as skip_concurrency_control. | |
378 | using StackableDB::Write; | |
379 | virtual Status Write(const WriteOptions& opts, | |
380 | const TransactionDBWriteOptimizations&, | |
381 | WriteBatch* updates) { | |
382 | // The default implementation ignores TransactionDBWriteOptimizations and | |
383 | // falls back to the un-optimized version of ::Write | |
384 | return Write(opts, updates); | |
385 | } | |
20effc67 TL |
386 | // Transactional `DeleteRange()` is not yet supported. |
387 | // However, users who know their deleted range does not conflict with | |
388 | // anything can still use it via the `Write()` API. In all cases, the | |
389 | // `Write()` overload specifying `TransactionDBWriteOptimizations` must be | |
390 | // used and `skip_concurrency_control` must be set. When using either | |
391 | // WRITE_PREPARED or WRITE_UNPREPARED , `skip_duplicate_key_check` must | |
392 | // additionally be set. | |
1e59de90 | 393 | using StackableDB::DeleteRange; |
20effc67 TL |
394 | virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*, |
395 | const Slice&, const Slice&) override { | |
396 | return Status::NotSupported(); | |
397 | } | |
7c673cae FG |
398 | // Open a TransactionDB similar to DB::Open(). |
399 | // Internally call PrepareWrap() and WrapDB() | |
11fdf7f2 | 400 | // If the return status is not ok, then dbptr is set to nullptr. |
7c673cae FG |
401 | static Status Open(const Options& options, |
402 | const TransactionDBOptions& txn_db_options, | |
403 | const std::string& dbname, TransactionDB** dbptr); | |
404 | ||
405 | static Status Open(const DBOptions& db_options, | |
406 | const TransactionDBOptions& txn_db_options, | |
407 | const std::string& dbname, | |
408 | const std::vector<ColumnFamilyDescriptor>& column_families, | |
409 | std::vector<ColumnFamilyHandle*>* handles, | |
410 | TransactionDB** dbptr); | |
7c673cae FG |
411 | // Note: PrepareWrap() may change parameters, make copies before the |
412 | // invocation if needed. | |
7c673cae FG |
413 | static void PrepareWrap(DBOptions* db_options, |
414 | std::vector<ColumnFamilyDescriptor>* column_families, | |
415 | std::vector<size_t>* compaction_enabled_cf_indices); | |
11fdf7f2 TL |
416 | // If the return status is not ok, then dbptr will bet set to nullptr. The |
417 | // input db parameter might or might not be deleted as a result of the | |
418 | // failure. If it is properly deleted it will be set to nullptr. If the return | |
419 | // status is ok, the ownership of db is transferred to dbptr. | |
7c673cae FG |
420 | static Status WrapDB(DB* db, const TransactionDBOptions& txn_db_options, |
421 | const std::vector<size_t>& compaction_enabled_cf_indices, | |
422 | const std::vector<ColumnFamilyHandle*>& handles, | |
423 | TransactionDB** dbptr); | |
11fdf7f2 TL |
424 | // If the return status is not ok, then dbptr will bet set to nullptr. The |
425 | // input db parameter might or might not be deleted as a result of the | |
426 | // failure. If it is properly deleted it will be set to nullptr. If the return | |
427 | // status is ok, the ownership of db is transferred to dbptr. | |
7c673cae FG |
428 | static Status WrapStackableDB( |
429 | StackableDB* db, const TransactionDBOptions& txn_db_options, | |
430 | const std::vector<size_t>& compaction_enabled_cf_indices, | |
431 | const std::vector<ColumnFamilyHandle*>& handles, TransactionDB** dbptr); | |
11fdf7f2 TL |
432 | // Since the destructor in StackableDB is virtual, this destructor is virtual |
433 | // too. The root db will be deleted by the base's destructor. | |
434 | ~TransactionDB() override {} | |
7c673cae FG |
435 | |
436 | // Starts a new Transaction. | |
437 | // | |
438 | // Caller is responsible for deleting the returned transaction when no | |
439 | // longer needed. | |
440 | // | |
441 | // If old_txn is not null, BeginTransaction will reuse this Transaction | |
442 | // handle instead of allocating a new one. This is an optimization to avoid | |
443 | // extra allocations when repeatedly creating transactions. | |
444 | virtual Transaction* BeginTransaction( | |
445 | const WriteOptions& write_options, | |
446 | const TransactionOptions& txn_options = TransactionOptions(), | |
447 | Transaction* old_txn = nullptr) = 0; | |
448 | ||
449 | virtual Transaction* GetTransactionByName(const TransactionName& name) = 0; | |
450 | virtual void GetAllPreparedTransactions(std::vector<Transaction*>* trans) = 0; | |
451 | ||
452 | // Returns set of all locks held. | |
453 | // | |
454 | // The mapping is column family id -> KeyLockInfo | |
455 | virtual std::unordered_multimap<uint32_t, KeyLockInfo> | |
456 | GetLockStatusData() = 0; | |
20effc67 | 457 | |
11fdf7f2 TL |
458 | virtual std::vector<DeadlockPath> GetDeadlockInfoBuffer() = 0; |
459 | virtual void SetDeadlockInfoBufferSize(uint32_t target_size) = 0; | |
7c673cae | 460 | |
1e59de90 TL |
461 | // Create a snapshot and assign ts to it. Return the snapshot to caller. The |
462 | // snapshot-timestamp mapping is also tracked by the database. | |
463 | // Caller must ensure there are no active writes when this API is called. | |
464 | virtual std::pair<Status, std::shared_ptr<const Snapshot>> | |
465 | CreateTimestampedSnapshot(TxnTimestamp ts) = 0; | |
466 | ||
467 | // Return the latest timestamped snapshot if present. | |
468 | std::shared_ptr<const Snapshot> GetLatestTimestampedSnapshot() const { | |
469 | return GetTimestampedSnapshot(kMaxTxnTimestamp); | |
470 | } | |
471 | // Return the snapshot correponding to given timestamp. If ts is | |
472 | // kMaxTxnTimestamp, then we return the latest timestamped snapshot if | |
473 | // present. Othersise, we return the snapshot whose timestamp is equal to | |
474 | // `ts`. If no such snapshot exists, then we return null. | |
475 | virtual std::shared_ptr<const Snapshot> GetTimestampedSnapshot( | |
476 | TxnTimestamp ts) const = 0; | |
477 | // Release timestamped snapshots whose timestamps are less than or equal to | |
478 | // ts. | |
479 | virtual void ReleaseTimestampedSnapshotsOlderThan(TxnTimestamp ts) = 0; | |
480 | ||
481 | // Get all timestamped snapshots which will be stored in | |
482 | // timestamped_snapshots. | |
483 | Status GetAllTimestampedSnapshots( | |
484 | std::vector<std::shared_ptr<const Snapshot>>& timestamped_snapshots) | |
485 | const { | |
486 | return GetTimestampedSnapshots(/*ts_lb=*/0, /*ts_ub=*/kMaxTxnTimestamp, | |
487 | timestamped_snapshots); | |
488 | } | |
489 | ||
490 | // Get all timestamped snapshots whose timestamps fall within [ts_lb, ts_ub). | |
491 | // timestamped_snapshots will be cleared and contain returned snapshots. | |
492 | virtual Status GetTimestampedSnapshots( | |
493 | TxnTimestamp ts_lb, TxnTimestamp ts_ub, | |
494 | std::vector<std::shared_ptr<const Snapshot>>& timestamped_snapshots) | |
495 | const = 0; | |
496 | ||
7c673cae FG |
497 | protected: |
498 | // To Create an TransactionDB, call Open() | |
11fdf7f2 | 499 | // The ownership of db is transferred to the base StackableDB |
7c673cae | 500 | explicit TransactionDB(DB* db) : StackableDB(db) {} |
7c673cae | 501 | // No copying allowed |
f67539c2 TL |
502 | TransactionDB(const TransactionDB&) = delete; |
503 | void operator=(const TransactionDB&) = delete; | |
7c673cae FG |
504 | }; |
505 | ||
f67539c2 | 506 | } // namespace ROCKSDB_NAMESPACE |
7c673cae FG |
507 | |
508 | #endif // ROCKSDB_LITE |