git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/utilities/transactions/write_unprepared_txn.h
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / utilities / transactions / write_unprepared_txn.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #pragma once
7
8 #ifndef ROCKSDB_LITE
9
10 #include <set>
11
12 #include "utilities/transactions/write_prepared_txn.h"
13 #include "utilities/transactions/write_unprepared_txn_db.h"
14
15 namespace rocksdb {
16
17 class WriteUnpreparedTxnDB;
18 class WriteUnpreparedTxn;
19
20 class WriteUnpreparedTxnReadCallback : public ReadCallback {
21 public:
22 WriteUnpreparedTxnReadCallback(WritePreparedTxnDB* db,
23 SequenceNumber snapshot,
24 SequenceNumber min_uncommitted,
25 WriteUnpreparedTxn* txn)
26 // Pass our last uncommitted seq as the snapshot to the parent class to
27 // ensure that the parent will not prematurely filter out own writes. We
28 // will do the exact comparison agaisnt snapshots in IsVisibleFullCheck
29 // override.
30 : ReadCallback(CalcMaxVisibleSeq(txn, snapshot), min_uncommitted),
31 db_(db),
32 txn_(txn),
33 wup_snapshot_(snapshot) {}
34
35 virtual bool IsVisibleFullCheck(SequenceNumber seq) override;
36
37 bool CanReseekToSkip() override {
38 return wup_snapshot_ == max_visible_seq_;
39 // Otherwise our own writes uncommitted are in db, and the assumptions
40 // behind reseek optimizations are no longer valid.
41 }
42
43 // TODO(myabandeh): override Refresh when Iterator::Refresh is supported
44 private:
45 static SequenceNumber CalcMaxVisibleSeq(WriteUnpreparedTxn* txn,
46 SequenceNumber snapshot_seq) {
47 SequenceNumber max_unprepared = CalcMaxUnpreparedSequenceNumber(txn);
48 assert(snapshot_seq < max_unprepared || max_unprepared == 0 ||
49 snapshot_seq == kMaxSequenceNumber);
50 return std::max(max_unprepared, snapshot_seq);
51 }
52 static SequenceNumber CalcMaxUnpreparedSequenceNumber(
53 WriteUnpreparedTxn* txn);
54 WritePreparedTxnDB* db_;
55 WriteUnpreparedTxn* txn_;
56 SequenceNumber wup_snapshot_;
57 };
58
59 class WriteUnpreparedTxn : public WritePreparedTxn {
60 public:
61 WriteUnpreparedTxn(WriteUnpreparedTxnDB* db,
62 const WriteOptions& write_options,
63 const TransactionOptions& txn_options);
64
65 virtual ~WriteUnpreparedTxn();
66
67 using TransactionBaseImpl::Put;
68 virtual Status Put(ColumnFamilyHandle* column_family, const Slice& key,
69 const Slice& value,
70 const bool assume_tracked = false) override;
71 virtual Status Put(ColumnFamilyHandle* column_family, const SliceParts& key,
72 const SliceParts& value,
73 const bool assume_tracked = false) override;
74
75 using TransactionBaseImpl::Merge;
76 virtual Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
77 const Slice& value,
78 const bool assume_tracked = false) override;
79
80 using TransactionBaseImpl::Delete;
81 virtual Status Delete(ColumnFamilyHandle* column_family, const Slice& key,
82 const bool assume_tracked = false) override;
83 virtual Status Delete(ColumnFamilyHandle* column_family,
84 const SliceParts& key,
85 const bool assume_tracked = false) override;
86
87 using TransactionBaseImpl::SingleDelete;
88 virtual Status SingleDelete(ColumnFamilyHandle* column_family,
89 const Slice& key,
90 const bool assume_tracked = false) override;
91 virtual Status SingleDelete(ColumnFamilyHandle* column_family,
92 const SliceParts& key,
93 const bool assume_tracked = false) override;
94
95 virtual Status RebuildFromWriteBatch(WriteBatch*) override {
96 // This function was only useful for recovering prepared transactions, but
97 // is unused for write prepared because a transaction may consist of
98 // multiple write batches.
99 //
100 // If there are use cases outside of recovery that can make use of this,
101 // then support could be added.
102 return Status::NotSupported("Not supported for WriteUnprepared");
103 }
104
105 const std::map<SequenceNumber, size_t>& GetUnpreparedSequenceNumbers();
106
107 void UpdateWriteKeySet(uint32_t cfid, const Slice& key);
108
109 protected:
110 void Initialize(const TransactionOptions& txn_options) override;
111
112 Status PrepareInternal() override;
113
114 Status CommitWithoutPrepareInternal() override;
115 Status CommitInternal() override;
116
117 Status RollbackInternal() override;
118
119 // Get and GetIterator needs to be overridden so that a ReadCallback to
120 // handle read-your-own-write is used.
121 using Transaction::Get;
122 virtual Status Get(const ReadOptions& options,
123 ColumnFamilyHandle* column_family, const Slice& key,
124 PinnableSlice* value) override;
125
126 using Transaction::GetIterator;
127 virtual Iterator* GetIterator(const ReadOptions& options) override;
128 virtual Iterator* GetIterator(const ReadOptions& options,
129 ColumnFamilyHandle* column_family) override;
130
131 private:
132 friend class WriteUnpreparedTransactionTest_ReadYourOwnWrite_Test;
133 friend class WriteUnpreparedTransactionTest_RecoveryTest_Test;
134 friend class WriteUnpreparedTransactionTest_UnpreparedBatch_Test;
135 friend class WriteUnpreparedTxnDB;
136
137 Status MaybeFlushWriteBatchToDB();
138 Status FlushWriteBatchToDB(bool prepared);
139
140 // For write unprepared, we check on every writebatch append to see if
141 // max_write_batch_size_ has been exceeded, and then call
142 // FlushWriteBatchToDB if so. This logic is encapsulated in
143 // MaybeFlushWriteBatchToDB.
144 size_t max_write_batch_size_;
145 WriteUnpreparedTxnDB* wupt_db_;
146
147 // Ordered list of unprep_seq sequence numbers that we have already written
148 // to DB.
149 //
150 // This maps unprep_seq => prepare_batch_cnt for each unprepared batch
151 // written by this transaction.
152 //
153 // Note that this contains both prepared and unprepared batches, since they
154 // are treated similarily in prepare heap/commit map, so it simplifies the
155 // commit callbacks.
156 std::map<SequenceNumber, size_t> unprep_seqs_;
157
158 // Set of keys that have written to that have already been written to DB
159 // (ie. not in write_batch_).
160 //
161 std::map<uint32_t, std::vector<std::string>> write_set_keys_;
162 };
163
164 } // namespace rocksdb
165
166 #endif // ROCKSDB_LITE