]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/utilities/transactions/transaction_util.cc
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / utilities / transactions / transaction_util.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #ifndef ROCKSDB_LITE
7
8 #ifndef __STDC_FORMAT_MACROS
9 #define __STDC_FORMAT_MACROS
10 #endif
11
12 #include "utilities/transactions/transaction_util.h"
13
14 #include <inttypes.h>
15 #include <string>
16 #include <vector>
17
18 #include "db/db_impl.h"
19 #include "rocksdb/status.h"
20 #include "rocksdb/utilities/write_batch_with_index.h"
21 #include "util/string_util.h"
22
23 namespace rocksdb {
24
25 Status TransactionUtil::CheckKeyForConflicts(
26 DBImpl* db_impl, ColumnFamilyHandle* column_family, const std::string& key,
27 SequenceNumber snap_seq, bool cache_only, ReadCallback* snap_checker,
28 SequenceNumber min_uncommitted) {
29 Status result;
30
31 auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
32 auto cfd = cfh->cfd();
33 SuperVersion* sv = db_impl->GetAndRefSuperVersion(cfd);
34
35 if (sv == nullptr) {
36 result = Status::InvalidArgument("Could not access column family " +
37 cfh->GetName());
38 }
39
40 if (result.ok()) {
41 SequenceNumber earliest_seq =
42 db_impl->GetEarliestMemTableSequenceNumber(sv, true);
43
44 result = CheckKey(db_impl, sv, earliest_seq, snap_seq, key, cache_only,
45 snap_checker, min_uncommitted);
46
47 db_impl->ReturnAndCleanupSuperVersion(cfd, sv);
48 }
49
50 return result;
51 }
52
53 Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv,
54 SequenceNumber earliest_seq,
55 SequenceNumber snap_seq,
56 const std::string& key, bool cache_only,
57 ReadCallback* snap_checker,
58 SequenceNumber min_uncommitted) {
59 Status result;
60 bool need_to_read_sst = false;
61
62 // Since it would be too slow to check the SST files, we will only use
63 // the memtables to check whether there have been any recent writes
64 // to this key after it was accessed in this transaction. But if the
65 // Memtables do not contain a long enough history, we must fail the
66 // transaction.
67 if (earliest_seq == kMaxSequenceNumber) {
68 // The age of this memtable is unknown. Cannot rely on it to check
69 // for recent writes. This error shouldn't happen often in practice as
70 // the Memtable should have a valid earliest sequence number except in some
71 // corner cases (such as error cases during recovery).
72 need_to_read_sst = true;
73
74 if (cache_only) {
75 result = Status::TryAgain(
76 "Transaction ould not check for conflicts as the MemTable does not "
77 "countain a long enough history to check write at SequenceNumber: ",
78 ToString(snap_seq));
79 }
80 } else if (snap_seq < earliest_seq || min_uncommitted <= earliest_seq) {
81 // Use <= for min_uncommitted since earliest_seq is actually the largest sec
82 // before this memtable was created
83 need_to_read_sst = true;
84
85 if (cache_only) {
86 // The age of this memtable is too new to use to check for recent
87 // writes.
88 char msg[300];
89 snprintf(msg, sizeof(msg),
90 "Transaction could not check for conflicts for operation at "
91 "SequenceNumber %" PRIu64
92 " as the MemTable only contains changes newer than "
93 "SequenceNumber %" PRIu64
94 ". Increasing the value of the "
95 "max_write_buffer_number_to_maintain option could reduce the "
96 "frequency "
97 "of this error.",
98 snap_seq, earliest_seq);
99 result = Status::TryAgain(msg);
100 }
101 }
102
103 if (result.ok()) {
104 SequenceNumber seq = kMaxSequenceNumber;
105 bool found_record_for_key = false;
106
107 Status s = db_impl->GetLatestSequenceForKey(sv, key, !need_to_read_sst,
108 &seq, &found_record_for_key);
109
110 if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) {
111 result = s;
112 } else if (found_record_for_key) {
113 bool write_conflict = snap_checker == nullptr
114 ? snap_seq < seq
115 : !snap_checker->IsVisible(seq);
116 if (write_conflict) {
117 result = Status::Busy();
118 }
119 }
120 }
121
122 return result;
123 }
124
125 Status TransactionUtil::CheckKeysForConflicts(DBImpl* db_impl,
126 const TransactionKeyMap& key_map,
127 bool cache_only) {
128 Status result;
129
130 for (auto& key_map_iter : key_map) {
131 uint32_t cf_id = key_map_iter.first;
132 const auto& keys = key_map_iter.second;
133
134 SuperVersion* sv = db_impl->GetAndRefSuperVersion(cf_id);
135 if (sv == nullptr) {
136 result = Status::InvalidArgument("Could not access column family " +
137 ToString(cf_id));
138 break;
139 }
140
141 SequenceNumber earliest_seq =
142 db_impl->GetEarliestMemTableSequenceNumber(sv, true);
143
144 // For each of the keys in this transaction, check to see if someone has
145 // written to this key since the start of the transaction.
146 for (const auto& key_iter : keys) {
147 const auto& key = key_iter.first;
148 const SequenceNumber key_seq = key_iter.second.seq;
149
150 result = CheckKey(db_impl, sv, earliest_seq, key_seq, key, cache_only);
151
152 if (!result.ok()) {
153 break;
154 }
155 }
156
157 db_impl->ReturnAndCleanupSuperVersion(cf_id, sv);
158
159 if (!result.ok()) {
160 break;
161 }
162 }
163
164 return result;
165 }
166
167
168 } // namespace rocksdb
169
170 #endif // ROCKSDB_LITE