]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/table/get_context.cc
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / rocksdb / table / get_context.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
5
6 #include "table/get_context.h"
7 #include "db/merge_helper.h"
8 #include "db/pinned_iterators_manager.h"
9 #include "monitoring/perf_context_imp.h"
10 #include "monitoring/statistics.h"
11 #include "rocksdb/env.h"
12 #include "rocksdb/merge_operator.h"
13 #include "rocksdb/statistics.h"
14
15 namespace rocksdb {
16
17 namespace {
18
19 void appendToReplayLog(std::string* replay_log, ValueType type, Slice value) {
20 #ifndef ROCKSDB_LITE
21 if (replay_log) {
22 if (replay_log->empty()) {
23 // Optimization: in the common case of only one operation in the
24 // log, we allocate the exact amount of space needed.
25 replay_log->reserve(1 + VarintLength(value.size()) + value.size());
26 }
27 replay_log->push_back(type);
28 PutLengthPrefixedSlice(replay_log, value);
29 }
30 #endif // ROCKSDB_LITE
31 }
32
33 } // namespace
34
35 GetContext::GetContext(const Comparator* ucmp,
36 const MergeOperator* merge_operator, Logger* logger,
37 Statistics* statistics, GetState init_state,
38 const Slice& user_key, PinnableSlice* pinnable_val,
39 bool* value_found, MergeContext* merge_context,
40 RangeDelAggregator* _range_del_agg, Env* env,
41 SequenceNumber* seq,
42 PinnedIteratorsManager* _pinned_iters_mgr)
43 : ucmp_(ucmp),
44 merge_operator_(merge_operator),
45 logger_(logger),
46 statistics_(statistics),
47 state_(init_state),
48 user_key_(user_key),
49 pinnable_val_(pinnable_val),
50 value_found_(value_found),
51 merge_context_(merge_context),
52 range_del_agg_(_range_del_agg),
53 env_(env),
54 seq_(seq),
55 replay_log_(nullptr),
56 pinned_iters_mgr_(_pinned_iters_mgr) {
57 if (seq_) {
58 *seq_ = kMaxSequenceNumber;
59 }
60 }
61
62 // Called from TableCache::Get and Table::Get when file/block in which
63 // key may exist are not there in TableCache/BlockCache respectively. In this
64 // case we can't guarantee that key does not exist and are not permitted to do
65 // IO to be certain.Set the status=kFound and value_found=false to let the
66 // caller know that key may exist but is not there in memory
67 void GetContext::MarkKeyMayExist() {
68 state_ = kFound;
69 if (value_found_ != nullptr) {
70 *value_found_ = false;
71 }
72 }
73
74 void GetContext::SaveValue(const Slice& value, SequenceNumber seq) {
75 assert(state_ == kNotFound);
76 appendToReplayLog(replay_log_, kTypeValue, value);
77
78 state_ = kFound;
79 if (LIKELY(pinnable_val_ != nullptr)) {
80 pinnable_val_->PinSelf(value);
81 }
82 }
83
84 bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
85 const Slice& value, Cleanable* value_pinner) {
86 assert((state_ != kMerge && parsed_key.type != kTypeMerge) ||
87 merge_context_ != nullptr);
88 if (ucmp_->Equal(parsed_key.user_key, user_key_)) {
89 appendToReplayLog(replay_log_, parsed_key.type, value);
90
91 if (seq_ != nullptr) {
92 // Set the sequence number if it is uninitialized
93 if (*seq_ == kMaxSequenceNumber) {
94 *seq_ = parsed_key.sequence;
95 }
96 }
97
98 auto type = parsed_key.type;
99 // Key matches. Process it
100 if ((type == kTypeValue || type == kTypeMerge) &&
101 range_del_agg_ != nullptr && range_del_agg_->ShouldDelete(parsed_key)) {
102 type = kTypeRangeDeletion;
103 }
104 switch (type) {
105 case kTypeValue:
106 assert(state_ == kNotFound || state_ == kMerge);
107 if (kNotFound == state_) {
108 state_ = kFound;
109 if (LIKELY(pinnable_val_ != nullptr)) {
110 if (LIKELY(value_pinner != nullptr)) {
111 // If the backing resources for the value are provided, pin them
112 pinnable_val_->PinSlice(value, value_pinner);
113 } else {
114 // Otherwise copy the value
115 pinnable_val_->PinSelf(value);
116 }
117 }
118 } else if (kMerge == state_) {
119 assert(merge_operator_ != nullptr);
120 state_ = kFound;
121 if (LIKELY(pinnable_val_ != nullptr)) {
122 Status merge_status = MergeHelper::TimedFullMerge(
123 merge_operator_, user_key_, &value,
124 merge_context_->GetOperands(), pinnable_val_->GetSelf(),
125 logger_, statistics_, env_);
126 pinnable_val_->PinSelf();
127 if (!merge_status.ok()) {
128 state_ = kCorrupt;
129 }
130 }
131 }
132 return false;
133
134 case kTypeDeletion:
135 case kTypeSingleDeletion:
136 case kTypeRangeDeletion:
137 // TODO(noetzli): Verify correctness once merge of single-deletes
138 // is supported
139 assert(state_ == kNotFound || state_ == kMerge);
140 if (kNotFound == state_) {
141 state_ = kDeleted;
142 } else if (kMerge == state_) {
143 state_ = kFound;
144 if (LIKELY(pinnable_val_ != nullptr)) {
145 Status merge_status = MergeHelper::TimedFullMerge(
146 merge_operator_, user_key_, nullptr,
147 merge_context_->GetOperands(), pinnable_val_->GetSelf(),
148 logger_, statistics_, env_);
149 pinnable_val_->PinSelf();
150 if (!merge_status.ok()) {
151 state_ = kCorrupt;
152 }
153 }
154 }
155 return false;
156
157 case kTypeMerge:
158 assert(state_ == kNotFound || state_ == kMerge);
159 state_ = kMerge;
160 // value_pinner is not set from plain_table_reader.cc for example.
161 if (pinned_iters_mgr() && pinned_iters_mgr()->PinningEnabled() &&
162 value_pinner != nullptr) {
163 value_pinner->DelegateCleanupsTo(pinned_iters_mgr());
164 merge_context_->PushOperand(value, true /*value_pinned*/);
165 } else {
166 merge_context_->PushOperand(value, false);
167 }
168 return true;
169
170 default:
171 assert(false);
172 break;
173 }
174 }
175
176 // state_ could be Corrupt, merge or notfound
177 return false;
178 }
179
180 void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
181 GetContext* get_context) {
182 #ifndef ROCKSDB_LITE
183 static Cleanable nonToClean;
184 Slice s = replay_log;
185 while (s.size()) {
186 auto type = static_cast<ValueType>(*s.data());
187 s.remove_prefix(1);
188 Slice value;
189 bool ret = GetLengthPrefixedSlice(&s, &value);
190 assert(ret);
191 (void)ret;
192
193 // Since SequenceNumber is not stored and unknown, we will use
194 // kMaxSequenceNumber.
195 get_context->SaveValue(
196 ParsedInternalKey(user_key, kMaxSequenceNumber, type), value,
197 &nonToClean);
198 }
199 #else // ROCKSDB_LITE
200 assert(false);
201 #endif // ROCKSDB_LITE
202 }
203
204 } // namespace rocksdb