]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/table/get_context.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / table / get_context.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #pragma once
7 #include <string>
8
9 #include "db/read_callback.h"
10 #include "rocksdb/types.h"
11
12 namespace ROCKSDB_NAMESPACE {
// Forward declarations. Within this header these types are only referred to
// through pointers or references, so their full definitions are not needed.
class BlobFetcher;
class Comparator;
class Logger;
class MergeContext;
class MergeOperator;
class PinnableWideColumns;
class PinnedIteratorsManager;
class Statistics;
class SystemClock;
struct ParsedInternalKey;
23
// Data structure for accumulating statistics during a point lookup. At the
// end of the point lookup, the corresponding ticker stats are updated. This
// avoids the overhead of frequent ticker stats updates.
//
// Every counter starts at zero. GetContext exposes one instance of this
// struct as its public member get_context_stats_.
struct GetContextStats {
  uint64_t num_cache_hit = 0;
  uint64_t num_cache_index_hit = 0;
  uint64_t num_cache_data_hit = 0;
  uint64_t num_cache_filter_hit = 0;
  uint64_t num_cache_compression_dict_hit = 0;
  uint64_t num_cache_index_miss = 0;
  uint64_t num_cache_filter_miss = 0;
  uint64_t num_cache_data_miss = 0;
  uint64_t num_cache_compression_dict_miss = 0;
  uint64_t num_cache_bytes_read = 0;
  uint64_t num_cache_miss = 0;
  uint64_t num_cache_add = 0;
  uint64_t num_cache_add_redundant = 0;
  uint64_t num_cache_bytes_write = 0;
  uint64_t num_cache_index_add = 0;
  uint64_t num_cache_index_add_redundant = 0;
  uint64_t num_cache_index_bytes_insert = 0;
  uint64_t num_cache_data_add = 0;
  uint64_t num_cache_data_add_redundant = 0;
  uint64_t num_cache_data_bytes_insert = 0;
  uint64_t num_cache_filter_add = 0;
  uint64_t num_cache_filter_add_redundant = 0;
  uint64_t num_cache_filter_bytes_insert = 0;
  uint64_t num_cache_compression_dict_add = 0;
  uint64_t num_cache_compression_dict_add_redundant = 0;
  uint64_t num_cache_compression_dict_bytes_insert = 0;
  // MultiGet stats.
  uint64_t num_filter_read = 0;
  uint64_t num_index_read = 0;
  uint64_t num_sst_read = 0;
};
59
60 // A class to hold context about a point lookup, such as pointer to value
61 // slice, key, merge context etc, as well as the current state of the
62 // lookup. Any user using GetContext to track the lookup result must call
63 // SaveValue() whenever the internal key is found. This can happen
64 // repeatedly in case of merge operands. In case the key may exist with
65 // high probability, but IO is required to confirm and the user doesn't allow
66 // it, MarkKeyMayExist() must be called instead of SaveValue().
67 class GetContext {
68 public:
69 // Current state of the point lookup. All except kNotFound and kMerge are
70 // terminal states
71 enum GetState {
72 kNotFound,
73 kFound,
74 kDeleted,
75 kCorrupt,
76 kMerge, // saver contains the current merge result (the operands)
77 kUnexpectedBlobIndex,
78 };
79 GetContextStats get_context_stats_;
80
81 // Constructor
82 // @param value Holds the value corresponding to user_key. If its nullptr
83 // then return all merge operands corresponding to user_key
84 // via merge_context
85 // @param value_found If non-nullptr, set to false if key may be present
86 // but we can't be certain because we cannot do IO
87 // @param max_covering_tombstone_seq Pointer to highest sequence number of
88 // range deletion covering the key. When an internal key
89 // is found with smaller sequence number, the lookup
90 // terminates
91 // @param seq If non-nullptr, the sequence number of the found key will be
92 // saved here
93 // @param callback Pointer to ReadCallback to perform additional checks
94 // for visibility of a key
95 // @param is_blob_index If non-nullptr, will be used to indicate if a found
96 // key is of type blob index
97 // @param do_merge True if value associated with user_key has to be returned
98 // and false if all the merge operands associated with user_key has to be
99 // returned. Id do_merge=false then all the merge operands are stored in
100 // merge_context and they are never merged. The value pointer is untouched.
101 GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
102 Logger* logger, Statistics* statistics, GetState init_state,
103 const Slice& user_key, PinnableSlice* value,
104 PinnableWideColumns* columns, bool* value_found,
105 MergeContext* merge_context, bool do_merge,
106 SequenceNumber* max_covering_tombstone_seq, SystemClock* clock,
107 SequenceNumber* seq = nullptr,
108 PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
109 ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
110 uint64_t tracing_get_id = 0, BlobFetcher* blob_fetcher = nullptr);
111 GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
112 Logger* logger, Statistics* statistics, GetState init_state,
113 const Slice& user_key, PinnableSlice* value,
114 PinnableWideColumns* columns, std::string* timestamp,
115 bool* value_found, MergeContext* merge_context, bool do_merge,
116 SequenceNumber* max_covering_tombstone_seq, SystemClock* clock,
117 SequenceNumber* seq = nullptr,
118 PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
119 ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
120 uint64_t tracing_get_id = 0, BlobFetcher* blob_fetcher = nullptr);
121
122 GetContext() = delete;
123
124 // This can be called to indicate that a key may be present, but cannot be
125 // confirmed due to IO not allowed
126 void MarkKeyMayExist();
127
128 // Records this key, value, and any meta-data (such as sequence number and
129 // state) into this GetContext.
130 //
131 // If the parsed_key matches the user key that we are looking for, sets
132 // matched to true.
133 //
134 // Returns True if more keys need to be read (due to merges) or
135 // False if the complete value has been found.
136 bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
137 bool* matched, Cleanable* value_pinner = nullptr);
138
139 // Simplified version of the previous function. Should only be used when we
140 // know that the operation is a Put.
141 void SaveValue(const Slice& value, SequenceNumber seq);
142
143 GetState State() const { return state_; }
144
145 SequenceNumber* max_covering_tombstone_seq() {
146 return max_covering_tombstone_seq_;
147 }
148
149 bool NeedTimestamp() { return timestamp_ != nullptr; }
150
151 void SetTimestampFromRangeTombstone(const Slice& timestamp) {
152 assert(timestamp_);
153 timestamp_->assign(timestamp.data(), timestamp.size());
154 ts_from_rangetombstone_ = true;
155 }
156
157 PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; }
158
159 // If a non-null string is passed, all the SaveValue calls will be
160 // logged into the string. The operations can then be replayed on
161 // another GetContext with replayGetContextLog.
162 void SetReplayLog(std::string* replay_log) { replay_log_ = replay_log; }
163
164 // Do we need to fetch the SequenceNumber for this key?
165 bool NeedToReadSequence() const { return (seq_ != nullptr); }
166
167 bool sample() const { return sample_; }
168
169 bool CheckCallback(SequenceNumber seq) {
170 if (callback_) {
171 return callback_->IsVisible(seq);
172 }
173 return true;
174 }
175
176 void ReportCounters();
177
178 bool has_callback() const { return callback_ != nullptr; }
179
180 uint64_t get_tracing_get_id() const { return tracing_get_id_; }
181
182 void push_operand(const Slice& value, Cleanable* value_pinner);
183
184 private:
185 void Merge(const Slice* value);
186 void MergeWithEntity(Slice entity);
187 bool GetBlobValue(const Slice& blob_index, PinnableSlice* blob_value);
188
189 const Comparator* ucmp_;
190 const MergeOperator* merge_operator_;
191 // the merge operations encountered;
192 Logger* logger_;
193 Statistics* statistics_;
194
195 GetState state_;
196 Slice user_key_;
197 PinnableSlice* pinnable_val_;
198 PinnableWideColumns* columns_;
199 std::string* timestamp_;
200 bool ts_from_rangetombstone_{false};
201 bool* value_found_; // Is value set correctly? Used by KeyMayExist
202 MergeContext* merge_context_;
203 SequenceNumber* max_covering_tombstone_seq_;
204 SystemClock* clock_;
205 // If a key is found, seq_ will be set to the SequenceNumber of most recent
206 // write to the key or kMaxSequenceNumber if unknown
207 SequenceNumber* seq_;
208 std::string* replay_log_;
209 // Used to temporarily pin blocks when state_ == GetContext::kMerge
210 PinnedIteratorsManager* pinned_iters_mgr_;
211 ReadCallback* callback_;
212 bool sample_;
213 // Value is true if it's called as part of DB Get API and false if it's
214 // called as part of DB GetMergeOperands API. When it's false merge operators
215 // are never merged.
216 bool do_merge_;
217 bool* is_blob_index_;
218 // Used for block cache tracing only. A tracing get id uniquely identifies a
219 // Get or a MultiGet.
220 const uint64_t tracing_get_id_;
221 BlobFetcher* blob_fetcher_;
222 };
223
224 // Call this to replay a log and bring the get_context up to date. The replay
225 // log must have been created by another GetContext object, whose replay log
226 // must have been set by calling GetContext::SetReplayLog().
227 void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
228 GetContext* get_context,
229 Cleanable* value_pinner = nullptr);
230
231 } // namespace ROCKSDB_NAMESPACE