]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #pragma once | |
7 | #include <string> | |
f67539c2 | 8 | #include "db/dbformat.h" |
7c673cae | 9 | #include "db/merge_context.h" |
11fdf7f2 | 10 | #include "db/read_callback.h" |
7c673cae | 11 | #include "rocksdb/env.h" |
11fdf7f2 | 12 | #include "rocksdb/statistics.h" |
7c673cae | 13 | #include "rocksdb/types.h" |
f67539c2 | 14 | #include "table/block_based/block.h" |
7c673cae | 15 | |
f67539c2 | 16 | namespace ROCKSDB_NAMESPACE { |
7c673cae FG |
17 | class MergeContext; |
18 | class PinnedIteratorsManager; | |
19 | ||
f67539c2 TL |
20 | // Data structure for accumulating statistics during a point lookup. At the |
21 | // end of the point lookup, the corresponding ticker stats are updated. This | |
22 | // avoids the overhead of frequent ticker stats updates. | |
11fdf7f2 TL |
23 | struct GetContextStats { |
24 | uint64_t num_cache_hit = 0; | |
25 | uint64_t num_cache_index_hit = 0; | |
26 | uint64_t num_cache_data_hit = 0; | |
27 | uint64_t num_cache_filter_hit = 0; | |
494da23a | 28 | uint64_t num_cache_compression_dict_hit = 0; |
11fdf7f2 TL |
29 | uint64_t num_cache_index_miss = 0; |
30 | uint64_t num_cache_filter_miss = 0; | |
31 | uint64_t num_cache_data_miss = 0; | |
494da23a | 32 | uint64_t num_cache_compression_dict_miss = 0; |
11fdf7f2 TL |
33 | uint64_t num_cache_bytes_read = 0; |
34 | uint64_t num_cache_miss = 0; | |
35 | uint64_t num_cache_add = 0; | |
20effc67 | 36 | uint64_t num_cache_add_redundant = 0; |
11fdf7f2 TL |
37 | uint64_t num_cache_bytes_write = 0; |
38 | uint64_t num_cache_index_add = 0; | |
20effc67 | 39 | uint64_t num_cache_index_add_redundant = 0; |
11fdf7f2 TL |
40 | uint64_t num_cache_index_bytes_insert = 0; |
41 | uint64_t num_cache_data_add = 0; | |
20effc67 | 42 | uint64_t num_cache_data_add_redundant = 0; |
11fdf7f2 TL |
43 | uint64_t num_cache_data_bytes_insert = 0; |
44 | uint64_t num_cache_filter_add = 0; | |
20effc67 | 45 | uint64_t num_cache_filter_add_redundant = 0; |
11fdf7f2 | 46 | uint64_t num_cache_filter_bytes_insert = 0; |
494da23a | 47 | uint64_t num_cache_compression_dict_add = 0; |
20effc67 | 48 | uint64_t num_cache_compression_dict_add_redundant = 0; |
494da23a | 49 | uint64_t num_cache_compression_dict_bytes_insert = 0; |
20effc67 TL |
50 | // MultiGet stats. |
51 | uint64_t num_filter_read = 0; | |
52 | uint64_t num_index_read = 0; | |
53 | uint64_t num_data_read = 0; | |
54 | uint64_t num_sst_read = 0; | |
11fdf7f2 TL |
55 | }; |
56 | ||
f67539c2 TL |
57 | // A class to hold context about a point lookup, such as pointer to value |
58 | // slice, key, merge context etc, as well as the current state of the | |
59 | // lookup. Any user using GetContext to track the lookup result must call | |
60 | // SaveValue() whenever the internal key is found. This can happen | |
61 | // repeatedly in case of merge operands. In case the key may exist with | |
62 | // high probability, but IO is required to confirm and the user doesn't allow | |
63 | // it, MarkKeyMayExist() must be called instead of SaveValue(). | |
7c673cae FG |
64 | class GetContext { |
65 | public: | |
f67539c2 TL |
66 | // Current state of the point lookup. All except kNotFound and kMerge are |
67 | // terminal states | |
7c673cae FG |
68 | enum GetState { |
69 | kNotFound, | |
70 | kFound, | |
71 | kDeleted, | |
72 | kCorrupt, | |
11fdf7f2 | 73 | kMerge, // saver contains the current merge result (the operands) |
20effc67 | 74 | kUnexpectedBlobIndex, |
7c673cae | 75 | }; |
11fdf7f2 | 76 | GetContextStats get_context_stats_; |
7c673cae | 77 | |
f67539c2 TL |
78 | // Constructor |
79 | // @param value Holds the value corresponding to user_key. If it's nullptr | |
80 | // then return all merge operands corresponding to user_key | |
81 | // via merge_context | |
82 | // @param value_found If non-nullptr, set to false if key may be present | |
83 | // but we can't be certain because we cannot do IO | |
84 | // @param max_covering_tombstone_seq Pointer to highest sequence number of | |
85 | // range deletion covering the key. When an internal key | |
86 | // is found with smaller sequence number, the lookup | |
87 | // terminates | |
88 | // @param seq If non-nullptr, the sequence number of the found key will be | |
89 | // saved here | |
90 | // @param callback Pointer to ReadCallback to perform additional checks | |
91 | // for visibility of a key | |
92 | // @param is_blob_index If non-nullptr, will be used to indicate if a found | |
93 | // key is of type blob index | |
94 | // @param do_merge True if value associated with user_key has to be returned | |
95 | // and false if all the merge operands associated with user_key have to be | |
96 | // returned. If do_merge=false then all the merge operands are stored in | |
97 | // merge_context and they are never merged. The value pointer is untouched. | |
7c673cae FG |
98 | GetContext(const Comparator* ucmp, const MergeOperator* merge_operator, |
99 | Logger* logger, Statistics* statistics, GetState init_state, | |
20effc67 TL |
100 | const Slice& user_key, PinnableSlice* value, |
101 | bool* value_found, MergeContext* merge_context, bool do_merge, | |
102 | SequenceNumber* max_covering_tombstone_seq, Env* env, | |
103 | SequenceNumber* seq = nullptr, | |
104 | PinnedIteratorsManager* _pinned_iters_mgr = nullptr, | |
105 | ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, | |
106 | uint64_t tracing_get_id = 0); | |
107 | GetContext(const Comparator* ucmp, const MergeOperator* merge_operator, | |
108 | Logger* logger, Statistics* statistics, GetState init_state, | |
109 | const Slice& user_key, PinnableSlice* value, | |
110 | std::string* timestamp, bool* value_found, | |
f67539c2 | 111 | MergeContext* merge_context, bool do_merge, |
494da23a TL |
112 | SequenceNumber* max_covering_tombstone_seq, Env* env, |
113 | SequenceNumber* seq = nullptr, | |
11fdf7f2 | 114 | PinnedIteratorsManager* _pinned_iters_mgr = nullptr, |
f67539c2 TL |
115 | ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, |
116 | uint64_t tracing_get_id = 0); | |
7c673cae | 117 | |
f67539c2 TL |
118 | GetContext() = delete; |
119 | ||
120 | // This can be called to indicate that a key may be present, but cannot be | |
121 | // confirmed because IO is not allowed | |
7c673cae FG |
122 | void MarkKeyMayExist(); |
123 | ||
124 | // Records this key, value, and any meta-data (such as sequence number and | |
125 | // state) into this GetContext. | |
126 | // | |
11fdf7f2 | 127 | // If the parsed_key matches the user key that we are looking for, sets |
f67539c2 | 128 | // matched to true. |
11fdf7f2 | 129 | // |
7c673cae FG |
130 | // Returns True if more keys need to be read (due to merges) or |
131 | // False if the complete value has been found. | |
132 | bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value, | |
11fdf7f2 | 133 | bool* matched, Cleanable* value_pinner = nullptr); |
7c673cae FG |
134 | |
135 | // Simplified version of the previous function. Should only be used when we | |
136 | // know that the operation is a Put. | |
137 | void SaveValue(const Slice& value, SequenceNumber seq); | |
138 | ||
139 | GetState State() const { return state_; } | |
140 | ||
494da23a TL |
141 | SequenceNumber* max_covering_tombstone_seq() { |
142 | return max_covering_tombstone_seq_; | |
143 | } | |
7c673cae FG |
144 | |
145 | PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; } | |
146 | ||
147 | // If a non-null string is passed, all the SaveValue calls will be | |
148 | // logged into the string. The operations can then be replayed on | |
149 | // another GetContext with replayGetContextLog. | |
150 | void SetReplayLog(std::string* replay_log) { replay_log_ = replay_log; } | |
151 | ||
152 | // Do we need to fetch the SequenceNumber for this key? | |
153 | bool NeedToReadSequence() const { return (seq_ != nullptr); } | |
154 | ||
11fdf7f2 TL |
155 | bool sample() const { return sample_; } |
156 | ||
157 | bool CheckCallback(SequenceNumber seq) { | |
158 | if (callback_) { | |
159 | return callback_->IsVisible(seq); | |
160 | } | |
161 | return true; | |
162 | } | |
163 | ||
164 | void ReportCounters(); | |
165 | ||
f67539c2 TL |
166 | bool has_callback() const { return callback_ != nullptr; } |
167 | ||
168 | uint64_t get_tracing_get_id() const { return tracing_get_id_; } | |
169 | ||
170 | void push_operand(const Slice& value, Cleanable* value_pinner); | |
171 | ||
7c673cae FG |
172 | private: |
173 | const Comparator* ucmp_; | |
174 | const MergeOperator* merge_operator_; | |
175 | // the merge operations encountered; | |
176 | Logger* logger_; | |
177 | Statistics* statistics_; | |
178 | ||
179 | GetState state_; | |
180 | Slice user_key_; | |
181 | PinnableSlice* pinnable_val_; | |
20effc67 | 182 | std::string* timestamp_; |
7c673cae FG |
183 | bool* value_found_; // Is value set correctly? Used by KeyMayExist |
184 | MergeContext* merge_context_; | |
494da23a | 185 | SequenceNumber* max_covering_tombstone_seq_; |
7c673cae FG |
186 | Env* env_; |
187 | // If a key is found, seq_ will be set to the SequenceNumber of most recent | |
188 | // write to the key or kMaxSequenceNumber if unknown | |
189 | SequenceNumber* seq_; | |
190 | std::string* replay_log_; | |
191 | // Used to temporarily pin blocks when state_ == GetContext::kMerge | |
192 | PinnedIteratorsManager* pinned_iters_mgr_; | |
11fdf7f2 TL |
193 | ReadCallback* callback_; |
194 | bool sample_; | |
f67539c2 TL |
195 | // Value is true if it's called as part of DB Get API and false if it's |
196 | // called as part of DB GetMergeOperands API. When it's false, merge operands | |
197 | // are never merged. | |
198 | bool do_merge_; | |
11fdf7f2 | 199 | bool* is_blob_index_; |
f67539c2 TL |
200 | // Used for block cache tracing only. A tracing get id uniquely identifies a |
201 | // Get or a MultiGet. | |
202 | const uint64_t tracing_get_id_; | |
7c673cae FG |
203 | }; |
204 | ||
f67539c2 TL |
205 | // Call this to replay a log and bring the get_context up to date. The replay |
206 | // log must have been created by another GetContext object, whose replay log | |
207 | // must have been set by calling GetContext::SetReplayLog(). | |
7c673cae | 208 | void replayGetContextLog(const Slice& replay_log, const Slice& user_key, |
11fdf7f2 TL |
209 | GetContext* get_context, |
210 | Cleanable* value_pinner = nullptr); | |
7c673cae | 211 | |
f67539c2 | 212 | } // namespace ROCKSDB_NAMESPACE |