]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/table/get_context.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / table / get_context.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5
6#pragma once
7#include <string>
f67539c2 8#include "db/dbformat.h"
7c673cae 9#include "db/merge_context.h"
11fdf7f2 10#include "db/read_callback.h"
7c673cae 11#include "rocksdb/env.h"
11fdf7f2 12#include "rocksdb/statistics.h"
7c673cae 13#include "rocksdb/types.h"
f67539c2 14#include "table/block_based/block.h"
7c673cae 15
f67539c2 16namespace ROCKSDB_NAMESPACE {
7c673cae
FG
17class MergeContext;
18class PinnedIteratorsManager;
19
f67539c2
TL
// Data structure for accumulating statistics during a point lookup. At the
// end of the point lookup, the corresponding ticker stats are updated. This
// avoids the overhead of frequent ticker stats updates.
//
// Every counter starts at zero. The field order is part of the aggregate's
// layout and must not be changed casually.
struct GetContextStats {
  // Cache hits, overall and per block kind.
  uint64_t num_cache_hit = 0;
  uint64_t num_cache_index_hit = 0;
  uint64_t num_cache_data_hit = 0;
  uint64_t num_cache_filter_hit = 0;
  uint64_t num_cache_compression_dict_hit = 0;

  // Cache misses per block kind.
  uint64_t num_cache_index_miss = 0;
  uint64_t num_cache_filter_miss = 0;
  uint64_t num_cache_data_miss = 0;
  uint64_t num_cache_compression_dict_miss = 0;

  // Aggregate read / miss / add counters.
  uint64_t num_cache_bytes_read = 0;
  uint64_t num_cache_miss = 0;
  uint64_t num_cache_add = 0;
  uint64_t num_cache_add_redundant = 0;

  // Cache insertions (count, redundant count, bytes), overall and per block
  // kind.
  uint64_t num_cache_bytes_write = 0;
  uint64_t num_cache_index_add = 0;
  uint64_t num_cache_index_add_redundant = 0;
  uint64_t num_cache_index_bytes_insert = 0;
  uint64_t num_cache_data_add = 0;
  uint64_t num_cache_data_add_redundant = 0;
  uint64_t num_cache_data_bytes_insert = 0;
  uint64_t num_cache_filter_add = 0;
  uint64_t num_cache_filter_add_redundant = 0;
  uint64_t num_cache_filter_bytes_insert = 0;
  uint64_t num_cache_compression_dict_add = 0;
  uint64_t num_cache_compression_dict_add_redundant = 0;
  uint64_t num_cache_compression_dict_bytes_insert = 0;

  // MultiGet stats.
  uint64_t num_filter_read = 0;
  uint64_t num_index_read = 0;
  uint64_t num_data_read = 0;
  uint64_t num_sst_read = 0;
};
56
f67539c2
TL
57// A class to hold context about a point lookup, such as pointer to value
58// slice, key, merge context etc, as well as the current state of the
59// lookup. Any user using GetContext to track the lookup result must call
60// SaveValue() whenever the internal key is found. This can happen
61// repeatedly in case of merge operands. In case the key may exist with
62// high probability, but IO is required to confirm and the user doesn't allow
63// it, MarkKeyMayExist() must be called instead of SaveValue().
7c673cae
FG
64class GetContext {
65 public:
f67539c2
TL
66 // Current state of the point lookup. All except kNotFound and kMerge are
67 // terminal states
7c673cae
FG
68 enum GetState {
69 kNotFound,
70 kFound,
71 kDeleted,
72 kCorrupt,
11fdf7f2 73 kMerge, // saver contains the current merge result (the operands)
20effc67 74 kUnexpectedBlobIndex,
7c673cae 75 };
11fdf7f2 76 GetContextStats get_context_stats_;
7c673cae 77
f67539c2
TL
78 // Constructor
79 // @param value Holds the value corresponding to user_key. If its nullptr
80 // then return all merge operands corresponding to user_key
81 // via merge_context
82 // @param value_found If non-nullptr, set to false if key may be present
83 // but we can't be certain because we cannot do IO
84 // @param max_covering_tombstone_seq Pointer to highest sequence number of
85 // range deletion covering the key. When an internal key
86 // is found with smaller sequence number, the lookup
87 // terminates
88 // @param seq If non-nullptr, the sequence number of the found key will be
89 // saved here
90 // @param callback Pointer to ReadCallback to perform additional checks
91 // for visibility of a key
92 // @param is_blob_index If non-nullptr, will be used to indicate if a found
93 // key is of type blob index
94 // @param do_merge True if value associated with user_key has to be returned
95 // and false if all the merge operands associated with user_key has to be
96 // returned. Id do_merge=false then all the merge operands are stored in
97 // merge_context and they are never merged. The value pointer is untouched.
7c673cae
FG
98 GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
99 Logger* logger, Statistics* statistics, GetState init_state,
20effc67
TL
100 const Slice& user_key, PinnableSlice* value,
101 bool* value_found, MergeContext* merge_context, bool do_merge,
102 SequenceNumber* max_covering_tombstone_seq, Env* env,
103 SequenceNumber* seq = nullptr,
104 PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
105 ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
106 uint64_t tracing_get_id = 0);
107 GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
108 Logger* logger, Statistics* statistics, GetState init_state,
109 const Slice& user_key, PinnableSlice* value,
110 std::string* timestamp, bool* value_found,
f67539c2 111 MergeContext* merge_context, bool do_merge,
494da23a
TL
112 SequenceNumber* max_covering_tombstone_seq, Env* env,
113 SequenceNumber* seq = nullptr,
11fdf7f2 114 PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
f67539c2
TL
115 ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
116 uint64_t tracing_get_id = 0);
7c673cae 117
f67539c2
TL
118 GetContext() = delete;
119
120 // This can be called to indicate that a key may be present, but cannot be
121 // confirmed due to IO not allowed
7c673cae
FG
122 void MarkKeyMayExist();
123
124 // Records this key, value, and any meta-data (such as sequence number and
125 // state) into this GetContext.
126 //
11fdf7f2 127 // If the parsed_key matches the user key that we are looking for, sets
f67539c2 128 // matched to true.
11fdf7f2 129 //
7c673cae
FG
130 // Returns True if more keys need to be read (due to merges) or
131 // False if the complete value has been found.
132 bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
11fdf7f2 133 bool* matched, Cleanable* value_pinner = nullptr);
7c673cae
FG
134
135 // Simplified version of the previous function. Should only be used when we
136 // know that the operation is a Put.
137 void SaveValue(const Slice& value, SequenceNumber seq);
138
139 GetState State() const { return state_; }
140
494da23a
TL
141 SequenceNumber* max_covering_tombstone_seq() {
142 return max_covering_tombstone_seq_;
143 }
7c673cae
FG
144
145 PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; }
146
147 // If a non-null string is passed, all the SaveValue calls will be
148 // logged into the string. The operations can then be replayed on
149 // another GetContext with replayGetContextLog.
150 void SetReplayLog(std::string* replay_log) { replay_log_ = replay_log; }
151
152 // Do we need to fetch the SequenceNumber for this key?
153 bool NeedToReadSequence() const { return (seq_ != nullptr); }
154
11fdf7f2
TL
155 bool sample() const { return sample_; }
156
157 bool CheckCallback(SequenceNumber seq) {
158 if (callback_) {
159 return callback_->IsVisible(seq);
160 }
161 return true;
162 }
163
164 void ReportCounters();
165
f67539c2
TL
166 bool has_callback() const { return callback_ != nullptr; }
167
168 uint64_t get_tracing_get_id() const { return tracing_get_id_; }
169
170 void push_operand(const Slice& value, Cleanable* value_pinner);
171
7c673cae
FG
172 private:
173 const Comparator* ucmp_;
174 const MergeOperator* merge_operator_;
175 // the merge operations encountered;
176 Logger* logger_;
177 Statistics* statistics_;
178
179 GetState state_;
180 Slice user_key_;
181 PinnableSlice* pinnable_val_;
20effc67 182 std::string* timestamp_;
7c673cae
FG
183 bool* value_found_; // Is value set correctly? Used by KeyMayExist
184 MergeContext* merge_context_;
494da23a 185 SequenceNumber* max_covering_tombstone_seq_;
7c673cae
FG
186 Env* env_;
187 // If a key is found, seq_ will be set to the SequenceNumber of most recent
188 // write to the key or kMaxSequenceNumber if unknown
189 SequenceNumber* seq_;
190 std::string* replay_log_;
191 // Used to temporarily pin blocks when state_ == GetContext::kMerge
192 PinnedIteratorsManager* pinned_iters_mgr_;
11fdf7f2
TL
193 ReadCallback* callback_;
194 bool sample_;
f67539c2
TL
195 // Value is true if it's called as part of DB Get API and false if it's
196 // called as part of DB GetMergeOperands API. When it's false merge operators
197 // are never merged.
198 bool do_merge_;
11fdf7f2 199 bool* is_blob_index_;
f67539c2
TL
200 // Used for block cache tracing only. A tracing get id uniquely identifies a
201 // Get or a MultiGet.
202 const uint64_t tracing_get_id_;
7c673cae
FG
203};
204
f67539c2
TL
205// Call this to replay a log and bring the get_context up to date. The replay
206// log must have been created by another GetContext object, whose replay log
207// must have been set by calling GetContext::SetReplayLog().
7c673cae 208void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
11fdf7f2
TL
209 GetContext* get_context,
210 Cleanable* value_pinner = nullptr);
7c673cae 211
f67539c2 212} // namespace ROCKSDB_NAMESPACE