]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/include/rocksdb/merge_operator.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / include / rocksdb / merge_operator.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #pragma once
7
8 #include <deque>
9 #include <memory>
10 #include <string>
11 #include <vector>
12
13 #include "rocksdb/slice.h"
14
15 namespace ROCKSDB_NAMESPACE {
16
17 class Slice;
18 class Logger;
19
20 // The Merge Operator
21 //
22 // Essentially, a MergeOperator specifies the SEMANTICS of a merge, which only
23 // client knows. It could be numeric addition, list append, string
24 // concatenation, edit data structure, ... , anything.
25 // The library, on the other hand, is concerned with the exercise of this
26 // interface, at the right time (during get, iteration, compaction...)
27 //
28 // To use merge, the client needs to provide an object implementing one of
29 // the following interfaces:
30 // a) AssociativeMergeOperator - for most simple semantics (always take
31 // two values, and merge them into one value, which is then put back
32 // into rocksdb); numeric addition and string concatenation are examples;
33 //
34 // b) MergeOperator - the generic class for all the more abstract / complex
35 // operations; one method (FullMergeV2) to merge a Put/Delete value with a
36 // merge operand; and another method (PartialMerge) that merges multiple
37 // operands together. this is especially useful if your key values have
38 // complex structures but you would still like to support client-specific
39 // incremental updates.
40 //
41 // AssociativeMergeOperator is simpler to implement. MergeOperator is simply
42 // more powerful.
43 //
44 // Refer to rocksdb-merge wiki for more details and example implementations.
45 //
46 class MergeOperator {
47 public:
48 virtual ~MergeOperator() {}
49 static const char* Type() { return "MergeOperator"; }
50
51 // Gives the client a way to express the read -> modify -> write semantics
52 // key: (IN) The key that's associated with this merge operation.
53 // Client could multiplex the merge operator based on it
54 // if the key space is partitioned and different subspaces
55 // refer to different types of data which have different
56 // merge operation semantics
57 // existing: (IN) null indicates that the key does not exist before this op
58 // operand_list:(IN) the sequence of merge operations to apply, front() first.
59 // new_value:(OUT) Client is responsible for filling the merge result here.
60 // The string that new_value is pointing to will be empty.
61 // logger: (IN) Client could use this to log errors during merge.
62 //
63 // Return true on success.
64 // All values passed in will be client-specific values. So if this method
65 // returns false, it is because client specified bad data or there was
66 // internal corruption. This will be treated as an error by the library.
67 //
68 // Also make use of the *logger for error messages.
69 virtual bool FullMerge(const Slice& /*key*/, const Slice* /*existing_value*/,
70 const std::deque<std::string>& /*operand_list*/,
71 std::string* /*new_value*/, Logger* /*logger*/) const {
72 // deprecated, please use FullMergeV2()
73 assert(false);
74 return false;
75 }
76
77 struct MergeOperationInput {
78 explicit MergeOperationInput(const Slice& _key,
79 const Slice* _existing_value,
80 const std::vector<Slice>& _operand_list,
81 Logger* _logger)
82 : key(_key),
83 existing_value(_existing_value),
84 operand_list(_operand_list),
85 logger(_logger) {}
86
87 // The key associated with the merge operation.
88 const Slice& key;
89 // The existing value of the current key, nullptr means that the
90 // value doesn't exist.
91 const Slice* existing_value;
92 // A list of operands to apply.
93 const std::vector<Slice>& operand_list;
94 // Logger could be used by client to log any errors that happen during
95 // the merge operation.
96 Logger* logger;
97 };
98
99 struct MergeOperationOutput {
100 explicit MergeOperationOutput(std::string& _new_value,
101 Slice& _existing_operand)
102 : new_value(_new_value), existing_operand(_existing_operand) {}
103
104 // Client is responsible for filling the merge result here.
105 std::string& new_value;
106 // If the merge result is one of the existing operands (or existing_value),
107 // client can set this field to the operand (or existing_value) instead of
108 // using new_value.
109 Slice& existing_operand;
110 };
111
112 // This function applies a stack of merge operands in chrionological order
113 // on top of an existing value. There are two ways in which this method is
114 // being used:
115 // a) During Get() operation, it used to calculate the final value of a key
116 // b) During compaction, in order to collapse some operands with the based
117 // value.
118 //
119 // Note: The name of the method is somewhat misleading, as both in the cases
120 // of Get() or compaction it may be called on a subset of operands:
121 // K: 0 +1 +2 +7 +4 +5 2 +1 +2
122 // ^
123 // |
124 // snapshot
125 // In the example above, Get(K) operation will call FullMerge with a base
126 // value of 2 and operands [+1, +2]. Compaction process might decide to
127 // collapse the beginning of the history up to the snapshot by performing
128 // full Merge with base value of 0 and operands [+1, +2, +7, +4].
129 virtual bool FullMergeV2(const MergeOperationInput& merge_in,
130 MergeOperationOutput* merge_out) const;
131
132 // This function performs merge(left_op, right_op)
133 // when both the operands are themselves merge operation types
134 // that you would have passed to a DB::Merge() call in the same order
135 // (i.e.: DB::Merge(key,left_op), followed by DB::Merge(key,right_op)).
136 //
137 // PartialMerge should combine them into a single merge operation that is
138 // saved into *new_value, and then it should return true.
139 // *new_value should be constructed such that a call to
140 // DB::Merge(key, *new_value) would yield the same result as a call
141 // to DB::Merge(key, left_op) followed by DB::Merge(key, right_op).
142 //
143 // The string that new_value is pointing to will be empty.
144 //
145 // The default implementation of PartialMergeMulti will use this function
146 // as a helper, for backward compatibility. Any successor class of
147 // MergeOperator should either implement PartialMerge or PartialMergeMulti,
148 // although implementing PartialMergeMulti is suggested as it is in general
149 // more effective to merge multiple operands at a time instead of two
150 // operands at a time.
151 //
152 // If it is impossible or infeasible to combine the two operations,
153 // leave new_value unchanged and return false. The library will
154 // internally keep track of the operations, and apply them in the
155 // correct order once a base-value (a Put/Delete/End-of-Database) is seen.
156 //
157 // TODO: Presently there is no way to differentiate between error/corruption
158 // and simply "return false". For now, the client should simply return
159 // false in any case it cannot perform partial-merge, regardless of reason.
160 // If there is corruption in the data, handle it in the FullMergeV2() function
161 // and return false there. The default implementation of PartialMerge will
162 // always return false.
163 virtual bool PartialMerge(const Slice& /*key*/, const Slice& /*left_operand*/,
164 const Slice& /*right_operand*/,
165 std::string* /*new_value*/,
166 Logger* /*logger*/) const {
167 return false;
168 }
169
170 // This function performs merge when all the operands are themselves merge
171 // operation types that you would have passed to a DB::Merge() call in the
172 // same order (front() first)
173 // (i.e. DB::Merge(key, operand_list[0]), followed by
174 // DB::Merge(key, operand_list[1]), ...)
175 //
176 // PartialMergeMulti should combine them into a single merge operation that is
177 // saved into *new_value, and then it should return true. *new_value should
178 // be constructed such that a call to DB::Merge(key, *new_value) would yield
179 // the same result as subquential individual calls to DB::Merge(key, operand)
180 // for each operand in operand_list from front() to back().
181 //
182 // The string that new_value is pointing to will be empty.
183 //
184 // The PartialMergeMulti function will be called when there are at least two
185 // operands.
186 //
187 // In the default implementation, PartialMergeMulti will invoke PartialMerge
188 // multiple times, where each time it only merges two operands. Developers
189 // should either implement PartialMergeMulti, or implement PartialMerge which
190 // is served as the helper function of the default PartialMergeMulti.
191 virtual bool PartialMergeMulti(const Slice& key,
192 const std::deque<Slice>& operand_list,
193 std::string* new_value, Logger* logger) const;
194
195 // The name of the MergeOperator. Used to check for MergeOperator
196 // mismatches (i.e., a DB created with one MergeOperator is
197 // accessed using a different MergeOperator)
198 // TODO: the name is currently not stored persistently and thus
199 // no checking is enforced. Client is responsible for providing
200 // consistent MergeOperator between DB opens.
201 virtual const char* Name() const = 0;
202
203 // Determines whether the PartialMerge can be called with just a single
204 // merge operand.
205 // Override and return true for allowing a single operand. PartialMerge
206 // and PartialMergeMulti should be overridden and implemented
207 // correctly to properly handle a single operand.
208 virtual bool AllowSingleOperand() const { return false; }
209
210 // Allows to control when to invoke a full merge during Get.
211 // This could be used to limit the number of merge operands that are looked at
212 // during a point lookup, thereby helping in limiting the number of levels to
213 // read from.
214 // Doesn't help with iterators.
215 //
216 // Note: the merge operands are passed to this function in the reversed order
217 // relative to how they were merged (passed to FullMerge or FullMergeV2)
218 // for performance reasons, see also:
219 // https://github.com/facebook/rocksdb/issues/3865
220 virtual bool ShouldMerge(const std::vector<Slice>& /*operands*/) const {
221 return false;
222 }
223 };
224
225 // The simpler, associative merge operator.
226 class AssociativeMergeOperator : public MergeOperator {
227 public:
228 ~AssociativeMergeOperator() override {}
229
230 // Gives the client a way to express the read -> modify -> write semantics
231 // key: (IN) The key that's associated with this merge operation.
232 // existing_value:(IN) null indicates the key does not exist before this op
233 // value: (IN) the value to update/merge the existing_value with
234 // new_value: (OUT) Client is responsible for filling the merge result
235 // here. The string that new_value is pointing to will be empty.
236 // logger: (IN) Client could use this to log errors during merge.
237 //
238 // Return true on success.
239 // All values passed in will be client-specific values. So if this method
240 // returns false, it is because client specified bad data or there was
241 // internal corruption. The client should assume that this will be treated
242 // as an error by the library.
243 virtual bool Merge(const Slice& key, const Slice* existing_value,
244 const Slice& value, std::string* new_value,
245 Logger* logger) const = 0;
246
247 private:
248 // Default implementations of the MergeOperator functions
249 bool FullMergeV2(const MergeOperationInput& merge_in,
250 MergeOperationOutput* merge_out) const override;
251
252 bool PartialMerge(const Slice& key, const Slice& left_operand,
253 const Slice& right_operand, std::string* new_value,
254 Logger* logger) const override;
255 };
256
257 } // namespace ROCKSDB_NAMESPACE