]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/include/rocksdb/compaction_filter.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / rocksdb / include / rocksdb / compaction_filter.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
5 // Copyright (c) 2013 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8
9 #ifndef STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_
10 #define STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_
11
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
16
17 namespace rocksdb {
18
19 class Slice;
20 class SliceTransform;
21
// Describes the compaction run on whose behalf a filter is operating.
struct CompactionFilterContext {
  // True when this compaction run includes all data files.
  bool is_full_compaction;
  // True when the compaction was requested by the client; false when it is
  // occurring as part of the automatic compaction process.
  bool is_manual_compaction;
};
30
31 // CompactionFilter allows an application to modify/delete a key-value at
32 // the time of compaction.
33
34 class CompactionFilter {
35 public:
36 enum ValueType {
37 kValue,
38 kMergeOperand,
39 };
40
41 enum class Decision {
42 kKeep,
43 kRemove,
44 kChangeValue,
45 kRemoveAndSkipUntil,
46 };
47
48 // Context information of a compaction run
49 struct Context {
50 // Does this compaction run include all data files
51 bool is_full_compaction;
52 // Is this compaction requested by the client (true),
53 // or is it occurring as an automatic compaction process
54 bool is_manual_compaction;
55 // Which column family this compaction is for.
56 uint32_t column_family_id;
57 };
58
59 virtual ~CompactionFilter() {}
60
61 // The compaction process invokes this
62 // method for kv that is being compacted. A return value
63 // of false indicates that the kv should be preserved in the
64 // output of this compaction run and a return value of true
65 // indicates that this key-value should be removed from the
66 // output of the compaction. The application can inspect
67 // the existing value of the key and make decision based on it.
68 //
69 // Key-Values that are results of merge operation during compaction are not
70 // passed into this function. Currently, when you have a mix of Put()s and
71 // Merge()s on a same key, we only guarantee to process the merge operands
72 // through the compaction filters. Put()s might be processed, or might not.
73 //
74 // When the value is to be preserved, the application has the option
75 // to modify the existing_value and pass it back through new_value.
76 // value_changed needs to be set to true in this case.
77 //
78 // If you use snapshot feature of RocksDB (i.e. call GetSnapshot() API on a
79 // DB* object), CompactionFilter might not be very useful for you. Due to
80 // guarantees we need to maintain, compaction process will not call Filter()
81 // on any keys that were written before the latest snapshot. In other words,
82 // compaction will only call Filter() on keys written after your most recent
83 // call to GetSnapshot(). In most cases, Filter() will not be called very
84 // often. This is something we're fixing. See the discussion at:
85 // https://www.facebook.com/groups/mysqlonrocksdb/permalink/999723240091865/
86 //
87 // If multithreaded compaction is being used *and* a single CompactionFilter
88 // instance was supplied via Options::compaction_filter, this method may be
89 // called from different threads concurrently. The application must ensure
90 // that the call is thread-safe.
91 //
92 // If the CompactionFilter was created by a factory, then it will only ever
93 // be used by a single thread that is doing the compaction run, and this
94 // call does not need to be thread-safe. However, multiple filters may be
95 // in existence and operating concurrently.
96 //
97 // The last paragraph is not true if you set max_subcompactions to more than
98 // 1. In that case, subcompaction from multiple threads may call a single
99 // CompactionFilter concurrently.
100 virtual bool Filter(int level, const Slice& key, const Slice& existing_value,
101 std::string* new_value, bool* value_changed) const {
102 return false;
103 }
104
105 // The compaction process invokes this method on every merge operand. If this
106 // method returns true, the merge operand will be ignored and not written out
107 // in the compaction output
108 //
109 // Note: If you are using a TransactionDB, it is not recommended to implement
110 // FilterMergeOperand(). If a Merge operation is filtered out, TransactionDB
111 // may not realize there is a write conflict and may allow a Transaction to
112 // Commit that should have failed. Instead, it is better to implement any
113 // Merge filtering inside the MergeOperator.
114 virtual bool FilterMergeOperand(int level, const Slice& key,
115 const Slice& operand) const {
116 return false;
117 }
118
119 // An extended API. Called for both values and merge operands.
120 // Allows changing value and skipping ranges of keys.
121 // The default implementation uses Filter() and FilterMergeOperand().
122 // If you're overriding this method, no need to override the other two.
123 // `value_type` indicates whether this key-value corresponds to a normal
124 // value (e.g. written with Put()) or a merge operand (written with Merge()).
125 //
126 // Possible return values:
127 // * kKeep - keep the key-value pair.
128 // * kRemove - remove the key-value pair or merge operand.
129 // * kChangeValue - keep the key and change the value/operand to *new_value.
130 // * kRemoveAndSkipUntil - remove this key-value pair, and also remove
131 // all key-value pairs with key in [key, *skip_until). This range
132 // of keys will be skipped without reading, potentially saving some
133 // IO operations compared to removing the keys one by one.
134 //
135 // *skip_until <= key is treated the same as Decision::kKeep
136 // (since the range [key, *skip_until) is empty).
137 //
138 // The keys are skipped even if there are snapshots containing them,
139 // as if IgnoreSnapshots() was true; i.e. values removed
140 // by kRemoveAndSkipUntil can disappear from a snapshot - beware
141 // if you're using TransactionDB or DB::GetSnapshot().
142 //
143 // Another warning: if value for a key was overwritten or merged into
144 // (multiple Put()s or Merge()s), and compaction filter skips this key
145 // with kRemoveAndSkipUntil, it's possible that it will remove only
146 // the new value, exposing the old value that was supposed to be
147 // overwritten.
148 //
149 // If you use kRemoveAndSkipUntil, consider also reducing
150 // compaction_readahead_size option.
151 //
152 // Note: If you are using a TransactionDB, it is not recommended to filter
153 // out or modify merge operands (ValueType::kMergeOperand).
154 // If a merge operation is filtered out, TransactionDB may not realize there
155 // is a write conflict and may allow a Transaction to Commit that should have
156 // failed. Instead, it is better to implement any Merge filtering inside the
157 // MergeOperator.
158 virtual Decision FilterV2(int level, const Slice& key, ValueType value_type,
159 const Slice& existing_value, std::string* new_value,
160 std::string* skip_until) const {
161 switch (value_type) {
162 case ValueType::kValue: {
163 bool value_changed = false;
164 bool rv = Filter(level, key, existing_value, new_value, &value_changed);
165 if (rv) {
166 return Decision::kRemove;
167 }
168 return value_changed ? Decision::kChangeValue : Decision::kKeep;
169 }
170 case ValueType::kMergeOperand: {
171 bool rv = FilterMergeOperand(level, key, existing_value);
172 return rv ? Decision::kRemove : Decision::kKeep;
173 }
174 }
175 assert(false);
176 return Decision::kKeep;
177 }
178
179 // By default, compaction will only call Filter() on keys written after the
180 // most recent call to GetSnapshot(). However, if the compaction filter
181 // overrides IgnoreSnapshots to make it return true, the compaction filter
182 // will be called even if the keys were written before the last snapshot.
183 // This behavior is to be used only when we want to delete a set of keys
184 // irrespective of snapshots. In particular, care should be taken
185 // to understand that the values of thesekeys will change even if we are
186 // using a snapshot.
187 virtual bool IgnoreSnapshots() const { return false; }
188
189 // Returns a name that identifies this compaction filter.
190 // The name will be printed to LOG file on start up for diagnosis.
191 virtual const char* Name() const = 0;
192 };
193
194 // Each compaction will create a new CompactionFilter allowing the
195 // application to know about different compactions
196 class CompactionFilterFactory {
197 public:
198 virtual ~CompactionFilterFactory() { }
199
200 virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
201 const CompactionFilter::Context& context) = 0;
202
203 // Returns a name that identifies this compaction filter factory.
204 virtual const char* Name() const = 0;
205 };
206
207 } // namespace rocksdb
208
209 #endif // STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_