]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/include/rocksdb/compaction_filter.h
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / include / rocksdb / compaction_filter.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 // Copyright (c) 2013 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8
9 #pragma once
10
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
15
16 namespace rocksdb {
17
// Forward declarations. Slice is only used by const reference in this header,
// so an incomplete type suffices. SliceTransform is not referenced by the
// declarations visible here; it is kept so that existing includers relying on
// it keep compiling.
class Slice;
class SliceTransform;
20
// Context information of a compaction run.
//
// The struct declares no default member initializers, so a default-constructed
// instance holds indeterminate values until both fields are assigned.
struct CompactionFilterContext {
  // Does this compaction run include all data files
  bool is_full_compaction;
  // Is this compaction requested by the client (true),
  // or is it occurring as an automatic compaction process
  bool is_manual_compaction;
};
29
30 // CompactionFilter allows an application to modify/delete a key-value at
31 // the time of compaction.
32
33 class CompactionFilter {
34 public:
35 enum ValueType {
36 kValue,
37 kMergeOperand,
38 kBlobIndex, // used internally by BlobDB.
39 };
40
41 enum class Decision {
42 kKeep,
43 kRemove,
44 kChangeValue,
45 kRemoveAndSkipUntil,
46 };
47
48 // Context information of a compaction run
49 struct Context {
50 // Does this compaction run include all data files
51 bool is_full_compaction;
52 // Is this compaction requested by the client (true),
53 // or is it occurring as an automatic compaction process
54 bool is_manual_compaction;
55 // Which column family this compaction is for.
56 uint32_t column_family_id;
57 };
58
59 virtual ~CompactionFilter() {}
60
61 // The compaction process invokes this
62 // method for kv that is being compacted. A return value
63 // of false indicates that the kv should be preserved in the
64 // output of this compaction run and a return value of true
65 // indicates that this key-value should be removed from the
66 // output of the compaction. The application can inspect
67 // the existing value of the key and make decision based on it.
68 //
69 // Key-Values that are results of merge operation during compaction are not
70 // passed into this function. Currently, when you have a mix of Put()s and
71 // Merge()s on a same key, we only guarantee to process the merge operands
72 // through the compaction filters. Put()s might be processed, or might not.
73 //
74 // When the value is to be preserved, the application has the option
75 // to modify the existing_value and pass it back through new_value.
76 // value_changed needs to be set to true in this case.
77 //
78 // Note that RocksDB snapshots (i.e. call GetSnapshot() API on a
79 // DB* object) will not guarantee to preserve the state of the DB with
80 // CompactionFilter. Data seen from a snapshot might disppear after a
81 // compaction finishes. If you use snapshots, think twice about whether you
82 // want to use compaction filter and whether you are using it in a safe way.
83 //
84 // If multithreaded compaction is being used *and* a single CompactionFilter
85 // instance was supplied via Options::compaction_filter, this method may be
86 // called from different threads concurrently. The application must ensure
87 // that the call is thread-safe.
88 //
89 // If the CompactionFilter was created by a factory, then it will only ever
90 // be used by a single thread that is doing the compaction run, and this
91 // call does not need to be thread-safe. However, multiple filters may be
92 // in existence and operating concurrently.
93 virtual bool Filter(int /*level*/, const Slice& /*key*/,
94 const Slice& /*existing_value*/,
95 std::string* /*new_value*/,
96 bool* /*value_changed*/) const {
97 return false;
98 }
99
100 // The compaction process invokes this method on every merge operand. If this
101 // method returns true, the merge operand will be ignored and not written out
102 // in the compaction output
103 //
104 // Note: If you are using a TransactionDB, it is not recommended to implement
105 // FilterMergeOperand(). If a Merge operation is filtered out, TransactionDB
106 // may not realize there is a write conflict and may allow a Transaction to
107 // Commit that should have failed. Instead, it is better to implement any
108 // Merge filtering inside the MergeOperator.
109 virtual bool FilterMergeOperand(int /*level*/, const Slice& /*key*/,
110 const Slice& /*operand*/) const {
111 return false;
112 }
113
114 // An extended API. Called for both values and merge operands.
115 // Allows changing value and skipping ranges of keys.
116 // The default implementation uses Filter() and FilterMergeOperand().
117 // If you're overriding this method, no need to override the other two.
118 // `value_type` indicates whether this key-value corresponds to a normal
119 // value (e.g. written with Put()) or a merge operand (written with Merge()).
120 //
121 // Possible return values:
122 // * kKeep - keep the key-value pair.
123 // * kRemove - remove the key-value pair or merge operand.
124 // * kChangeValue - keep the key and change the value/operand to *new_value.
125 // * kRemoveAndSkipUntil - remove this key-value pair, and also remove
126 // all key-value pairs with key in [key, *skip_until). This range
127 // of keys will be skipped without reading, potentially saving some
128 // IO operations compared to removing the keys one by one.
129 //
130 // *skip_until <= key is treated the same as Decision::kKeep
131 // (since the range [key, *skip_until) is empty).
132 //
133 // Caveats:
134 // - The keys are skipped even if there are snapshots containing them,
135 // i.e. values removed by kRemoveAndSkipUntil can disappear from a
136 // snapshot - beware if you're using TransactionDB or
137 // DB::GetSnapshot().
138 // - If value for a key was overwritten or merged into (multiple Put()s
139 // or Merge()s), and compaction filter skips this key with
140 // kRemoveAndSkipUntil, it's possible that it will remove only
141 // the new value, exposing the old value that was supposed to be
142 // overwritten.
143 // - Doesn't work with PlainTableFactory in prefix mode.
144 // - If you use kRemoveAndSkipUntil, consider also reducing
145 // compaction_readahead_size option.
146 //
147 // Note: If you are using a TransactionDB, it is not recommended to filter
148 // out or modify merge operands (ValueType::kMergeOperand).
149 // If a merge operation is filtered out, TransactionDB may not realize there
150 // is a write conflict and may allow a Transaction to Commit that should have
151 // failed. Instead, it is better to implement any Merge filtering inside the
152 // MergeOperator.
153 virtual Decision FilterV2(int level, const Slice& key, ValueType value_type,
154 const Slice& existing_value, std::string* new_value,
155 std::string* /*skip_until*/) const {
156 switch (value_type) {
157 case ValueType::kValue: {
158 bool value_changed = false;
159 bool rv = Filter(level, key, existing_value, new_value, &value_changed);
160 if (rv) {
161 return Decision::kRemove;
162 }
163 return value_changed ? Decision::kChangeValue : Decision::kKeep;
164 }
165 case ValueType::kMergeOperand: {
166 bool rv = FilterMergeOperand(level, key, existing_value);
167 return rv ? Decision::kRemove : Decision::kKeep;
168 }
169 case ValueType::kBlobIndex:
170 return Decision::kKeep;
171 }
172 assert(false);
173 return Decision::kKeep;
174 }
175
176 // This function is deprecated. Snapshots will always be ignored for
177 // compaction filters, because we realized that not ignoring snapshots doesn't
178 // provide the gurantee we initially thought it would provide. Repeatable
179 // reads will not be guaranteed anyway. If you override the function and
180 // returns false, we will fail the compaction.
181 virtual bool IgnoreSnapshots() const { return true; }
182
183 // Returns a name that identifies this compaction filter.
184 // The name will be printed to LOG file on start up for diagnosis.
185 virtual const char* Name() const = 0;
186 };
187
188 // Each compaction will create a new CompactionFilter allowing the
189 // application to know about different compactions
190 class CompactionFilterFactory {
191 public:
192 virtual ~CompactionFilterFactory() {}
193
194 virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
195 const CompactionFilter::Context& context) = 0;
196
197 // Returns a name that identifies this compaction filter factory.
198 virtual const char* Name() const = 0;
199 };
200
201 } // namespace rocksdb