]>
git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/include/rocksdb/compaction_filter.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 // Copyright (c) 2013 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
16 #include "rocksdb/rocksdb_namespace.h"
18 namespace ROCKSDB_NAMESPACE
{
23 // Context information of a compaction run
24 struct CompactionFilterContext
{
25 // Does this compaction run include all data files
26 bool is_full_compaction
;
27 // Is this compaction requested by the client (true),
28 // or is it occurring as an automatic compaction process
29 bool is_manual_compaction
;
32 // CompactionFilter allows an application to modify/delete a key-value at
33 // the time of compaction.
35 class CompactionFilter
{
40 kBlobIndex
, // used internally by BlobDB.
48 kChangeBlobIndex
, // used internally by BlobDB.
49 kIOError
, // used internally by BlobDB.
// Result type returned by PrepareBlobOutput(), a hook that is marked as
// internal (BlobDB) use only.
52 enum class BlobDecision
{ kKeep
, kChangeValue
, kCorruption
, kIOError
};
54 // Context information of a compaction run
56 // Does this compaction run include all data files
57 bool is_full_compaction
;
58 // Is this compaction requested by the client (true),
59 // or is it occurring as an automatic compaction process
60 bool is_manual_compaction
;
61 // Which column family this compaction is for.
62 uint32_t column_family_id
;
65 virtual ~CompactionFilter() {}
67 // The compaction process invokes this
68 // method for kv that is being compacted. A return value
69 // of false indicates that the kv should be preserved in the
70 // output of this compaction run and a return value of true
71 // indicates that this key-value should be removed from the
72 // output of the compaction. The application can inspect
73 // the existing value of the key and make decision based on it.
75 // Key-values that are the results of merge operations during compaction are
76 // not passed into this function. Currently, when you have a mix of Put()s and
77 // Merge()s on the same key, we only guarantee to process the merge operands
78 // through the compaction filters. Put()s might be processed, or might not.
80 // When the value is to be preserved, the application has the option
81 // to modify the existing_value and pass it back through new_value.
82 // value_changed needs to be set to true in this case.
84 // Note that RocksDB snapshots (i.e. those obtained by calling the
85 // GetSnapshot() API on a DB* object) are not guaranteed to preserve the state
86 // of the DB when a CompactionFilter is used. Data seen from a snapshot might
87 // disappear after a compaction finishes. If you use snapshots, think twice about
88 // whether you want to use a compaction filter and whether you are using it safely.
90 // If multithreaded compaction is being used *and* a single CompactionFilter
91 // instance was supplied via Options::compaction_filter, this method may be
92 // called from different threads concurrently. The application must ensure
93 // that the call is thread-safe.
95 // If the CompactionFilter was created by a factory, then it will only ever
96 // be used by a single thread that is doing the compaction run, and this
97 // call does not need to be thread-safe. However, multiple filters may be
98 // in existence and operating concurrently.
99 virtual bool Filter(int /*level*/, const Slice
& /*key*/,
100 const Slice
& /*existing_value*/,
101 std::string
* /*new_value*/,
102 bool* /*value_changed*/) const {
106 // The compaction process invokes this method on every merge operand. If this
107 // method returns true, the merge operand will be ignored and not written out
108 // in the compaction output
110 // Note: If you are using a TransactionDB, it is not recommended to implement
111 // FilterMergeOperand(). If a Merge operation is filtered out, TransactionDB
112 // may not realize there is a write conflict and may allow a Transaction to
113 // Commit that should have failed. Instead, it is better to implement any
114 // Merge filtering inside the MergeOperator.
115 virtual bool FilterMergeOperand(int /*level*/, const Slice
& /*key*/,
116 const Slice
& /*operand*/) const {
120 // An extended API. Called for both values and merge operands.
121 // Allows changing value and skipping ranges of keys.
122 // The default implementation uses Filter() and FilterMergeOperand().
123 // If you're overriding this method, no need to override the other two.
124 // `value_type` indicates whether this key-value corresponds to a normal
125 // value (e.g. written with Put()) or a merge operand (written with Merge()).
127 // Possible return values:
128 // * kKeep - keep the key-value pair.
129 // * kRemove - remove the key-value pair or merge operand.
130 // * kChangeValue - keep the key and change the value/operand to *new_value.
131 // * kRemoveAndSkipUntil - remove this key-value pair, and also remove
132 // all key-value pairs with key in [key, *skip_until). This range
133 // of keys will be skipped without reading, potentially saving some
134 // IO operations compared to removing the keys one by one.
136 // *skip_until <= key is treated the same as Decision::kKeep
137 // (since the range [key, *skip_until) is empty).
// Caveats of kRemoveAndSkipUntil:
140 // - The keys are skipped even if there are snapshots containing them,
141 // i.e. values removed by kRemoveAndSkipUntil can disappear from a
142 // snapshot - beware if you're using TransactionDB or
143 // DB::GetSnapshot().
144 // - If the value for a key was overwritten or merged into (multiple Put()s
145 // or Merge()s), and the compaction filter skips this key with
146 // kRemoveAndSkipUntil, it's possible that it will remove only
147 // the new value, exposing the old value that was supposed to be overwritten.
149 // - Doesn't work with PlainTableFactory in prefix mode.
150 // - If you use kRemoveAndSkipUntil, consider also reducing the
151 // compaction_readahead_size option.
153 // Note: If you are using a TransactionDB, it is not recommended to filter
154 // out or modify merge operands (ValueType::kMergeOperand).
155 // If a merge operation is filtered out, TransactionDB may not realize there
156 // is a write conflict and may allow a Transaction to Commit that should have
157 // failed. Instead, it is better to implement any Merge filtering inside the MergeOperator.
159 virtual Decision
FilterV2(int level
, const Slice
& key
, ValueType value_type
,
160 const Slice
& existing_value
, std::string
* new_value
,
161 std::string
* /*skip_until*/) const {
162 switch (value_type
) {
163 case ValueType::kValue
: {
164 bool value_changed
= false;
165 bool rv
= Filter(level
, key
, existing_value
, new_value
, &value_changed
);
167 return Decision::kRemove
;
169 return value_changed
? Decision::kChangeValue
: Decision::kKeep
;
171 case ValueType::kMergeOperand
: {
172 bool rv
= FilterMergeOperand(level
, key
, existing_value
);
173 return rv
? Decision::kRemove
: Decision::kKeep
;
175 case ValueType::kBlobIndex
:
176 return Decision::kKeep
;
179 return Decision::kKeep
;
182 // Internal (BlobDB) use only. Do not override in application code.
183 virtual BlobDecision
PrepareBlobOutput(const Slice
& /* key */,
184 const Slice
& /* existing_value */,
185 std::string
* /* new_value */) const {
186 return BlobDecision::kKeep
;
189 // This function is deprecated. Snapshots will always be ignored for
190 // compaction filters, because we realized that not ignoring snapshots doesn't
191 // provide the guarantee we initially thought it would provide. Repeatable
192 // reads will not be guaranteed anyway. If you override the function and
193 // return false, we will fail the compaction.
// The default implementation returns true.
194 virtual bool IgnoreSnapshots() const { return true; }
196 // Returns a name that identifies this compaction filter.
197 // The name will be printed to LOG file on start up for diagnosis.
198 virtual const char* Name() const = 0;
201 // Each compaction will create a new CompactionFilter allowing the
202 // application to know about different compactions
203 class CompactionFilterFactory
{
205 virtual ~CompactionFilterFactory() {}
// Pure virtual factory hook that every concrete factory must implement.
// Returns a CompactionFilter held in a std::unique_ptr, so ownership of the
// returned filter passes to the caller. `context` is the
// CompactionFilter::Context describing the compaction run.
207 virtual std::unique_ptr
<CompactionFilter
> CreateCompactionFilter(
208 const CompactionFilter::Context
& context
) = 0;
210 // Returns a name that identifies this compaction filter factory.
211 virtual const char* Name() const = 0;
214 } // namespace ROCKSDB_NAMESPACE