]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/include/rocksdb/filter_policy.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / include / rocksdb / filter_policy.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8 //
9 // A database can be configured with a custom FilterPolicy object.
10 // This object is responsible for creating a small filter from a set
11 // of keys. These filters are stored in rocksdb and are consulted
12 // automatically by rocksdb to decide whether or not to read some
13 // information from disk. In many cases, a filter can cut down the
14 // number of disk seeks form a handful to a single disk seek per
15 // DB::Get() call.
16 //
17 // Most people will want to use the builtin bloom filter support (see
18 // NewBloomFilterPolicy() below).
19
20 #pragma once
21
22 #include <stdlib.h>
23
24 #include <algorithm>
25 #include <memory>
26 #include <stdexcept>
27 #include <string>
28 #include <vector>
29
30 #include "rocksdb/advanced_options.h"
31 #include "rocksdb/customizable.h"
32 #include "rocksdb/status.h"
33 #include "rocksdb/types.h"
34
35 namespace ROCKSDB_NAMESPACE {
36
37 class Slice;
38 struct BlockBasedTableOptions;
39 struct ConfigOptions;
40
41 // As of RocksDB 7.0, the details of these classes are internal
42 class FilterBitsBuilder;
43 class FilterBitsReader;
44
45 // Contextual information passed to BloomFilterPolicy at filter building time.
46 // Used in overriding FilterPolicy::GetBuilderWithContext(). References other
47 // structs because this is expected to be a temporary, stack-allocated object.
48 struct FilterBuildingContext {
49 // This constructor is for internal use only and subject to change.
50 FilterBuildingContext(const BlockBasedTableOptions& table_options);
51
52 // Options for the table being built
53 const BlockBasedTableOptions& table_options;
54
55 // BEGIN from (DB|ColumnFamily)Options in effect at table creation time
56 CompactionStyle compaction_style = kCompactionStyleLevel;
57
58 // Number of LSM levels, or -1 if unknown
59 int num_levels = -1;
60
61 // An optional logger for reporting errors, warnings, etc.
62 Logger* info_log = nullptr;
63 // END from (DB|ColumnFamily)Options
64
65 // Name of the column family for the table (or empty string if unknown)
66 // TODO: consider changing to Slice
67 std::string column_family_name;
68
69 // The table level at time of constructing the SST file, or -1 if unknown
70 // or N/A as in SstFileWriter. (The table file could later be used at a
71 // different level.)
72 int level_at_creation = -1;
73
74 // True if known to be going into bottommost sorted run for applicable
75 // key range (which might not even be last level with data). False
76 // otherwise.
77 bool is_bottommost = false;
78
79 // Reason for creating the file with the filter
80 TableFileCreationReason reason = TableFileCreationReason::kMisc;
81 };
82
83 // Determines what kind of filter (if any) to generate in SST files, and under
84 // which conditions. API users can create custom filter policies that
85 // defer to other built-in policies (see NewBloomFilterPolicy and
86 // NewRibbonFilterPolicy) based on the context provided to
87 // GetBuilderWithContext.
88 class FilterPolicy : public Customizable {
89 public:
90 virtual ~FilterPolicy();
91 static const char* Type() { return "FilterPolicy"; }
92
93 // The name used for identifying whether a filter on disk is readable
94 // by this FilterPolicy. If this FilterPolicy is part of a family that
95 // can read each others filters, such as built-in BloomFilterPolcy and
96 // RibbonFilterPolicy, the CompatibilityName is a shared family name,
97 // while kinds of filters in the family can have distinct Customizable
98 // Names. This function is pure virtual so that wrappers around built-in
99 // policies are prompted to defer to CompatibilityName() of the wrapped
100 // policy, which is important for compatibility.
101 //
102 // For custom filter policies that are not part of a read-compatible
103 // family (rare), implementations may return Name().
104 virtual const char* CompatibilityName() const = 0;
105
106 // Creates a new FilterPolicy based on the input value string and returns the
107 // result The value might be an ID, and ID with properties, or an old-style
108 // policy string.
109 // The value describes the FilterPolicy being created.
110 // For BloomFilters, value may be a ":"-delimited value of the form:
111 // "bloomfilter:[bits_per_key]",
112 // e.g. ""bloomfilter:4"
113 // The above string is equivalent to calling NewBloomFilterPolicy(4).
114 static Status CreateFromString(const ConfigOptions& config_options,
115 const std::string& value,
116 std::shared_ptr<const FilterPolicy>* result);
117
118 // Return a new FilterBitsBuilder for constructing full or partitioned
119 // filter blocks, or return nullptr to indicate "no filter". Custom
120 // implementations should defer to a built-in FilterPolicy to get a
121 // new FilterBitsBuilder, but the FilterBuildingContext can be used
122 // to decide which built-in FilterPolicy to defer to.
123 virtual FilterBitsBuilder* GetBuilderWithContext(
124 const FilterBuildingContext&) const = 0;
125
126 // Return a new FilterBitsReader for full or partitioned filter blocks.
127 // Caller retains ownership of any buffer pointed to by the input Slice.
128 // Custom implementation should defer to GetFilterBitsReader on any
129 // built-in FilterPolicy, which can read filters generated by any other
130 // built-in FilterPolicy.
131 virtual FilterBitsReader* GetFilterBitsReader(
132 const Slice& /*contents*/) const = 0;
133 };
134
135 // Return a new filter policy that uses a bloom filter with approximately
136 // the specified number of bits per key. See
137 // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
138 //
139 // bits_per_key: average bits allocated per key in bloom filter. A good
140 // choice is 9.9, which yields a filter with ~ 1% false positive rate.
141 // When format_version < 5, the value will be rounded to the nearest
142 // integer. Recommend using no more than three decimal digits after the
143 // decimal point, as in 6.667.
144 //
145 // To avoid configurations that are unlikely to produce good filtering
146 // value for the CPU overhead, bits_per_key < 0.5 is rounded down to 0.0
147 // which means "generate no filter", and 0.5 <= bits_per_key < 1.0 is
148 // rounded up to 1.0, for a 62% FP rate.
149 //
150 // The caller is responsible for eventually deleting the result, though
151 // this is typically handled automatically with BlockBasedTableOptions:
152 // table_options.filter_policy.reset(NewBloomFilterPolicy(...));
153 //
154 // As of RocksDB 7.0, the use_block_based_builder parameter is ignored.
155 // (The old, inefficient block-based filter is no longer accessible in
156 // the public API.)
157 //
158 // Note: if you are using a custom comparator that ignores some parts
159 // of the keys being compared, you must not use NewBloomFilterPolicy()
160 // and must provide your own FilterPolicy that also ignores the
161 // corresponding parts of the keys. For example, if the comparator
162 // ignores trailing spaces, it would be incorrect to use a
163 // FilterPolicy (like NewBloomFilterPolicy) that does not ignore
164 // trailing spaces in keys.
165 extern const FilterPolicy* NewBloomFilterPolicy(
166 double bits_per_key, bool IGNORED_use_block_based_builder = false);
167
168 // A new Bloom alternative that saves about 30% space compared to
169 // Bloom filters, with similar query times but roughly 3-4x CPU time
170 // and 3x temporary space usage during construction. For example, if
171 // you pass in 10 for bloom_equivalent_bits_per_key, you'll get the same
172 // 0.95% FP rate as Bloom filter but only using about 7 bits per key.
173 //
174 // The space savings of Ribbon filters makes sense for lower (higher
175 // numbered; larger; longer-lived) levels of LSM, whereas the speed of
176 // Bloom filters make sense for highest levels of LSM. Setting
177 // bloom_before_level allows for this design with Level and Universal
178 // compaction styles. For example, bloom_before_level=1 means that Bloom
179 // filters will be used in level 0, including flushes, and Ribbon
180 // filters elsewhere, including FIFO compaction and external SST files.
181 // For this option, memtable flushes are considered level -1 (so that
182 // flushes can be distinguished from intra-L0 compaction).
183 // bloom_before_level=0 (default) -> Generate Bloom filters only for
184 // flushes under Level and Universal compaction styles.
185 // bloom_before_level=-1 -> Always generate Ribbon filters (except in
186 // some extreme or exceptional cases).
187 //
188 // Ribbon filters are compatible with RocksDB >= 6.15.0. Earlier
189 // versions reading the data will behave as if no filter was used
190 // (degraded performance until compaction rebuilds filters). All
191 // built-in FilterPolicies (Bloom or Ribbon) are able to read other
192 // kinds of built-in filters.
193 //
194 // Note: the current Ribbon filter schema uses some extra resources
195 // when constructing very large filters. For example, for 100 million
196 // keys in a single filter (one SST file without partitioned filters),
197 // 3GB of temporary, untracked memory is used, vs. 1GB for Bloom.
198 // However, the savings in filter space from just ~60 open SST files
199 // makes up for the additional temporary memory use.
200 //
201 // Also consider using optimize_filters_for_memory to save filter
202 // memory.
203 extern const FilterPolicy* NewRibbonFilterPolicy(
204 double bloom_equivalent_bits_per_key, int bloom_before_level = 0);
205
206 } // namespace ROCKSDB_NAMESPACE