#include <stdlib.h>
+#include <algorithm>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include "rocksdb/advanced_options.h"
+#include "rocksdb/customizable.h"
#include "rocksdb/status.h"
+#include "rocksdb/types.h"
namespace ROCKSDB_NAMESPACE {
struct BlockBasedTableOptions;
struct ConfigOptions;
-// A class that takes a bunch of keys, then generates filter
-class FilterBitsBuilder {
- public:
- virtual ~FilterBitsBuilder() {}
-
- // Add Key to filter, you could use any way to store the key.
- // Such as: storing hashes or original keys
- // Keys are in sorted order and duplicated keys are possible.
- virtual void AddKey(const Slice& key) = 0;
-
- // Generate the filter using the keys that are added
- // The return value of this function would be the filter bits,
- // The ownership of actual data is set to buf
- virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
-
- // Calculate num of keys that can be added and generate a filter
- // <= the specified number of bytes.
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable : 4702) // unreachable code
-#endif
- virtual int CalculateNumEntry(const uint32_t /*bytes*/) {
-#ifndef ROCKSDB_LITE
- throw std::runtime_error("CalculateNumEntry not Implemented");
-#else
- abort();
-#endif
- return 0;
- }
-#if defined(_MSC_VER)
-#pragma warning(pop)
-#endif
-};
-
-// A class that checks if a key can be in filter
-// It should be initialized by Slice generated by BitsBuilder
-class FilterBitsReader {
- public:
- virtual ~FilterBitsReader() {}
-
- // Check if the entry match the bits in filter
- virtual bool MayMatch(const Slice& entry) = 0;
-
- // Check if an array of entries match the bits in filter
- virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) {
- for (int i = 0; i < num_keys; ++i) {
- may_match[i] = MayMatch(*keys[i]);
- }
- }
-};
+// As of RocksDB 7.0, the details of these classes are internal
+class FilterBitsBuilder;
+class FilterBitsReader;
// Contextual information passed to BloomFilterPolicy at filter building time.
// Used in overriding FilterPolicy::GetBuilderWithContext(). References other
// Options for the table being built
const BlockBasedTableOptions& table_options;
- // Name of the column family for the table (or empty string if unknown)
- std::string column_family_name;
-
- // The compactions style in effect for the table
+ // BEGIN from (DB|ColumnFamily)Options in effect at table creation time
CompactionStyle compaction_style = kCompactionStyleLevel;
- // The table level at time of constructing the SST file, or -1 if unknown.
- // (The table file could later be used at a different level.)
- int level_at_creation = -1;
+ // Number of LSM levels, or -1 if unknown
+ int num_levels = -1;
// An optional logger for reporting errors, warnings, etc.
Logger* info_log = nullptr;
+ // END from (DB|ColumnFamily)Options
+
+ // Name of the column family for the table (or empty string if unknown)
+ // TODO: consider changing to Slice
+ std::string column_family_name;
+
+ // The table level at time of constructing the SST file, or -1 if unknown
+ // or N/A as in SstFileWriter. (The table file could later be used at a
+ // different level.)
+ int level_at_creation = -1;
+
+ // True if known to be going into bottommost sorted run for applicable
+ // key range (which might not even be last level with data). False
+ // otherwise.
+ bool is_bottommost = false;
+
+ // Reason for creating the file with the filter
+ TableFileCreationReason reason = TableFileCreationReason::kMisc;
};
-// We add a new format of filter block called full filter block
-// This new interface gives you more space of customization
-//
-// For the full filter block, you can plug in your version by implement
-// the FilterBitsBuilder and FilterBitsReader
-//
-// There are two sets of interface in FilterPolicy
-// Set 1: CreateFilter, KeyMayMatch: used for blockbased filter
-// Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for
-// full filter.
-// Set 1 MUST be implemented correctly, Set 2 is optional
-// RocksDB would first try using functions in Set 2. if they return nullptr,
-// it would use Set 1 instead.
-// You can choose filter type in NewBloomFilterPolicy
-class FilterPolicy {
+// Determines what kind of filter (if any) to generate in SST files, and under
+// which conditions. API users can create custom filter policies that
+// defer to other built-in policies (see NewBloomFilterPolicy and
+// NewRibbonFilterPolicy) based on the context provided to
+// GetBuilderWithContext.
+class FilterPolicy : public Customizable {
public:
virtual ~FilterPolicy();
+ static const char* Type() { return "FilterPolicy"; }
+
+ // The name used for identifying whether a filter on disk is readable
+ // by this FilterPolicy. If this FilterPolicy is part of a family that
+  // can read each other's filters, such as built-in BloomFilterPolicy and
+ // RibbonFilterPolicy, the CompatibilityName is a shared family name,
+ // while kinds of filters in the family can have distinct Customizable
+ // Names. This function is pure virtual so that wrappers around built-in
+ // policies are prompted to defer to CompatibilityName() of the wrapped
+ // policy, which is important for compatibility.
+ //
+ // For custom filter policies that are not part of a read-compatible
+ // family (rare), implementations may return Name().
+ virtual const char* CompatibilityName() const = 0;
// Creates a new FilterPolicy based on the input value string and returns the
// result The value might be an ID, and ID with properties, or an old-style
// policy string.
// The value describes the FilterPolicy being created.
// For BloomFilters, value may be a ":"-delimited value of the form:
- // "bloomfilter:[bits_per_key]:[use_block_based_builder]",
- // e.g. ""bloomfilter:4:true"
- // The above string is equivalent to calling NewBloomFilterPolicy(4, true).
+ // "bloomfilter:[bits_per_key]",
+  //      e.g. "bloomfilter:4"
+ // The above string is equivalent to calling NewBloomFilterPolicy(4).
static Status CreateFromString(const ConfigOptions& config_options,
const std::string& value,
std::shared_ptr<const FilterPolicy>* result);
- // Return the name of this policy. Note that if the filter encoding
- // changes in an incompatible way, the name returned by this method
- // must be changed. Otherwise, old incompatible filters may be
- // passed to methods of this type.
- virtual const char* Name() const = 0;
-
- // keys[0,n-1] contains a list of keys (potentially with duplicates)
- // that are ordered according to the user supplied comparator.
- // Append a filter that summarizes keys[0,n-1] to *dst.
- //
- // Warning: do not change the initial contents of *dst. Instead,
- // append the newly constructed filter to *dst.
- virtual void CreateFilter(const Slice* keys, int n,
- std::string* dst) const = 0;
-
- // "filter" contains the data appended by a preceding call to
- // CreateFilter() on this class. This method must return true if
- // the key was in the list of keys passed to CreateFilter().
- // This method may return true or false if the key was not on the
- // list, but it should aim to return false with a high probability.
- virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
-
- // Return a new FilterBitsBuilder for full or partitioned filter blocks, or
- // nullptr if using block-based filter.
- // NOTE: This function is only called by GetBuilderWithContext() below for
- // custom FilterPolicy implementations. Thus, it is not necessary to
- // override this function if overriding GetBuilderWithContext().
- virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; }
-
- // A newer variant of GetFilterBitsBuilder that allows a FilterPolicy
- // to customize the builder for contextual constraints and hints.
- // (Name changed to avoid triggering -Werror=overloaded-virtual.)
- // If overriding GetFilterBitsBuilder() suffices, it is not necessary to
- // override this function.
+ // Return a new FilterBitsBuilder for constructing full or partitioned
+ // filter blocks, or return nullptr to indicate "no filter". Custom
+ // implementations should defer to a built-in FilterPolicy to get a
+ // new FilterBitsBuilder, but the FilterBuildingContext can be used
+ // to decide which built-in FilterPolicy to defer to.
virtual FilterBitsBuilder* GetBuilderWithContext(
- const FilterBuildingContext&) const {
- return GetFilterBitsBuilder();
- }
+ const FilterBuildingContext&) const = 0;
- // Return a new FilterBitsReader for full or partitioned filter blocks, or
- // nullptr if using block-based filter.
- // As here, the input slice should NOT be deleted by FilterPolicy.
+ // Return a new FilterBitsReader for full or partitioned filter blocks.
+ // Caller retains ownership of any buffer pointed to by the input Slice.
+  // Custom implementations should defer to GetFilterBitsReader on any
+ // built-in FilterPolicy, which can read filters generated by any other
+ // built-in FilterPolicy.
virtual FilterBitsReader* GetFilterBitsReader(
- const Slice& /*contents*/) const {
- return nullptr;
- }
+ const Slice& /*contents*/) const = 0;
};
// Return a new filter policy that uses a bloom filter with approximately
-// the specified number of bits per key.
+// the specified number of bits per key. See
+// https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
//
// bits_per_key: average bits allocated per key in bloom filter. A good
// choice is 9.9, which yields a filter with ~ 1% false positive rate.
// integer. Recommend using no more than three decimal digits after the
// decimal point, as in 6.667.
//
-// use_block_based_builder: use deprecated block based filter (true) rather
-// than full or partitioned filter (false).
+// To avoid configurations that are unlikely to produce good filtering
+// value for the CPU overhead, bits_per_key < 0.5 is rounded down to 0.0
+// which means "generate no filter", and 0.5 <= bits_per_key < 1.0 is
+// rounded up to 1.0, for a 62% FP rate.
+//
+// The caller is responsible for eventually deleting the result, though
+// this is typically handled automatically with BlockBasedTableOptions:
+// table_options.filter_policy.reset(NewBloomFilterPolicy(...));
//
-// Callers must delete the result after any database that is using the
-// result has been closed.
+// As of RocksDB 7.0, the use_block_based_builder parameter is ignored.
+// (The old, inefficient block-based filter is no longer accessible in
+// the public API.)
//
// Note: if you are using a custom comparator that ignores some parts
// of the keys being compared, you must not use NewBloomFilterPolicy()
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
// trailing spaces in keys.
extern const FilterPolicy* NewBloomFilterPolicy(
- double bits_per_key, bool use_block_based_builder = false);
-
-// An EXPERIMENTAL new Bloom alternative that saves about 30% space
-// compared to Bloom filters, with about 3-4x construction time and
-// similar query times. For example, if you pass in 10 for
-// bloom_equivalent_bits_per_key, you'll get the same 0.95% FP rate
-// as Bloom filter but only using about 7 bits per key. (This
-// way of configuring the new filter is considered experimental
-// and/or transitional, so is expected to go away.)
+ double bits_per_key, bool IGNORED_use_block_based_builder = false);
+
+// A new Bloom alternative that saves about 30% space compared to
+// Bloom filters, with similar query times but roughly 3-4x CPU time
+// and 3x temporary space usage during construction. For example, if
+// you pass in 10 for bloom_equivalent_bits_per_key, you'll get the same
+// 0.95% FP rate as Bloom filter but only using about 7 bits per key.
+//
+// The space savings of Ribbon filters makes sense for lower (higher
+// numbered; larger; longer-lived) levels of LSM, whereas the speed of
+// Bloom filters makes sense for highest levels of LSM. Setting
+// bloom_before_level allows for this design with Level and Universal
+// compaction styles. For example, bloom_before_level=1 means that Bloom
+// filters will be used in level 0, including flushes, and Ribbon
+// filters elsewhere, including FIFO compaction and external SST files.
+// For this option, memtable flushes are considered level -1 (so that
+// flushes can be distinguished from intra-L0 compaction).
+// bloom_before_level=0 (default) -> Generate Bloom filters only for
+// flushes under Level and Universal compaction styles.
+// bloom_before_level=-1 -> Always generate Ribbon filters (except in
+// some extreme or exceptional cases).
+//
+// Ribbon filters are compatible with RocksDB >= 6.15.0. Earlier
+// versions reading the data will behave as if no filter was used
+// (degraded performance until compaction rebuilds filters). All
+// built-in FilterPolicies (Bloom or Ribbon) are able to read other
+// kinds of built-in filters.
//
-// Ribbon filters are ignored by previous versions of RocksDB, as if
-// no filter was used.
+// Note: the current Ribbon filter schema uses some extra resources
+// when constructing very large filters. For example, for 100 million
+// keys in a single filter (one SST file without partitioned filters),
+// 3GB of temporary, untracked memory is used, vs. 1GB for Bloom.
+// However, the savings in filter space from just ~60 open SST files
+// makes up for the additional temporary memory use.
//
-// Note: this policy can generate Bloom filters in some cases.
-// For very small filters (well under 1KB), Bloom fallback is by
-// design, as the current Ribbon schema is not optimized to save vs.
-// Bloom for such small filters. Other cases of Bloom fallback should
-// be exceptional and log an appropriate warning.
-extern const FilterPolicy* NewExperimentalRibbonFilterPolicy(
- double bloom_equivalent_bits_per_key);
+// Also consider using optimize_filters_for_memory to save filter
+// memory.
+extern const FilterPolicy* NewRibbonFilterPolicy(
+ double bloom_equivalent_bits_per_key, int bloom_before_level = 0);
} // namespace ROCKSDB_NAMESPACE