// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A Cache is an interface that maps keys to values. It has internal
// synchronization and may be safely accessed concurrently from
// multiple threads. It may automatically evict entries to make room
// for new entries. Values have a specified charge against the cache
// capacity. For example, a cache whose values are variable-length
// strings may use the length of the string as the charge for the
// string.
//
// A builtin cache implementation with a least-recently-used eviction
// policy is provided. Clients may use their own implementations if
// they want something more sophisticated (like scan-resistance, a
// custom eviction policy, variable cache sizing, etc.)

#pragma once

#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <vector>

#include "rocksdb/compression_type.h"
#include "rocksdb/memory_allocator.h"
#include "rocksdb/slice.h"
#include "rocksdb/statistics.h"
#include "rocksdb/status.h"

namespace ROCKSDB_NAMESPACE {

class Cache;
struct ConfigOptions;
class Logger;
class SecondaryCache;

// Classifications of block cache entries.
//
// Developer notes: Adding a new enum to this class requires corresponding
// updates to `kCacheEntryRoleToCamelString` and
// `kCacheEntryRoleToHyphenString`. Do not add to this enum after `kMisc` since
// `kNumCacheEntryRoles` assumes `kMisc` comes last.
enum class CacheEntryRole {
  // Block-based table data block
  kDataBlock,
  // Block-based table filter block (full or partitioned)
  kFilterBlock,
  // Block-based table metadata block for partitioned filter
  kFilterMetaBlock,
  // OBSOLETE / DEPRECATED: old/removed block-based filter
  kDeprecatedFilterBlock,
  // Block-based table index block
  kIndexBlock,
  // Other kinds of block-based table block
  kOtherBlock,
  // WriteBufferManager's charge to account for its memtable usage
  kWriteBuffer,
  // Compression dictionary building buffer's charge to account for
  // its memory usage
  kCompressionDictionaryBuildingBuffer,
  // Filter's charge to account for
  // (new) bloom and ribbon filter construction's memory usage
  kFilterConstruction,
  // BlockBasedTableReader's charge to account for its memory usage
  kBlockBasedTableReader,
  // FileMetadata's charge to account for its memory usage
  kFileMetadata,
  // Blob value (when using the same cache as block cache and blob cache)
  kBlobValue,
  // Blob cache's charge to account for its memory usage (when using a
  // separate block cache and blob cache)
  kBlobCache,
  // Default bucket, for miscellaneous cache entries. Do not use for
  // entries that could potentially add up to large usage.
  kMisc,
};
constexpr uint32_t kNumCacheEntryRoles =
    static_cast<uint32_t>(CacheEntryRole::kMisc) + 1;

// Obtain a hyphen-separated, lowercase name of a `CacheEntryRole`.
const std::string& GetCacheEntryRoleName(CacheEntryRole);

// For use with `GetMapProperty()` for property
// `DB::Properties::kBlockCacheEntryStats`. On success, the map will
// be populated with all keys that can be obtained from these functions.
struct BlockCacheEntryStatsMapKeys {
  static const std::string& CacheId();
  static const std::string& CacheCapacityBytes();
  static const std::string& LastCollectionDurationSeconds();
  static const std::string& LastCollectionAgeSeconds();

  static std::string EntryCount(CacheEntryRole);
  static std::string UsedBytes(CacheEntryRole);
  static std::string UsedPercent(CacheEntryRole);
};
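
// For illustration, a sketch of reading these stats from an open DB
// (`db` is assumed; GetMapProperty() is declared in rocksdb/db.h):
//
//   std::map<std::string, std::string> stats;
//   if (db->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &stats)) {
//     std::string used = stats[BlockCacheEntryStatsMapKeys::UsedBytes(
//         CacheEntryRole::kDataBlock)];
//     // `used` is the byte count for data blocks, as a decimal string.
//   }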

extern const bool kDefaultToAdaptiveMutex;

enum CacheMetadataChargePolicy {
  // Only the `charge` of each entry inserted into a Cache counts against
  // the `capacity`
  kDontChargeCacheMetadata,
  // In addition to the `charge`, the approximate space overheads in the
  // Cache (in bytes) also count against `capacity`. These space overheads
  // are for supporting fast Lookup and managing the lifetime of entries.
  kFullChargeCacheMetadata
};
const CacheMetadataChargePolicy kDefaultCacheMetadataChargePolicy =
    kFullChargeCacheMetadata;

// Options shared between various cache implementations that
// divide the key space into shards using hashing.
struct ShardedCacheOptions {
  // Capacity of the cache, in the same units as the `charge` of each entry.
  // This is typically measured in bytes, but can be a different unit if using
  // kDontChargeCacheMetadata.
  size_t capacity = 0;

  // Cache is sharded into 2^num_shard_bits shards, by hash of key.
  // If < 0, a good default is chosen based on the capacity and the
  // implementation. (Mutex-based implementations are much more reliant
  // on many shards for parallel scalability.)
  int num_shard_bits = -1;

  // If strict_capacity_limit is set, Insert() will fail if there is not
  // enough capacity for the new entry along with all the existing referenced
  // (pinned) cache entries. (Unreferenced cache entries are evicted as
  // needed, sometimes immediately.) If strict_capacity_limit == false
  // (default), Insert() never fails.
  bool strict_capacity_limit = false;

  // If non-nullptr, RocksDB will use this allocator instead of system
  // allocator when allocating memory for cache blocks.
  //
  // Caveat: when the cache is used as block cache, the memory allocator is
  // ignored when dealing with compression libraries that allocate memory
  // internally (currently only XPRESS).
  std::shared_ptr<MemoryAllocator> memory_allocator;

  // See CacheMetadataChargePolicy
  CacheMetadataChargePolicy metadata_charge_policy =
      kDefaultCacheMetadataChargePolicy;

  ShardedCacheOptions() {}
  ShardedCacheOptions(
      size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
      std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
      CacheMetadataChargePolicy _metadata_charge_policy =
          kDefaultCacheMetadataChargePolicy)
      : capacity(_capacity),
        num_shard_bits(_num_shard_bits),
        strict_capacity_limit(_strict_capacity_limit),
        memory_allocator(std::move(_memory_allocator)),
        metadata_charge_policy(_metadata_charge_policy) {}
};

struct LRUCacheOptions : public ShardedCacheOptions {
  // Ratio of cache reserved for high-priority and low-priority entries,
  // respectively. (See Cache::Priority below for more information on the
  // levels.) Valid values are between 0 and 1 (inclusive), and the sum of
  // the two values cannot exceed 1.
  //
  // If high_pri_pool_ratio is greater than zero, a dedicated high-priority
  // LRU list is maintained by the cache. Similarly, if low_pri_pool_ratio is
  // greater than zero, a dedicated low-priority LRU list is maintained.
  // There is also a bottom-priority LRU list, which is always enabled and not
  // explicitly configurable. Entries are spilled over to the next available
  // lower-priority pool if a certain pool's capacity is exceeded.
  //
  // Entries with cache hits are inserted into the highest priority LRU list
  // available regardless of the entry's priority. Entries without hits
  // are inserted into the highest priority LRU list available whose priority
  // does not exceed the entry's priority. (For example, high-priority items
  // with no hits are placed in the high-priority pool if available;
  // otherwise, they are placed in the low-priority pool if available;
  // otherwise, they are placed in the bottom-priority pool.) This results
  // in lower-priority entries without hits getting evicted from the cache
  // sooner.
  //
  // Default values: high_pri_pool_ratio = 0.5 (which is referred to as
  // "midpoint insertion"), low_pri_pool_ratio = 0
  double high_pri_pool_ratio = 0.5;
  double low_pri_pool_ratio = 0.0;

  // Whether to use adaptive mutexes for cache shards. Note that adaptive
  // mutexes need to be supported by the platform in order for this to have
  // any effect. The default value is true if RocksDB is compiled with
  // -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX, false otherwise.
  bool use_adaptive_mutex = kDefaultToAdaptiveMutex;

  // A SecondaryCache instance to use as the non-volatile tier.
  std::shared_ptr<SecondaryCache> secondary_cache;

  LRUCacheOptions() {}
  LRUCacheOptions(size_t _capacity, int _num_shard_bits,
                  bool _strict_capacity_limit, double _high_pri_pool_ratio,
                  std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
                  bool _use_adaptive_mutex = kDefaultToAdaptiveMutex,
                  CacheMetadataChargePolicy _metadata_charge_policy =
                      kDefaultCacheMetadataChargePolicy,
                  double _low_pri_pool_ratio = 0.0)
      : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
                            std::move(_memory_allocator),
                            _metadata_charge_policy),
        high_pri_pool_ratio(_high_pri_pool_ratio),
        low_pri_pool_ratio(_low_pri_pool_ratio),
        use_adaptive_mutex(_use_adaptive_mutex) {}
};

// Create a new cache with a fixed size capacity. The cache is sharded
// to 2^num_shard_bits shards, by hash of the key. The total capacity
// is divided and evenly assigned to each shard. If strict_capacity_limit
// is set, inserts into the cache will fail when the cache is full. The user
// can also reserve a percentage of the cache for high-priority entries via
// high_pri_pool_ratio.
// num_shard_bits = -1 means it is automatically determined: every shard
// will be at least 512KB and the number of shard bits will not exceed 6.
extern std::shared_ptr<Cache> NewLRUCache(
    size_t capacity, int num_shard_bits = -1,
    bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5,
    std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
    bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
    CacheMetadataChargePolicy metadata_charge_policy =
        kDefaultCacheMetadataChargePolicy,
    double low_pri_pool_ratio = 0.0);

extern std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts);
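
// A minimal usage sketch (BlockBasedTableOptions is declared in
// rocksdb/table.h, not in this header):
//
//   LRUCacheOptions cache_opts(64 << 20 /* capacity */,
//                              -1 /* num_shard_bits: automatic */,
//                              false /* strict_capacity_limit */,
//                              0.5 /* high_pri_pool_ratio */);
//   std::shared_ptr<Cache> cache = NewLRUCache(cache_opts);
//   BlockBasedTableOptions table_opts;
//   table_opts.block_cache = cache;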

// EXPERIMENTAL
// Options structure for configuring a SecondaryCache instance based on
// LRUCache. The LRUCacheOptions.secondary_cache is not used and
// should not be set.
struct CompressedSecondaryCacheOptions : LRUCacheOptions {
  // The compression method (if any) that is used to compress data.
  CompressionType compression_type = CompressionType::kLZ4Compression;

  // compress_format_version can have two values:
  // compress_format_version == 1 -- decompressed size is not included in the
  // block header.
  // compress_format_version == 2 -- decompressed size is included in the
  // block header in varint32 format.
  uint32_t compress_format_version = 2;

  // Enable the custom split and merge feature, which splits the compressed
  // value into chunks so that they may better fit jemalloc bins.
  bool enable_custom_split_merge = false;

  CompressedSecondaryCacheOptions() {}
  CompressedSecondaryCacheOptions(
      size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
      double _high_pri_pool_ratio, double _low_pri_pool_ratio = 0.0,
      std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
      bool _use_adaptive_mutex = kDefaultToAdaptiveMutex,
      CacheMetadataChargePolicy _metadata_charge_policy =
          kDefaultCacheMetadataChargePolicy,
      CompressionType _compression_type = CompressionType::kLZ4Compression,
      uint32_t _compress_format_version = 2,
      bool _enable_custom_split_merge = false)
      : LRUCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
                        _high_pri_pool_ratio, std::move(_memory_allocator),
                        _use_adaptive_mutex, _metadata_charge_policy,
                        _low_pri_pool_ratio),
        compression_type(_compression_type),
        compress_format_version(_compress_format_version),
        enable_custom_split_merge(_enable_custom_split_merge) {}
};

// EXPERIMENTAL
// Create a new Secondary Cache that is implemented on top of LRUCache.
extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
    size_t capacity, int num_shard_bits = -1,
    bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5,
    double low_pri_pool_ratio = 0.0,
    std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
    bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
    CacheMetadataChargePolicy metadata_charge_policy =
        kDefaultCacheMetadataChargePolicy,
    CompressionType compression_type = CompressionType::kLZ4Compression,
    uint32_t compress_format_version = 2,
    bool enable_custom_split_merge = false);

extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
    const CompressedSecondaryCacheOptions& opts);
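
// A sketch of attaching a compressed secondary cache as the non-volatile
// tier of an LRUCache (both APIs are EXPERIMENTAL and may change):
//
//   CompressedSecondaryCacheOptions sec_opts;
//   sec_opts.capacity = 256 << 20;
//   LRUCacheOptions pri_opts;
//   pri_opts.capacity = 64 << 20;
//   pri_opts.secondary_cache = NewCompressedSecondaryCache(sec_opts);
//   std::shared_ptr<Cache> cache = NewLRUCache(pri_opts);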

// HyperClockCache - A lock-free Cache alternative for RocksDB block cache
// that offers much improved CPU efficiency vs. LRUCache under high parallel
// load or high contention, with some caveats:
// * Not a general Cache implementation: can only be used for
//   BlockBasedTableOptions::block_cache, which RocksDB uses in a way that is
//   compatible with HyperClockCache.
// * Requires an extra tuning parameter: see estimated_entry_charge below.
//   Similarly, substantially changing the capacity with SetCapacity could
//   harm efficiency.
// * SecondaryCache is not yet supported.
// * Cache priorities are less aggressively enforced, which could cause
//   cache dilution from long range scans (unless they use fill_cache=false).
// * Can be worse for small caches, because if almost all of a cache shard is
//   pinned (more likely with non-partitioned filters), then CLOCK eviction
//   becomes very CPU intensive.
//
// See internal cache/clock_cache.h for full description.
struct HyperClockCacheOptions : public ShardedCacheOptions {
  // The estimated average `charge` associated with cache entries. This is a
  // critical configuration parameter for good performance from the hyper
  // cache, because having a table size that is fixed at creation time greatly
  // reduces the required synchronization between threads.
  // * If the estimate is substantially too low (e.g. less than half the true
  //   average) then metadata space overhead will be substantially higher
  //   (e.g. 200 bytes per entry rather than 100). With
  //   kFullChargeCacheMetadata, this can slightly reduce cache hit rates, and
  //   slightly reduce access times due to the larger working memory size.
  // * If the estimate is substantially too high (e.g. 25% higher than the
  //   true average) then there might not be sufficient slots in the hash
  //   table for both efficient operation and capacity utilization (hit rate).
  //   The hyper cache will evict entries to prevent load factors that could
  //   dramatically affect lookup times, instead letting the hit rate suffer
  //   by not utilizing the full capacity.
  //
  // A reasonable choice is the larger of block_size and metadata_block_size.
  // When WriteBufferManager (and similar) charge memory usage to the block
  // cache, this can lead to the same effect as the estimate being too low,
  // which is better than the opposite. Therefore, the general recommendation
  // is to assume that other memory charged to the block cache could be
  // negligible, and ignore it in making the estimate.
  //
  // The best parameter choice based on a cache in use is given by
  // GetUsage() / GetOccupancyCount(), ignoring metadata overheads such as
  // with kDontChargeCacheMetadata. More precisely, with
  // kFullChargeCacheMetadata it is (GetUsage() - 64 * GetTableAddressCount())
  // / GetOccupancyCount(). However, when the average value size might vary
  // (e.g. balance between metadata and data blocks in cache), it is better
  // to estimate toward the lower side than the higher side.
  size_t estimated_entry_charge;

  HyperClockCacheOptions(
      size_t _capacity, size_t _estimated_entry_charge,
      int _num_shard_bits = -1, bool _strict_capacity_limit = false,
      std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
      CacheMetadataChargePolicy _metadata_charge_policy =
          kDefaultCacheMetadataChargePolicy)
      : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
                            std::move(_memory_allocator),
                            _metadata_charge_policy),
        estimated_entry_charge(_estimated_entry_charge) {}

  // Construct an instance of HyperClockCache using these options
  std::shared_ptr<Cache> MakeSharedCache() const;
};
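
// For example, a construction sketch (the charge value here is only a
// placeholder; tune it per the estimated_entry_charge guidance above):
//
//   HyperClockCacheOptions hcc_opts(
//       1 << 30 /* capacity */,
//       8 * 1024 /* estimated_entry_charge, e.g. ~block_size */);
//   std::shared_ptr<Cache> cache = hcc_opts.MakeSharedCache();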

// DEPRECATED - The old Clock Cache implementation had an unresolved bug and
// has been removed. The new HyperClockCache requires an additional
// configuration parameter that is not provided by this API. This function
// simply returns a new LRUCache for functional compatibility.
extern std::shared_ptr<Cache> NewClockCache(
    size_t capacity, int num_shard_bits = -1,
    bool strict_capacity_limit = false,
    CacheMetadataChargePolicy metadata_charge_policy =
        kDefaultCacheMetadataChargePolicy);

class Cache {
 public:  // opaque types
  // Opaque handle to an entry stored in the cache.
  struct Handle {};

 public:  // type defs
  // Depending on implementation, cache entries with higher priority levels
  // could be less likely to get evicted than entries with lower priority
  // levels. The "high" priority level applies to certain SST metablocks
  // (e.g. index and filter blocks) if the option
  // cache_index_and_filter_blocks_with_high_priority is set. The "low"
  // priority level is used for other kinds of SST blocks (most importantly,
  // data blocks), as well as the above metablocks in case
  // cache_index_and_filter_blocks_with_high_priority is not set. The
  // "bottom" priority level is for BlobDB's blob values.
  enum class Priority { HIGH, LOW, BOTTOM };

  // A set of callbacks to allow objects in the primary block cache to be
  // persisted in a secondary cache. The purpose of the secondary cache
  // is to support other ways of caching the object, such as persistent or
  // compressed data, that may require the object to be parsed and transformed
  // in some way. Since the primary cache holds C++ objects and the secondary
  // cache may only hold flat data that doesn't need relocation, these
  // callbacks need to be provided by the user of the block
  // cache to do the conversion.
  // The CacheItemHelper is passed to Insert() and Lookup(). It has pointers
  // to callback functions for size, saving and deletion of the
  // object. The callbacks are defined in C-style in order to make them
  // stateless and not add to the cache metadata size.
  // Saving multiple std::function objects would take up 32 bytes per
  // function, even if it's not bound to an object and captures nothing.
  //
  // All the callbacks are C-style function pointers in order to simplify
  // lifecycle management. Objects in the cache can outlive the parent DB,
  // so anything required for these operations should be contained in the
  // object itself.
  //
  // The SizeCallback takes a void* pointer to the object and returns the size
  // of the persistable data. It can be used by the secondary cache to allocate
  // memory if needed.
  //
  // RocksDB callbacks are NOT exception-safe. A callback completing with an
  // exception can lead to undefined behavior in RocksDB, including data loss,
  // unreported corruption, deadlocks, and more.
  using SizeCallback = size_t (*)(void* obj);

  // The SaveToCallback takes a void* object pointer and saves the persistable
  // data into a buffer. The secondary cache may decide to not store it in a
  // contiguous buffer, in which case this callback will be called multiple
  // times with increasing offset.
  using SaveToCallback = Status (*)(void* from_obj, size_t from_offset,
                                    size_t length, void* out);

  // A function pointer type for custom destruction of an entry's
  // value. The Cache is responsible for copying and reclaiming space
  // for the key, but values are managed by the caller.
  using DeleterFn = void (*)(const Slice& key, void* value);

  // A struct with pointers to helper functions for spilling items from the
  // cache into the secondary cache. May be extended in the future. An
  // instance of this struct is expected to outlive the cache.
  struct CacheItemHelper {
    SizeCallback size_cb;
    SaveToCallback saveto_cb;
    DeleterFn del_cb;

    CacheItemHelper() : size_cb(nullptr), saveto_cb(nullptr), del_cb(nullptr) {}
    CacheItemHelper(SizeCallback _size_cb, SaveToCallback _saveto_cb,
                    DeleterFn _del_cb)
        : size_cb(_size_cb), saveto_cb(_saveto_cb), del_cb(_del_cb) {}
  };
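
  // An illustrative helper for a hypothetical flat (trivially copyable)
  // value type `MyBlock` with a `size` member (`MyBlock` and the callback
  // names are not part of this API):
  //
  //   static size_t MySize(void* obj) {
  //     return static_cast<MyBlock*>(obj)->size;
  //   }
  //   static Status MySaveTo(void* from_obj, size_t from_offset,
  //                          size_t length, void* out) {
  //     memcpy(out, reinterpret_cast<char*>(from_obj) + from_offset, length);
  //     return Status::OK();
  //   }
  //   static void MyDelete(const Slice& /*key*/, void* value) {
  //     delete static_cast<MyBlock*>(value);
  //   }
  //   // Static storage duration satisfies the "outlive the cache" rule.
  //   static const Cache::CacheItemHelper kMyHelper{MySize, MySaveTo,
  //                                                 MyDelete};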

  // The CreateCallback is passed by the block cache user to Lookup(). It
  // takes in a buffer from the NVM cache and constructs an object using
  // it. The callback doesn't have ownership of the buffer and should
  // copy the contents into its own buffer.
  using CreateCallback = std::function<Status(const void* buf, size_t size,
                                              void** out_obj, size_t* charge)>;

 public:  // ctor/dtor/create
  Cache(std::shared_ptr<MemoryAllocator> allocator = nullptr)
      : memory_allocator_(std::move(allocator)) {}
  // No copying allowed
  Cache(const Cache&) = delete;
  Cache& operator=(const Cache&) = delete;

  // Destroys all remaining entries by calling the associated "deleter"
  virtual ~Cache() {}

  // Creates a new Cache based on the input value string and returns the
  // result. Currently, this method can be used to create LRUCaches only.
  // @param config_options
  // @param value The value might be:
  //   - an old-style cache ("1M") -- equivalent to NewLRUCache(1024*1024)
  //   - Name-value option pairs -- "capacity=1M; num_shard_bits=4;"
  //     For the LRUCache, the values are defined in LRUCacheOptions.
  // @param result The new Cache object
  // @return OK if the cache was successfully created
  // @return NotFound if an invalid name was specified in the value
  // @return InvalidArgument if the options were not valid
  static Status CreateFromString(const ConfigOptions& config_options,
                                 const std::string& value,
                                 std::shared_ptr<Cache>* result);
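
  // For example, a sketch (ConfigOptions is declared in
  // rocksdb/convenience.h):
  //
  //   ConfigOptions config_options;
  //   std::shared_ptr<Cache> cache;
  //   Status s = Cache::CreateFromString(
  //       config_options, "capacity=1M;num_shard_bits=4", &cache);
  //   // On success, s.ok() and cache != nullptr.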

 public:  // functions
  // The type of the Cache
  virtual const char* Name() const = 0;

  // EXPERIMENTAL SecondaryCache support:
  // Some APIs here are experimental and might change in the future.
  // The Insert and Lookup APIs below are intended to allow cached objects
  // to be demoted/promoted between the primary block cache and a secondary
  // cache. The secondary cache could be a non-volatile cache, and will
  // likely store the object in a different representation. They rely on a
  // per object CacheItemHelper to do the conversions.
  // The secondary cache may persist across process and system restarts,
  // and may even be moved between hosts. Therefore, the cache key must
  // be repeatable across restarts/reboots, and globally unique if
  // multiple DBs share the same cache and the set of DBs can change
  // over time.

  // Insert a mapping from key->value into the volatile cache only
  // and assign it with the specified charge against the total cache capacity.
  // If strict_capacity_limit is true and cache reaches its full capacity,
  // return Status::MemoryLimit.
  //
  // If handle is not nullptr, returns a handle that corresponds to the
  // mapping. The caller must call this->Release(handle) when the returned
  // mapping is no longer needed. In case of error, the caller is responsible
  // for cleaning up the value (i.e. by calling the "deleter").
  //
  // If handle is nullptr, it is as if Release is called immediately after
  // insert. In case of error, the value will be cleaned up.
  //
  // When the inserted entry is no longer needed, the key and
  // value will be passed to "deleter" which must delete the value.
  // (The Cache is responsible for copying and reclaiming space for
  // the key.)
  virtual Status Insert(const Slice& key, void* value, size_t charge,
                        DeleterFn deleter, Handle** handle = nullptr,
                        Priority priority = Priority::LOW) = 0;
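
  // A basic usage sketch with a heap-allocated std::string value (one
  // possible pattern, for illustration only; `cache` is assumed):
  //
  //   static void StringDeleter(const Slice& /*key*/, void* value) {
  //     delete static_cast<std::string*>(value);
  //   }
  //   ...
  //   auto* value = new std::string("payload");
  //   Status s = cache->Insert("key", value, value->size(), StringDeleter);
  //   Cache::Handle* h = cache->Lookup("key");
  //   if (h != nullptr) {
  //     auto* v = static_cast<std::string*>(cache->Value(h));
  //     // ... use *v ...
  //     cache->Release(h);
  //   }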

  // EXPERIMENTAL
  // Insert a mapping from key->value into the cache and assign it
  // the specified charge against the total cache capacity. If
  // strict_capacity_limit is true and cache reaches its full capacity,
  // return Status::MemoryLimit. `value` must be non-nullptr for this
  // Insert() because Value() == nullptr is reserved for indicating failure
  // with secondary-cache-compatible mappings.
  //
  // The helper argument is saved by the cache and will be used when the
  // inserted object is evicted or promoted to the secondary cache. It,
  // therefore, must outlive the cache.
  //
  // If handle is not nullptr, returns a handle that corresponds to the
  // mapping. The caller must call this->Release(handle) when the returned
  // mapping is no longer needed. In case of error, the caller is responsible
  // for cleaning up the value (i.e. by calling the "deleter").
  //
  // If handle is nullptr, it is as if Release is called immediately after
  // insert. In case of error, the value will be cleaned up.
  //
  // Regardless of whether the item was inserted into the primary cache,
  // an attempt will be made to insert it into the secondary cache if one
  // is configured and the helper supports it; if the cache implementation
  // does not support a secondary cache, the item is only inserted into the
  // primary cache. The implementation may defer the insertion to the
  // secondary cache as it sees fit.
  //
  // When the inserted entry is no longer needed, the key and
  // value will be passed to "deleter".
  virtual Status Insert(const Slice& key, void* value,
                        const CacheItemHelper* helper, size_t charge,
                        Handle** handle = nullptr,
                        Priority priority = Priority::LOW) {
    if (!helper) {
      return Status::InvalidArgument();
    }
    return Insert(key, value, charge, helper->del_cb, handle, priority);
  }

  // If the cache has no mapping for "key", returns nullptr.
  //
  // Else return a handle that corresponds to the mapping. The caller
  // must call this->Release(handle) when the returned mapping is no
  // longer needed.
  // If stats is not nullptr, relevant tickers may be updated inside the
  // function.
  virtual Handle* Lookup(const Slice& key, Statistics* stats = nullptr) = 0;

  // EXPERIMENTAL
  // Lookup the key in the primary and secondary caches (if one is
  // configured). The create_cb callback function object will be used to
  // construct the cached object.
  // If none of the caches have the mapping for the key, returns nullptr.
  // Else, returns a handle that corresponds to the mapping.
  //
  // This call may promote the object from the secondary cache (if one is
  // configured, and has the given key) to the primary cache.
  //
  // The helper argument should be provided if the caller wants the lookup
  // to include the secondary cache (if one is configured) and the object,
  // if it exists, to be promoted to the primary cache. The helper may be
  // saved and used later when the object is evicted. Therefore, it must
  // outlive the cache.
  //
  // ======================== Async Lookup (wait=false) ======================
  // When wait=false, the handle returned might be in any of three states:
  // * Present - If Value() != nullptr, then the result is present and
  //   the handle can be used just as if wait=true.
  // * Pending, not ready (IsReady() == false) - secondary cache is still
  //   working to retrieve the value. Might become ready any time.
  // * Pending, ready (IsReady() == true) - secondary cache has the value
  //   but it has not been loaded into primary cache. Call to Wait()/WaitAll()
  //   will not block.
  //
  // IMPORTANT: Pending handles are not thread-safe, and only these functions
  // are allowed on them: Value(), IsReady(), Wait(), WaitAll(). Even
  // Release() can only come after Wait() or WaitAll() even though a
  // reference is held.
  //
  // Only Wait()/WaitAll() gets a Handle out of a Pending state. (Waiting is
  // safe and has no effect on other handle states.) After waiting on a
  // Handle, it is in one of two states:
  // * Present - if Value() != nullptr
  // * Failed - if Value() == nullptr, such as if the secondary cache
  //   initially thought it had the value but actually did not.
  //
  // Note that given an arbitrary Handle, the only way to distinguish the
  // Pending+ready state from the Failed state is to Wait() on it. A cache
  // entry not compatible with secondary cache can also have Value()==nullptr
  // like the Failed state, but this is not generally a concern.
  virtual Handle* Lookup(const Slice& key, const CacheItemHelper* /*helper_cb*/,
                         const CreateCallback& /*create_cb*/,
                         Priority /*priority*/, bool /*wait*/,
                         Statistics* stats = nullptr) {
    return Lookup(key, stats);
  }
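
  // A sketch of the asynchronous pattern (`cache`, `kMyHelper`, and
  // `create_cb` are hypothetical; see CacheItemHelper and CreateCallback
  // above):
  //
  //   Cache::Handle* h = cache->Lookup(key, &kMyHelper, create_cb,
  //                                    Cache::Priority::LOW, /*wait=*/false);
  //   if (h != nullptr && cache->Value(h) == nullptr) {
  //     cache->Wait(h);  // resolve the Pending state
  //   }
  //   if (h != nullptr) {
  //     if (cache->Value(h) != nullptr) {
  //       // Present: use the value.
  //     }  // else: Failed.
  //     cache->Release(h);
  //   }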

  // Increments the reference count for the handle if it refers to an entry in
  // the cache. Returns true if refcount was incremented; otherwise, returns
  // false.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual bool Ref(Handle* handle) = 0;

  /**
   * Release a mapping returned by a previous Lookup(). A released entry might
   * still remain in cache in case it is later looked up by others. If
   * erase_if_last_ref is set then it also erases it from the cache if there is
   * no other reference to it. Erasing it should call the deleter function that
   * was provided when the entry was inserted.
   *
   * Returns true if the entry was also erased.
   */
  // REQUIRES: handle must not have been released yet.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual bool Release(Handle* handle, bool erase_if_last_ref = false) = 0;

  // Return the value encapsulated in a handle returned by a
  // successful Lookup().
  // REQUIRES: handle must not have been released yet.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual void* Value(Handle* handle) = 0;

  // If the cache contains the entry for the key, erase it. Note that the
  // underlying entry will be kept around until all existing handles
  // to it have been released.
  virtual void Erase(const Slice& key) = 0;

  // Return a new numeric id. May be used by multiple clients who are
  // sharding the same cache to partition the key space. Typically the
  // client will allocate a new id at startup and prepend the id to
  // its cache keys.
  virtual uint64_t NewId() = 0;

  // Sets the maximum configured capacity of the cache. When the new
  // capacity is less than the old capacity and the existing usage is
  // greater than the new capacity, the implementation will do its best to
  // purge the released entries from the cache in order to lower the usage.
  virtual void SetCapacity(size_t capacity) = 0;

  // Set whether to return error on insertion when cache reaches its full
  // capacity.
  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;

  // Get the flag whether to return error on insertion when cache reaches its
  // full capacity.
  virtual bool HasStrictCapacityLimit() const = 0;

  // Returns the maximum configured capacity of the cache
  virtual size_t GetCapacity() const = 0;

  // Returns the memory size for the entries residing in the cache.
  virtual size_t GetUsage() const = 0;

  // Returns the number of entries currently tracked in the table. SIZE_MAX
  // means "not supported." This is used for inspecting the load factor, along
  // with GetTableAddressCount().
  virtual size_t GetOccupancyCount() const { return SIZE_MAX; }

  // Returns the number of ways the hash function is divided for addressing
  // entries. Zero means "not supported." This is used for inspecting the load
  // factor, along with GetOccupancyCount().
  virtual size_t GetTableAddressCount() const { return 0; }
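
  // For example, a sketch of inspecting the load factor where supported:
  //
  //   size_t occupancy = cache->GetOccupancyCount();
  //   size_t slots = cache->GetTableAddressCount();
  //   if (occupancy != SIZE_MAX && slots > 0) {
  //     double load_factor = static_cast<double>(occupancy) / slots;
  //   }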

  // Returns the memory size for a specific entry in the cache.
  virtual size_t GetUsage(Handle* handle) const = 0;

  // Returns the memory size for the entries in use by the system
  virtual size_t GetPinnedUsage() const = 0;

  // Returns the charge for the specific entry in the cache.
  virtual size_t GetCharge(Handle* handle) const = 0;

  // Returns the deleter for the specified entry. This might seem useless
  // as the Cache itself is responsible for calling the deleter, but
  // the deleter can essentially verify that a cache entry is of an
  // expected type from an expected code source.
  virtual DeleterFn GetDeleter(Handle* handle) const = 0;

  // Call this on shutdown if you want to speed it up. Cache will disown
  // any underlying data and will not free it on delete. This call will leak
  // memory - call this only if you're shutting down the process.
  // Any attempt to use the cache after this call will fail terribly.
  // Always delete the DB object before calling this method!
  virtual void DisownData() {
    // default implementation is noop
  }

  struct ApplyToAllEntriesOptions {
    // If the Cache uses locks, setting `average_entries_per_lock` to
    // a higher value suggests iterating over more entries each time a lock
    // is acquired, likely reducing the time for ApplyToAllEntries but
    // increasing latency for concurrent users of the Cache. Setting
    // `average_entries_per_lock` to a smaller value could be helpful if
    // callback is relatively expensive, such as using large data structures.
    size_t average_entries_per_lock = 256;
  };

  // Apply a callback to all entries in the cache. The Cache must ensure
  // thread safety but does not guarantee that a consistent snapshot of all
  // entries is iterated over if other threads are operating on the Cache
  // also.
  virtual void ApplyToAllEntries(
      const std::function<void(const Slice& key, void* value, size_t charge,
                               DeleterFn deleter)>& callback,
      const ApplyToAllEntriesOptions& opts) = 0;
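
  // For example, a sketch that tallies entry count and total charge:
  //
  //   size_t count = 0;
  //   size_t total_charge = 0;
  //   cache->ApplyToAllEntries(
  //       [&](const Slice& /*key*/, void* /*value*/, size_t charge,
  //           Cache::DeleterFn /*deleter*/) {
  //         ++count;
  //         total_charge += charge;
  //       },
  //       {});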

  // DEPRECATED version of above. (Default implementation uses above.)
  virtual void ApplyToAllCacheEntries(void (*callback)(void* value,
                                                       size_t charge),
                                      bool /*thread_safe*/) {
    ApplyToAllEntries([callback](const Slice&, void* value, size_t charge,
                                 DeleterFn) { callback(value, charge); },
                      {});
  }

  // Remove all entries.
  // Prerequisite: no entry is referenced.
  virtual void EraseUnRefEntries() = 0;

  virtual std::string GetPrintableOptions() const { return ""; }

  // Check for any warnings or errors in the operation of the cache and
  // report them to the logger. This is intended only to be called
  // periodically so does not need to be very efficient. (Obscure calling
  // conventions for Logger inherited from env.h)
  virtual void ReportProblems(
      const std::shared_ptr<Logger>& /*info_log*/) const {}

  MemoryAllocator* memory_allocator() const { return memory_allocator_.get(); }

  // EXPERIMENTAL
  // Release a mapping returned by a previous Lookup(). The "useful"
  // parameter specifies whether the data was actually used or not,
  // which may be used by the cache implementation to decide whether
  // to consider it as a hit for retention purposes. As noted elsewhere,
  // "pending" handles require Wait()/WaitAll() before Release().
  virtual bool Release(Handle* handle, bool /*useful*/,
                       bool erase_if_last_ref) {
    return Release(handle, erase_if_last_ref);
  }

  // EXPERIMENTAL
  // Determines if the handle returned by Lookup() can give a value without
  // blocking, though Wait()/WaitAll() might be required to publish it to
  // Value(). See secondary cache compatible Lookup() above for details.
  // This call is not thread safe on "pending" handles.
  virtual bool IsReady(Handle* /*handle*/) { return true; }

  // EXPERIMENTAL
  // Convert a "pending" handle into a full thread-shareable handle by
  // * If necessary, wait until secondary cache finishes loading the value.
  // * Construct the value for primary cache and set it in the handle.
  // Even after Wait() on a pending handle, the caller must check for
  // Value() == nullptr in case of failure. This call is not thread-safe
  // on pending handles. This call has no effect on non-pending handles.
  // See secondary cache compatible Lookup() above for details.
  virtual void Wait(Handle* /*handle*/) {}

  // EXPERIMENTAL
  // Wait for a vector of handles to become ready. As with Wait(), the user
  // should check the Value() of each handle for nullptr. This call is not
  // thread-safe on pending handles.
  virtual void WaitAll(std::vector<Handle*>& /*handles*/) {}

 private:
  std::shared_ptr<MemoryAllocator> memory_allocator_;
};

}  // namespace ROCKSDB_NAMESPACE