// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A Cache is an interface that maps keys to values. It has internal
// synchronization and may be safely accessed concurrently from
// multiple threads. It may automatically evict entries to make room
// for new entries. Values have a specified charge against the cache
// capacity. For example, a cache whose values are variable-length
// strings may use the length of the string as the charge for the
// string.
//
// A builtin cache implementation with a least-recently-used eviction
// policy is provided. Clients may use their own implementations if
// they want something more sophisticated (like scan-resistance, a
// custom eviction policy, variable cache sizing, etc.)

#pragma once

#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <vector>

#include "rocksdb/compression_type.h"
#include "rocksdb/memory_allocator.h"
#include "rocksdb/slice.h"
#include "rocksdb/statistics.h"
#include "rocksdb/status.h"

namespace ROCKSDB_NAMESPACE {

class Cache;
struct ConfigOptions;
class Logger;
class SecondaryCache;

// Classifications of block cache entries.
//
// Developer notes: Adding a new enum to this class requires corresponding
// updates to `kCacheEntryRoleToCamelString` and
// `kCacheEntryRoleToHyphenString`. Do not add to this enum after `kMisc` since
// `kNumCacheEntryRoles` assumes `kMisc` comes last.
enum class CacheEntryRole {
  // Block-based table data block
  kDataBlock,
  // Block-based table filter block (full or partitioned)
  kFilterBlock,
  // Block-based table metadata block for partitioned filter
  kFilterMetaBlock,
  // OBSOLETE / DEPRECATED: old/removed block-based filter
  kDeprecatedFilterBlock,
  // Block-based table index block
  kIndexBlock,
  // Other kinds of block-based table block
  kOtherBlock,
  // WriteBufferManager's charge to account for its memtable usage
  kWriteBuffer,
  // Compression dictionary building buffer's charge to account for
  // its memory usage
  kCompressionDictionaryBuildingBuffer,
  // Filter's charge to account for
  // (new) bloom and ribbon filter construction's memory usage
  kFilterConstruction,
  // BlockBasedTableReader's charge to account for its memory usage
  kBlockBasedTableReader,
  // FileMetadata's charge to account for its memory usage
  kFileMetadata,
  // Blob value (when using the same cache as block cache and blob cache)
  kBlobValue,
  // Blob cache's charge to account for its memory usage (when using a
  // separate block cache and blob cache)
  kBlobCache,
  // Default bucket, for miscellaneous cache entries. Do not use for
  // entries that could potentially add up to large usage.
  kMisc,
};
constexpr uint32_t kNumCacheEntryRoles =
    static_cast<uint32_t>(CacheEntryRole::kMisc) + 1;

// Obtain a hyphen-separated, lowercase name of a `CacheEntryRole`.
const std::string& GetCacheEntryRoleName(CacheEntryRole);

// For use with `GetMapProperty()` for property
// `DB::Properties::kBlockCacheEntryStats`. On success, the map will
// be populated with all keys that can be obtained from these functions.
struct BlockCacheEntryStatsMapKeys {
  static const std::string& CacheId();
  static const std::string& CacheCapacityBytes();
  static const std::string& LastCollectionDurationSeconds();
  static const std::string& LastCollectionAgeSeconds();

  static std::string EntryCount(CacheEntryRole);
  static std::string UsedBytes(CacheEntryRole);
  static std::string UsedPercent(CacheEntryRole);
};
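
// For illustration, a sketch of reading these stats from an open DB
// (`db` is assumed; GetMapProperty() is declared in rocksdb/db.h):
//
//   std::map<std::string, std::string> stats;
//   if (db->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &stats)) {
//     std::string used = stats[BlockCacheEntryStatsMapKeys::UsedBytes(
//         CacheEntryRole::kDataBlock)];
//     // `used` is the byte count for data blocks, as a decimal string.
//   }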

extern const bool kDefaultToAdaptiveMutex;

enum CacheMetadataChargePolicy {
  // Only the `charge` of each entry inserted into a Cache counts against
  // the `capacity`
  kDontChargeCacheMetadata,
  // In addition to the `charge`, the approximate space overheads in the
  // Cache (in bytes) also count against `capacity`. These space overheads
  // are for supporting fast Lookup and managing the lifetime of entries.
  kFullChargeCacheMetadata
};
const CacheMetadataChargePolicy kDefaultCacheMetadataChargePolicy =
    kFullChargeCacheMetadata;

// Options shared between various cache implementations that
// divide the key space into shards using hashing.
struct ShardedCacheOptions {
  // Capacity of the cache, in the same units as the `charge` of each entry.
  // This is typically measured in bytes, but can be a different unit if using
  // kDontChargeCacheMetadata.
  size_t capacity = 0;

  // Cache is sharded into 2^num_shard_bits shards, by hash of key.
  // If < 0, a good default is chosen based on the capacity and the
  // implementation. (Mutex-based implementations are much more reliant
  // on many shards for parallel scalability.)
  int num_shard_bits = -1;

  // If strict_capacity_limit is set, Insert() will fail if there is not
  // enough capacity for the new entry along with all the existing referenced
  // (pinned) cache entries. (Unreferenced cache entries are evicted as
  // needed, sometimes immediately.) If strict_capacity_limit == false
  // (default), Insert() never fails.
  bool strict_capacity_limit = false;

  // If non-nullptr, RocksDB will use this allocator instead of system
  // allocator when allocating memory for cache blocks.
  //
  // Caveat: when the cache is used as block cache, the memory allocator is
  // ignored when dealing with compression libraries that allocate memory
  // internally (currently only XPRESS).
  std::shared_ptr<MemoryAllocator> memory_allocator;

  // See CacheMetadataChargePolicy
  CacheMetadataChargePolicy metadata_charge_policy =
      kDefaultCacheMetadataChargePolicy;

  ShardedCacheOptions() {}
  ShardedCacheOptions(
      size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
      std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
      CacheMetadataChargePolicy _metadata_charge_policy =
          kDefaultCacheMetadataChargePolicy)
      : capacity(_capacity),
        num_shard_bits(_num_shard_bits),
        strict_capacity_limit(_strict_capacity_limit),
        memory_allocator(std::move(_memory_allocator)),
        metadata_charge_policy(_metadata_charge_policy) {}
};

struct LRUCacheOptions : public ShardedCacheOptions {
  // Ratio of cache reserved for high-priority and low-priority entries,
  // respectively. (See Cache::Priority below for more information on the
  // levels.) Valid values are between 0 and 1 (inclusive), and the sum of
  // the two values cannot exceed 1.
  //
  // If high_pri_pool_ratio is greater than zero, a dedicated high-priority
  // LRU list is maintained by the cache. Similarly, if low_pri_pool_ratio is
  // greater than zero, a dedicated low-priority LRU list is maintained.
  // There is also a bottom-priority LRU list, which is always enabled and not
  // explicitly configurable. Entries are spilled over to the next available
  // lower-priority pool if a certain pool's capacity is exceeded.
  //
  // Entries with cache hits are inserted into the highest priority LRU list
  // available regardless of the entry's priority. Entries without hits
  // are inserted into the highest priority LRU list available whose priority
  // does not exceed the entry's priority. (For example, high-priority items
  // with no hits are placed in the high-priority pool if available;
  // otherwise, they are placed in the low-priority pool if available;
  // otherwise, they are placed in the bottom-priority pool.) This results
  // in lower-priority entries without hits getting evicted from the cache
  // sooner.
  //
  // Default values: high_pri_pool_ratio = 0.5 (which is referred to as
  // "midpoint insertion"), low_pri_pool_ratio = 0
  double high_pri_pool_ratio = 0.5;
  double low_pri_pool_ratio = 0.0;

  // Whether to use adaptive mutexes for cache shards. Note that adaptive
  // mutexes need to be supported by the platform in order for this to have
  // any effect. The default value is true if RocksDB is compiled with
  // -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX, false otherwise.
  bool use_adaptive_mutex = kDefaultToAdaptiveMutex;

  // A SecondaryCache instance to use as the non-volatile tier.
  std::shared_ptr<SecondaryCache> secondary_cache;

  LRUCacheOptions() {}
  LRUCacheOptions(size_t _capacity, int _num_shard_bits,
                  bool _strict_capacity_limit, double _high_pri_pool_ratio,
                  std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
                  bool _use_adaptive_mutex = kDefaultToAdaptiveMutex,
                  CacheMetadataChargePolicy _metadata_charge_policy =
                      kDefaultCacheMetadataChargePolicy,
                  double _low_pri_pool_ratio = 0.0)
      : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
                            std::move(_memory_allocator),
                            _metadata_charge_policy),
        high_pri_pool_ratio(_high_pri_pool_ratio),
        low_pri_pool_ratio(_low_pri_pool_ratio),
        use_adaptive_mutex(_use_adaptive_mutex) {}
};

// Create a new cache with a fixed size capacity. The cache is sharded
// to 2^num_shard_bits shards, by hash of the key. The total capacity
// is divided and evenly assigned to each shard. If strict_capacity_limit
// is set, inserts into the cache will fail when the cache is full. The user
// can also reserve a percentage of the cache for high-priority entries via
// high_pri_pool_ratio.
// num_shard_bits = -1 means it is automatically determined: every shard
// will be at least 512KB and the number of shard bits will not exceed 6.
extern std::shared_ptr<Cache> NewLRUCache(
    size_t capacity, int num_shard_bits = -1,
    bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5,
    std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
    bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
    CacheMetadataChargePolicy metadata_charge_policy =
        kDefaultCacheMetadataChargePolicy,
    double low_pri_pool_ratio = 0.0);

extern std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts);
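
// A minimal usage sketch (BlockBasedTableOptions is declared in
// rocksdb/table.h, not in this header):
//
//   LRUCacheOptions cache_opts(64 << 20 /* capacity */,
//                              -1 /* num_shard_bits: automatic */,
//                              false /* strict_capacity_limit */,
//                              0.5 /* high_pri_pool_ratio */);
//   std::shared_ptr<Cache> cache = NewLRUCache(cache_opts);
//   BlockBasedTableOptions table_opts;
//   table_opts.block_cache = cache;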

// EXPERIMENTAL
// Options structure for configuring a SecondaryCache instance based on
// LRUCache. The LRUCacheOptions.secondary_cache is not used and
// should not be set.
struct CompressedSecondaryCacheOptions : LRUCacheOptions {
  // The compression method (if any) that is used to compress data.
  CompressionType compression_type = CompressionType::kLZ4Compression;

  // compress_format_version can have two values:
  // compress_format_version == 1 -- decompressed size is not included in the
  // block header.
  // compress_format_version == 2 -- decompressed size is included in the
  // block header in varint32 format.
  uint32_t compress_format_version = 2;

  // Enable the custom split and merge feature, which splits the compressed
  // value into chunks so that they may better fit jemalloc bins.
  bool enable_custom_split_merge = false;

  CompressedSecondaryCacheOptions() {}
  CompressedSecondaryCacheOptions(
      size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
      double _high_pri_pool_ratio, double _low_pri_pool_ratio = 0.0,
      std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
      bool _use_adaptive_mutex = kDefaultToAdaptiveMutex,
      CacheMetadataChargePolicy _metadata_charge_policy =
          kDefaultCacheMetadataChargePolicy,
      CompressionType _compression_type = CompressionType::kLZ4Compression,
      uint32_t _compress_format_version = 2,
      bool _enable_custom_split_merge = false)
      : LRUCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
                        _high_pri_pool_ratio, std::move(_memory_allocator),
                        _use_adaptive_mutex, _metadata_charge_policy,
                        _low_pri_pool_ratio),
        compression_type(_compression_type),
        compress_format_version(_compress_format_version),
        enable_custom_split_merge(_enable_custom_split_merge) {}
};

// EXPERIMENTAL
// Create a new Secondary Cache that is implemented on top of LRUCache.
extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
    size_t capacity, int num_shard_bits = -1,
    bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5,
    double low_pri_pool_ratio = 0.0,
    std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
    bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
    CacheMetadataChargePolicy metadata_charge_policy =
        kDefaultCacheMetadataChargePolicy,
    CompressionType compression_type = CompressionType::kLZ4Compression,
    uint32_t compress_format_version = 2,
    bool enable_custom_split_merge = false);

extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
    const CompressedSecondaryCacheOptions& opts);
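
// A sketch of attaching a compressed secondary cache as the non-volatile
// tier of an LRUCache (both APIs are EXPERIMENTAL and may change):
//
//   CompressedSecondaryCacheOptions sec_opts;
//   sec_opts.capacity = 256 << 20;
//   LRUCacheOptions pri_opts;
//   pri_opts.capacity = 64 << 20;
//   pri_opts.secondary_cache = NewCompressedSecondaryCache(sec_opts);
//   std::shared_ptr<Cache> cache = NewLRUCache(pri_opts);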

// HyperClockCache - A lock-free Cache alternative for RocksDB block cache
// that offers much improved CPU efficiency vs. LRUCache under high parallel
// load or high contention, with some caveats:
// * Not a general Cache implementation: can only be used for
//   BlockBasedTableOptions::block_cache, which RocksDB uses in a way that is
//   compatible with HyperClockCache.
// * Requires an extra tuning parameter: see estimated_entry_charge below.
//   Similarly, substantially changing the capacity with SetCapacity could
//   harm efficiency.
// * SecondaryCache is not yet supported.
// * Cache priorities are less aggressively enforced, which could cause
//   cache dilution from long range scans (unless they use fill_cache=false).
// * Can be worse for small caches, because if almost all of a cache shard is
//   pinned (more likely with non-partitioned filters), then CLOCK eviction
//   becomes very CPU intensive.
//
// See internal cache/clock_cache.h for full description.
struct HyperClockCacheOptions : public ShardedCacheOptions {
  // The estimated average `charge` associated with cache entries. This is a
  // critical configuration parameter for good performance from the hyper
  // cache, because having a table size that is fixed at creation time greatly
  // reduces the required synchronization between threads.
  // * If the estimate is substantially too low (e.g. less than half the true
  //   average) then metadata space overhead will be substantially higher
  //   (e.g. 200 bytes per entry rather than 100). With
  //   kFullChargeCacheMetadata, this can slightly reduce cache hit rates, and
  //   slightly reduce access times due to the larger working memory size.
  // * If the estimate is substantially too high (e.g. 25% higher than the
  //   true average) then there might not be sufficient slots in the hash
  //   table for both efficient operation and capacity utilization (hit rate).
  //   The hyper cache will evict entries to prevent load factors that could
  //   dramatically affect lookup times, instead letting the hit rate suffer
  //   by not utilizing the full capacity.
  //
  // A reasonable choice is the larger of block_size and metadata_block_size.
  // When WriteBufferManager (and similar) charge memory usage to the block
  // cache, this can lead to the same effect as the estimate being too low,
  // which is better than the opposite. Therefore, the general recommendation
  // is to assume that other memory charged to the block cache could be
  // negligible, and ignore it in making the estimate.
  //
  // The best parameter choice based on a cache in use is given by
  // GetUsage() / GetOccupancyCount(), ignoring metadata overheads such as
  // with kDontChargeCacheMetadata. More precisely, with
  // kFullChargeCacheMetadata it is (GetUsage() - 64 * GetTableAddressCount())
  // / GetOccupancyCount(). However, when the average value size might vary
  // (e.g. balance between metadata and data blocks in cache), it is better
  // to estimate toward the lower side than the higher side.
  size_t estimated_entry_charge;

  HyperClockCacheOptions(
      size_t _capacity, size_t _estimated_entry_charge,
      int _num_shard_bits = -1, bool _strict_capacity_limit = false,
      std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
      CacheMetadataChargePolicy _metadata_charge_policy =
          kDefaultCacheMetadataChargePolicy)
      : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
                            std::move(_memory_allocator),
                            _metadata_charge_policy),
        estimated_entry_charge(_estimated_entry_charge) {}

  // Construct an instance of HyperClockCache using these options
  std::shared_ptr<Cache> MakeSharedCache() const;
};
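
// For example, a construction sketch (the charge value here is only a
// placeholder; tune it per the estimated_entry_charge guidance above):
//
//   HyperClockCacheOptions hcc_opts(
//       1 << 30 /* capacity */,
//       8 * 1024 /* estimated_entry_charge, e.g. ~block_size */);
//   std::shared_ptr<Cache> cache = hcc_opts.MakeSharedCache();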

// DEPRECATED - The old Clock Cache implementation had an unresolved bug and
// has been removed. The new HyperClockCache requires an additional
// configuration parameter that is not provided by this API. This function
// simply returns a new LRUCache for functional compatibility.
extern std::shared_ptr<Cache> NewClockCache(
    size_t capacity, int num_shard_bits = -1,
    bool strict_capacity_limit = false,
    CacheMetadataChargePolicy metadata_charge_policy =
        kDefaultCacheMetadataChargePolicy);

class Cache {
 public:  // opaque types
  // Opaque handle to an entry stored in the cache.
  struct Handle {};

 public:  // type defs
  // Depending on implementation, cache entries with higher priority levels
  // could be less likely to get evicted than entries with lower priority
  // levels. The "high" priority level applies to certain SST metablocks
  // (e.g. index and filter blocks) if the option
  // cache_index_and_filter_blocks_with_high_priority is set. The "low"
  // priority level is used for other kinds of SST blocks (most importantly,
  // data blocks), as well as the above metablocks in case
  // cache_index_and_filter_blocks_with_high_priority is not set. The
  // "bottom" priority level is for BlobDB's blob values.
  enum class Priority { HIGH, LOW, BOTTOM };

  // A set of callbacks to allow objects in the primary block cache to be
  // persisted in a secondary cache. The purpose of the secondary cache
  // is to support other ways of caching the object, such as persistent or
  // compressed data, that may require the object to be parsed and transformed
  // in some way. Since the primary cache holds C++ objects and the secondary
  // cache may only hold flat data that doesn't need relocation, these
  // callbacks need to be provided by the user of the block
  // cache to do the conversion.
  // The CacheItemHelper is passed to Insert() and Lookup(). It has pointers
  // to callback functions for size, saving and deletion of the
  // object. The callbacks are defined in C-style in order to make them
  // stateless and not add to the cache metadata size.
  // Saving multiple std::function objects would take up 32 bytes per
  // function, even if it's not bound to an object and captures nothing.
  //
  // All the callbacks are C-style function pointers in order to simplify
  // lifecycle management. Objects in the cache can outlive the parent DB,
  // so anything required for these operations should be contained in the
  // object itself.
  //
  // The SizeCallback takes a void* pointer to the object and returns the size
  // of the persistable data. It can be used by the secondary cache to allocate
  // memory if needed.
  //
  // RocksDB callbacks are NOT exception-safe. A callback completing with an
  // exception can lead to undefined behavior in RocksDB, including data loss,
  // unreported corruption, deadlocks, and more.
  using SizeCallback = size_t (*)(void* obj);

  // The SaveToCallback takes a void* object pointer and saves the persistable
  // data into a buffer. The secondary cache may decide to not store it in a
  // contiguous buffer, in which case this callback will be called multiple
  // times with increasing offset.
  using SaveToCallback = Status (*)(void* from_obj, size_t from_offset,
                                    size_t length, void* out);

  // A function pointer type for custom destruction of an entry's
  // value. The Cache is responsible for copying and reclaiming space
  // for the key, but values are managed by the caller.
  using DeleterFn = void (*)(const Slice& key, void* value);

  // A struct with pointers to helper functions for spilling items from the
  // cache into the secondary cache. May be extended in the future. An
  // instance of this struct is expected to outlive the cache.
  struct CacheItemHelper {
    SizeCallback size_cb;
    SaveToCallback saveto_cb;
    DeleterFn del_cb;

    CacheItemHelper() : size_cb(nullptr), saveto_cb(nullptr), del_cb(nullptr) {}
    CacheItemHelper(SizeCallback _size_cb, SaveToCallback _saveto_cb,
                    DeleterFn _del_cb)
        : size_cb(_size_cb), saveto_cb(_saveto_cb), del_cb(_del_cb) {}
  };
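
  // An illustrative helper for a hypothetical flat (trivially copyable)
  // value type `MyBlock` with a `size` member (`MyBlock` and the callback
  // names are not part of this API):
  //
  //   static size_t MySize(void* obj) {
  //     return static_cast<MyBlock*>(obj)->size;
  //   }
  //   static Status MySaveTo(void* from_obj, size_t from_offset,
  //                          size_t length, void* out) {
  //     memcpy(out, reinterpret_cast<char*>(from_obj) + from_offset, length);
  //     return Status::OK();
  //   }
  //   static void MyDelete(const Slice& /*key*/, void* value) {
  //     delete static_cast<MyBlock*>(value);
  //   }
  //   // Static storage duration satisfies the "outlive the cache" rule.
  //   static const Cache::CacheItemHelper kMyHelper{MySize, MySaveTo,
  //                                                 MyDelete};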

  // The CreateCallback is passed by the block cache user to Lookup(). It
  // takes in a buffer from the NVM cache and constructs an object using
  // it. The callback doesn't have ownership of the buffer and should
  // copy the contents into its own buffer.
  using CreateCallback = std::function<Status(const void* buf, size_t size,
                                              void** out_obj, size_t* charge)>;

 public:  // ctor/dtor/create
  Cache(std::shared_ptr<MemoryAllocator> allocator = nullptr)
      : memory_allocator_(std::move(allocator)) {}
  // No copying allowed
  Cache(const Cache&) = delete;
  Cache& operator=(const Cache&) = delete;

  // Destroys all remaining entries by calling the associated "deleter"
  virtual ~Cache() {}

  // Creates a new Cache based on the input value string and returns the
  // result. Currently, this method can be used to create LRUCaches only.
  // @param config_options
  // @param value The value might be:
  //   - an old-style cache ("1M") -- equivalent to NewLRUCache(1024*1024)
  //   - Name-value option pairs -- "capacity=1M; num_shard_bits=4;"
  //     For the LRUCache, the values are defined in LRUCacheOptions.
  // @param result The new Cache object
  // @return OK if the cache was successfully created
  // @return NotFound if an invalid name was specified in the value
  // @return InvalidArgument if the options were not valid
  static Status CreateFromString(const ConfigOptions& config_options,
                                 const std::string& value,
                                 std::shared_ptr<Cache>* result);
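
  // For example, a sketch (ConfigOptions is declared in
  // rocksdb/convenience.h):
  //
  //   ConfigOptions config_options;
  //   std::shared_ptr<Cache> cache;
  //   Status s = Cache::CreateFromString(
  //       config_options, "capacity=1M;num_shard_bits=4", &cache);
  //   // On success, s.ok() and cache != nullptr.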

 public:  // functions
  // The type of the Cache
  virtual const char* Name() const = 0;

  // EXPERIMENTAL SecondaryCache support:
  // Some APIs here are experimental and might change in the future.
  // The Insert and Lookup APIs below are intended to allow cached objects
  // to be demoted/promoted between the primary block cache and a secondary
  // cache. The secondary cache could be a non-volatile cache, and will
  // likely store the object in a different representation. They rely on a
  // per object CacheItemHelper to do the conversions.
  // The secondary cache may persist across process and system restarts,
  // and may even be moved between hosts. Therefore, the cache key must
  // be repeatable across restarts/reboots, and globally unique if
  // multiple DBs share the same cache and the set of DBs can change
  // over time.

  // Insert a mapping from key->value into the volatile cache only
  // and assign it with the specified charge against the total cache capacity.
  // If strict_capacity_limit is true and cache reaches its full capacity,
  // return Status::MemoryLimit.
  //
  // If handle is not nullptr, returns a handle that corresponds to the
  // mapping. The caller must call this->Release(handle) when the returned
  // mapping is no longer needed. In case of error, the caller is responsible
  // for cleaning up the value (i.e. by calling the "deleter").
  //
  // If handle is nullptr, it is as if Release is called immediately after
  // insert. In case of error, the value will be cleaned up.
  //
  // When the inserted entry is no longer needed, the key and
  // value will be passed to "deleter" which must delete the value.
  // (The Cache is responsible for copying and reclaiming space for
  // the key.)
  virtual Status Insert(const Slice& key, void* value, size_t charge,
                        DeleterFn deleter, Handle** handle = nullptr,
                        Priority priority = Priority::LOW) = 0;
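
  // A basic usage sketch with a heap-allocated std::string value (one
  // possible pattern, for illustration only; `cache` is assumed):
  //
  //   static void StringDeleter(const Slice& /*key*/, void* value) {
  //     delete static_cast<std::string*>(value);
  //   }
  //   ...
  //   auto* value = new std::string("payload");
  //   Status s = cache->Insert("key", value, value->size(), StringDeleter);
  //   Cache::Handle* h = cache->Lookup("key");
  //   if (h != nullptr) {
  //     auto* v = static_cast<std::string*>(cache->Value(h));
  //     // ... use *v ...
  //     cache->Release(h);
  //   }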

  // EXPERIMENTAL
  // Insert a mapping from key->value into the cache and assign it
  // the specified charge against the total cache capacity. If
  // strict_capacity_limit is true and cache reaches its full capacity,
  // return Status::MemoryLimit. `value` must be non-nullptr for this
  // Insert() because Value() == nullptr is reserved for indicating failure
  // with secondary-cache-compatible mappings.
  //
  // The helper argument is saved by the cache and will be used when the
  // inserted object is evicted or promoted to the secondary cache. It,
  // therefore, must outlive the cache.
  //
  // If handle is not nullptr, returns a handle that corresponds to the
  // mapping. The caller must call this->Release(handle) when the returned
  // mapping is no longer needed. In case of error, the caller is responsible
  // for cleaning up the value (i.e. by calling the "deleter").
  //
  // If handle is nullptr, it is as if Release is called immediately after
  // insert. In case of error, the value will be cleaned up.
  //
  // Regardless of whether the item was inserted into the primary cache,
  // an attempt will be made to insert it into the secondary cache if one
  // is configured and the helper supports it; if the cache implementation
  // does not support a secondary cache, the item is only inserted into the
  // primary cache. The implementation may defer the insertion to the
  // secondary cache as it sees fit.
  //
  // When the inserted entry is no longer needed, the key and
  // value will be passed to "deleter".
  virtual Status Insert(const Slice& key, void* value,
                        const CacheItemHelper* helper, size_t charge,
                        Handle** handle = nullptr,
                        Priority priority = Priority::LOW) {
    if (!helper) {
      return Status::InvalidArgument();
    }
    return Insert(key, value, charge, helper->del_cb, handle, priority);
  }

  // If the cache has no mapping for "key", returns nullptr.
  //
  // Else return a handle that corresponds to the mapping. The caller
  // must call this->Release(handle) when the returned mapping is no
  // longer needed.
  // If stats is not nullptr, relevant tickers may be updated inside the
  // function.
  virtual Handle* Lookup(const Slice& key, Statistics* stats = nullptr) = 0;

  // EXPERIMENTAL
  // Lookup the key in the primary and secondary caches (if one is
  // configured). The create_cb callback function object will be used to
  // construct the cached object.
  // If none of the caches have the mapping for the key, returns nullptr.
  // Else, returns a handle that corresponds to the mapping.
  //
  // This call may promote the object from the secondary cache (if one is
  // configured, and has the given key) to the primary cache.
  //
  // The helper argument should be provided if the caller wants the lookup
  // to include the secondary cache (if one is configured) and the object,
  // if it exists, to be promoted to the primary cache. The helper may be
  // saved and used later when the object is evicted. Therefore, it must
  // outlive the cache.
  //
  // ======================== Async Lookup (wait=false) ======================
  // When wait=false, the handle returned might be in any of three states:
  // * Present - If Value() != nullptr, then the result is present and
  //   the handle can be used just as if wait=true.
  // * Pending, not ready (IsReady() == false) - secondary cache is still
  //   working to retrieve the value. Might become ready any time.
  // * Pending, ready (IsReady() == true) - secondary cache has the value
  //   but it has not been loaded into primary cache. Call to Wait()/WaitAll()
  //   will not block.
  //
  // IMPORTANT: Pending handles are not thread-safe, and only these functions
  // are allowed on them: Value(), IsReady(), Wait(), WaitAll(). Even
  // Release() can only come after Wait() or WaitAll() even though a
  // reference is held.
  //
  // Only Wait()/WaitAll() gets a Handle out of a Pending state. (Waiting is
  // safe and has no effect on other handle states.) After waiting on a
  // Handle, it is in one of two states:
  // * Present - if Value() != nullptr
  // * Failed - if Value() == nullptr, such as if the secondary cache
  //   initially thought it had the value but actually did not.
  //
  // Note that given an arbitrary Handle, the only way to distinguish the
  // Pending+ready state from the Failed state is to Wait() on it. A cache
  // entry not compatible with secondary cache can also have Value()==nullptr
  // like the Failed state, but this is not generally a concern.
  virtual Handle* Lookup(const Slice& key, const CacheItemHelper* /*helper_cb*/,
                         const CreateCallback& /*create_cb*/,
                         Priority /*priority*/, bool /*wait*/,
                         Statistics* stats = nullptr) {
    return Lookup(key, stats);
  }
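
  // A sketch of the asynchronous pattern (`cache`, `kMyHelper`, and
  // `create_cb` are hypothetical; see CacheItemHelper and CreateCallback
  // above):
  //
  //   Cache::Handle* h = cache->Lookup(key, &kMyHelper, create_cb,
  //                                    Cache::Priority::LOW, /*wait=*/false);
  //   if (h != nullptr && cache->Value(h) == nullptr) {
  //     cache->Wait(h);  // resolve the Pending state
  //   }
  //   if (h != nullptr) {
  //     if (cache->Value(h) != nullptr) {
  //       // Present: use the value.
  //     }  // else: Failed.
  //     cache->Release(h);
  //   }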

  // Increments the reference count for the handle if it refers to an entry in
  // the cache. Returns true if refcount was incremented; otherwise, returns
  // false.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual bool Ref(Handle* handle) = 0;

  /**
   * Release a mapping returned by a previous Lookup(). A released entry might
   * still remain in cache in case it is later looked up by others. If
   * erase_if_last_ref is set then it also erases it from the cache if there is
   * no other reference to it. Erasing it should call the deleter function that
   * was provided when the entry was inserted.
   *
   * Returns true if the entry was also erased.
   */
  // REQUIRES: handle must not have been released yet.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual bool Release(Handle* handle, bool erase_if_last_ref = false) = 0;

  // Return the value encapsulated in a handle returned by a
  // successful Lookup().
  // REQUIRES: handle must not have been released yet.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual void* Value(Handle* handle) = 0;

  // If the cache contains the entry for the key, erase it. Note that the
  // underlying entry will be kept around until all existing handles
  // to it have been released.
  virtual void Erase(const Slice& key) = 0;

  // Return a new numeric id. May be used by multiple clients who are
  // sharding the same cache to partition the key space. Typically the
  // client will allocate a new id at startup and prepend the id to
  // its cache keys.
  virtual uint64_t NewId() = 0;

  // Sets the maximum configured capacity of the cache. When the new
  // capacity is less than the old capacity and the existing usage is
  // greater than the new capacity, the implementation will do its best to
  // purge the released entries from the cache in order to lower the usage.
  virtual void SetCapacity(size_t capacity) = 0;

  // Set whether to return error on insertion when cache reaches its full
  // capacity.
  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;

  // Get the flag whether to return error on insertion when cache reaches its
  // full capacity.
  virtual bool HasStrictCapacityLimit() const = 0;

  // Returns the maximum configured capacity of the cache
  virtual size_t GetCapacity() const = 0;

  // Returns the memory size for the entries residing in the cache.
  virtual size_t GetUsage() const = 0;

  // Returns the number of entries currently tracked in the table. SIZE_MAX
  // means "not supported." This is used for inspecting the load factor, along
  // with GetTableAddressCount().
  virtual size_t GetOccupancyCount() const { return SIZE_MAX; }

  // Returns the number of ways the hash function is divided for addressing
  // entries. Zero means "not supported." This is used for inspecting the load
  // factor, along with GetOccupancyCount().
  virtual size_t GetTableAddressCount() const { return 0; }
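
  // For example, a sketch of inspecting the load factor where supported:
  //
  //   size_t occupancy = cache->GetOccupancyCount();
  //   size_t slots = cache->GetTableAddressCount();
  //   if (occupancy != SIZE_MAX && slots > 0) {
  //     double load_factor = static_cast<double>(occupancy) / slots;
  //   }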

  // Returns the memory size for a specific entry in the cache.
  virtual size_t GetUsage(Handle* handle) const = 0;

  // Returns the memory size for the entries in use by the system
  virtual size_t GetPinnedUsage() const = 0;

  // Returns the charge for the specific entry in the cache.
  virtual size_t GetCharge(Handle* handle) const = 0;

  // Returns the deleter for the specified entry. This might seem useless
  // as the Cache itself is responsible for calling the deleter, but
  // the deleter can essentially verify that a cache entry is of an
  // expected type from an expected code source.
  virtual DeleterFn GetDeleter(Handle* handle) const = 0;

  // Call this on shutdown if you want to speed it up. Cache will disown
  // any underlying data and will not free it on delete. This call will leak
  // memory - call this only if you're shutting down the process.
  // Any attempt to use the cache after this call will fail terribly.
  // Always delete the DB object before calling this method!
  virtual void DisownData() {
    // default implementation is noop
  }

  struct ApplyToAllEntriesOptions {
    // If the Cache uses locks, setting `average_entries_per_lock` to
    // a higher value suggests iterating over more entries each time a lock
    // is acquired, likely reducing the time for ApplyToAllEntries but
    // increasing latency for concurrent users of the Cache. Setting
    // `average_entries_per_lock` to a smaller value could be helpful if
    // callback is relatively expensive, such as using large data structures.
    size_t average_entries_per_lock = 256;
  };

  // Apply a callback to all entries in the cache. The Cache must ensure
  // thread safety but does not guarantee that a consistent snapshot of all
  // entries is iterated over if other threads are operating on the Cache
  // also.
  virtual void ApplyToAllEntries(
      const std::function<void(const Slice& key, void* value, size_t charge,
                               DeleterFn deleter)>& callback,
      const ApplyToAllEntriesOptions& opts) = 0;
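
  // For example, a sketch that tallies entry count and total charge:
  //
  //   size_t count = 0;
  //   size_t total_charge = 0;
  //   cache->ApplyToAllEntries(
  //       [&](const Slice& /*key*/, void* /*value*/, size_t charge,
  //           Cache::DeleterFn /*deleter*/) {
  //         ++count;
  //         total_charge += charge;
  //       },
  //       {});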

  // DEPRECATED version of above. (Default implementation uses above.)
  virtual void ApplyToAllCacheEntries(void (*callback)(void* value,
                                                       size_t charge),
                                      bool /*thread_safe*/) {
    ApplyToAllEntries([callback](const Slice&, void* value, size_t charge,
                                 DeleterFn) { callback(value, charge); },
                      {});
  }

  // Remove all entries.
  // Prerequisite: no entry is referenced.
  virtual void EraseUnRefEntries() = 0;

  virtual std::string GetPrintableOptions() const { return ""; }

  // Check for any warnings or errors in the operation of the cache and
  // report them to the logger. This is intended only to be called
  // periodically so does not need to be very efficient. (Obscure calling
  // conventions for Logger inherited from env.h)
  virtual void ReportProblems(
      const std::shared_ptr<Logger>& /*info_log*/) const {}

  MemoryAllocator* memory_allocator() const { return memory_allocator_.get(); }

  // EXPERIMENTAL
  // Release a mapping returned by a previous Lookup(). The "useful"
  // parameter specifies whether the data was actually used or not,
  // which may be used by the cache implementation to decide whether
  // to consider it as a hit for retention purposes. As noted elsewhere,
  // "pending" handles require Wait()/WaitAll() before Release().
  virtual bool Release(Handle* handle, bool /*useful*/,
                       bool erase_if_last_ref) {
    return Release(handle, erase_if_last_ref);
  }

  // EXPERIMENTAL
  // Determines if the handle returned by Lookup() can give a value without
  // blocking, though Wait()/WaitAll() might be required to publish it to
  // Value(). See secondary cache compatible Lookup() above for details.
  // This call is not thread safe on "pending" handles.
  virtual bool IsReady(Handle* /*handle*/) { return true; }

  // EXPERIMENTAL
  // Convert a "pending" handle into a full thread-shareable handle by
  // * If necessary, wait until secondary cache finishes loading the value.
  // * Construct the value for primary cache and set it in the handle.
  // Even after Wait() on a pending handle, the caller must check for
  // Value() == nullptr in case of failure. This call is not thread-safe
  // on pending handles. This call has no effect on non-pending handles.
  // See secondary cache compatible Lookup() above for details.
  virtual void Wait(Handle* /*handle*/) {}

  // EXPERIMENTAL
  // Wait for a vector of handles to become ready. As with Wait(), the user
  // should check the Value() of each handle for nullptr. This call is not
  // thread-safe on pending handles.
  virtual void WaitAll(std::vector<Handle*>& /*handles*/) {}

 private:
  std::shared_ptr<MemoryAllocator> memory_allocator_;
};

}  // namespace ROCKSDB_NAMESPACE