// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "cache/clock_cache.h"

#ifndef SUPPORT_CLOCK_CACHE

namespace rocksdb {

std::shared_ptr<Cache> NewClockCache(size_t /*capacity*/, int /*num_shard_bits*/,
                                     bool /*strict_capacity_limit*/) {
  // Clock cache not supported.
  return nullptr;
}
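
// When SUPPORT_CLOCK_CACHE is not defined (e.g. when RocksDB is built without
// TBB), NewClockCache() returns nullptr, so callers are expected to check the
// result and fall back to another cache implementation. A minimal sketch,
// added here for illustration (capacity, num_shard_bits and
// strict_capacity_limit stand for the caller's configuration):
//
//   std::shared_ptr<Cache> cache =
//       NewClockCache(capacity, num_shard_bits, strict_capacity_limit);
//   if (!cache) {
//     cache = NewLRUCache(capacity);  // e.g. fall back to the LRU cache
//   }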

}  // namespace rocksdb

#else

#include <assert.h>
#include <atomic>
#include <deque>

// "tbb/concurrent_hash_map.h" requires RTTI if exceptions are enabled.
// Disable them so users can choose to disable RTTI.
#ifndef ROCKSDB_USE_RTTI
#define TBB_USE_EXCEPTIONS 0
#endif
#include "tbb/concurrent_hash_map.h"

#include "cache/sharded_cache.h"
#include "port/port.h"
#include "util/autovector.h"
#include "util/mutexlock.h"

namespace rocksdb {

namespace {

// An implementation of the Cache interface based on the CLOCK algorithm, with
// better concurrent performance than LRUCache. The idea of the CLOCK algorithm
// is to maintain all cache entries in a circular list, with an iterator
// (the "head") pointing to the last examined entry. Eviction starts from the
// current head. Each entry is given a second chance before eviction if it
// has been accessed since it was last examined. In contrast to LRU, no
// modification of the internal data structure (except for flipping the usage
// bit) needs to be done upon lookup. This gives us the opportunity to
// implement a cache with better concurrency.
//
// Each cache entry is represented by a cache handle, and all the handles
// are arranged in a circular list, as described above. Upon erase of an entry,
// we never remove the handle. Instead, the handle is put into a recycle bin
// to be re-used. This avoids memory deallocation, which is hard to deal
// with in a concurrent environment.
//
// The cache also maintains a concurrent hash map for lookup. Any concurrent
// hash map implementation should do the job. We currently use
// tbb::concurrent_hash_map because it supports concurrent erase.
//
// Each cache handle has the following flags and counters, which are squeezed
// into an atomic integer to make sure the handle is always in a consistent
// state:
//
//   * In-cache bit: whether the entry is referenced by the cache itself. If
//     an entry is in cache, its key is also available in the hash map.
//   * Usage bit: whether the entry has been accessed by a user since it was
//     last examined for eviction. Can be reset by eviction.
//   * Reference count: number of references held by users.
//
// An entry can be referenced only when it is in cache. An entry can be evicted
// only when it is in cache, has not been used since it was last examined, and
// its reference count is zero.
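//
// For illustration (an added decoding sketch, using the kInCacheBit, kUsageBit
// and kRefsOffset constants defined in ClockCacheShard below): a flags value
// of 0x7 = 0b111 means the entry is in cache, has been used since the last
// examination, and is held by one user reference:
//
//   uint32_t flags = 0x7;
//   bool in_cache = flags & kInCacheBit;   // true  (lowest bit)
//   bool usage    = flags & kUsageBit;     // true  (second lowest bit)
//   uint32_t refs = flags >> kRefsOffset;  // 1     (remaining bits)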
//
// The following figure shows a possible layout of the cache. Boxes represent
// cache handles, and the numbers in each box are the in-cache bit, usage bit
// and reference count, respectively.
//
//   hash map:
//    +-------+--------+
//    |  key  | handle |
//    +-------+--------+
//    | "foo" |    5   |-------------------------------------+
//    +-------+--------+                                      |
//    | "bar" |    2   |--+                                   |
//    +-------+--------+  |                                   |
//                        |                                   |
//                   head |                                   |
//                     |  |                                   |
//   circular list:    |  |                                   |
//       +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//       |(0,0,0)|---|(1,1,0)|---|(0,0,0)|---|(0,1,3)|---|(1,0,0)|---|  ...
//       +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//           |                       |
//           +-------+   +-----------+
//                   |   |
//                 +---+---+
//   recycle bin:  | 1 | 3 |
//                 +---+---+
//
// Suppose we try to insert "baz" into the cache at this point and the cache is
// full. The cache will first look for entries to evict, starting from where
// the head points to (the second entry). It resets the usage bit of the second
// entry, skips the third and fourth entries since they are not in cache, and
// finally evicts the fifth entry ("foo"). It then looks in the recycle bin for
// an available handle, grabs handle 3, and inserts the key into that handle.
// The following figure shows the resulting layout.
//
//   hash map:
//    +-------+--------+
//    |  key  | handle |
//    +-------+--------+
//    | "baz" |    3   |-------------+
//    +-------+--------+             |
//    | "bar" |    2   |--+          |
//    +-------+--------+  |          |
//                        |          |
//                        |          |                               head
//                        |          |                                 |
//   circular list:       |          |                                 |
//       +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//       |(0,0,0)|---|(1,0,0)|---|(1,0,0)|---|(0,1,3)|---|(0,0,0)|---|  ...
//       +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//           |                                               |
//           +-------+   +-----------------------------------+
//                   |   |
//                 +---+---+
//   recycle bin:  | 1 | 5 |
//                 +---+---+
//
// A global mutex guards the circular list, the head, and the recycle bin.
// We additionally require that modifying the hash map has to hold the mutex.
// As such, modifying the cache (e.g. Insert() and Erase()) requires holding
// the mutex. Lookup() only accesses the hash map and the flags associated
// with each handle, and doesn't require explicit locking. Release() has to
// acquire the mutex only when it releases the last reference to the entry and
// the entry has been erased from the cache explicitly. A future improvement
// could be to remove the mutex completely.
//
// Benchmark:
// We run readrandom db_bench on a test DB of size 13GB, with the size of each
// level:
//
// Level   Files   Size(MB)
// -------------------------
// L0          1       0.01
// L1         18      17.32
// L2        230     182.94
// L3       1186    1833.63
// L4       4602    8140.30
//
// We test with both 32 and 16 read threads, with 2GB cache size (the whole DB
// doesn't fit in) and 64GB cache size (the whole DB can fit in cache), and
// with and without putting index and filter blocks in the block cache. The
// benchmark runs with RocksDB 4.10. We got the following results:
//
// Threads Cache    Cache              ClockCache             LRUCache
//         Size     Index/Filter Throughput(MB/s)  Hit  Throughput(MB/s)  Hit
// 32      2GB      yes                     466.7  85.9%           433.7  86.5%
// 32      2GB      no                      529.9  72.7%           532.7  73.9%
// 32      64GB     yes                     649.9  99.9%           507.9  99.9%
// 32      64GB     no                      740.4  99.9%           662.8  99.9%
// 16      2GB      yes                     278.4  85.9%           283.4  86.5%
// 16      2GB      no                      318.6  72.7%           335.8  73.9%
// 16      64GB     yes                     391.9  99.9%           353.3  99.9%
// 16      64GB     no                      433.8  99.8%           419.4  99.8%
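//
// For reference, a minimal usage sketch of the resulting cache (an added
// illustration; it assumes the generic rocksdb::Cache interface declared in
// include/rocksdb/cache.h, and value, charge and deleter stand in for the
// caller's data; capacity and shard count below are example values):
//
//   std::shared_ptr<Cache> cache =
//       NewClockCache(64 << 20 /* capacity */, 6 /* num_shard_bits */,
//                     false /* strict_capacity_limit */);
//   if (cache != nullptr) {  // nullptr when built without SUPPORT_CLOCK_CACHE
//     cache->Insert("key", value, charge, deleter);
//     Cache::Handle* handle = cache->Lookup("key");
//     if (handle != nullptr) {
//       void* v = cache->Value(handle);
//       cache->Release(handle);
//     }
//   }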

// Cache entry metadata.
struct CacheHandle {
  Slice key;
  uint32_t hash;
  void* value;
  size_t charge;
  void (*deleter)(const Slice&, void* value);

  // Flags and counters associated with the cache handle:
  //   lowest bit: in-cache bit
  //   second lowest bit: usage bit
  //   the rest of the bits: reference count
  // The handle is unused when flags equals 0. The thread that decreases the
  // count to 0 is responsible for putting the handle back into recycle_ and
  // cleaning up memory.
  std::atomic<uint32_t> flags;

  CacheHandle() = default;

  CacheHandle(const CacheHandle& a) { *this = a; }

  CacheHandle(const Slice& k, void* v,
              void (*del)(const Slice& key, void* value))
      : key(k), value(v), deleter(del) {}

  CacheHandle& operator=(const CacheHandle& a) {
    // Only copy members needed for deletion.
    key = a.key;
    value = a.value;
    deleter = a.deleter;
    return *this;
  }
};

// Key of the hash map. We store the hash value with the key for convenience.
struct CacheKey {
  Slice key;
  uint32_t hash_value;

  CacheKey() = default;

  CacheKey(const Slice& k, uint32_t h) {
    key = k;
    hash_value = h;
  }

  static bool equal(const CacheKey& a, const CacheKey& b) {
    return a.hash_value == b.hash_value && a.key == b.key;
  }

  static size_t hash(const CacheKey& a) {
    return static_cast<size_t>(a.hash_value);
  }
};
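
// Note (added): CacheKey also serves as the hash-compare argument of the
// tbb::concurrent_hash_map used below (see the HashTable typedef in
// ClockCacheShard), which needs only the equal() and hash() functions above.
// The instantiation therefore has the shape:
//
//   tbb::concurrent_hash_map<CacheKey, CacheHandle*, CacheKey> table;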

struct CleanupContext {
  // List of values to be deleted, along with the key and deleter.
  autovector<CacheHandle> to_delete_value;

  // List of keys to be deleted.
  autovector<const char*> to_delete_key;
};

// A cache shard which maintains its own CLOCK cache.
class ClockCacheShard final : public CacheShard {
 public:
  // Hash map type.
  typedef tbb::concurrent_hash_map<CacheKey, CacheHandle*, CacheKey> HashTable;

  ClockCacheShard();
  ~ClockCacheShard() override;

  // Interfaces
  void SetCapacity(size_t capacity) override;
  void SetStrictCapacityLimit(bool strict_capacity_limit) override;
  Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
                void (*deleter)(const Slice& key, void* value),
                Cache::Handle** handle, Cache::Priority priority) override;
  Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
  // If the entry is in cache, increase its reference count and return true.
  // Return false otherwise.
  //
  // Not necessary to hold mutex_ before being called.
  bool Ref(Cache::Handle* handle) override;
  bool Release(Cache::Handle* handle, bool force_erase = false) override;
  void Erase(const Slice& key, uint32_t hash) override;
  bool EraseAndConfirm(const Slice& key, uint32_t hash,
                       CleanupContext* context);
  size_t GetUsage() const override;
  size_t GetPinnedUsage() const override;
  void EraseUnRefEntries() override;
  void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                              bool thread_safe) override;

 private:
  static const uint32_t kInCacheBit = 1;
  static const uint32_t kUsageBit = 2;
  static const uint32_t kRefsOffset = 2;
  static const uint32_t kOneRef = 1 << kRefsOffset;

  // Helper functions to extract cache handle flags and counters.
  static bool InCache(uint32_t flags) { return flags & kInCacheBit; }
  static bool HasUsage(uint32_t flags) { return flags & kUsageBit; }
  static uint32_t CountRefs(uint32_t flags) { return flags >> kRefsOffset; }
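
  // Illustrative compile-time check (added for exposition; not in the original
  // RocksDB source): the bits are packed as
  //   [ reference count ... | usage bit | in-cache bit ]
  // so, for example, flags == 0x7 means in cache, recently used, and holding
  // one reference.
  static_assert(kUsageBit == (kInCacheBit << 1) && kOneRef == (kUsageBit << 1),
                "flag bits are expected to be packed contiguously");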

  // Decrease the reference count of the entry. If this decreases the count to
  // 0, recycle the entry. If set_usage is true, also set the usage bit.
  //
  // Returns true if a value is erased.
  //
  // Not necessary to hold mutex_ before being called.
  bool Unref(CacheHandle* handle, bool set_usage, CleanupContext* context);

  // Unset the in-cache bit of the entry. Recycle the handle if necessary.
  //
  // Returns true if a value is erased.
  //
  // Has to hold mutex_ before being called.
  bool UnsetInCache(CacheHandle* handle, CleanupContext* context);

  // Put the handle back into the recycle_ list, and put the value associated
  // with it into the to-be-deleted list. It doesn't clean up the key, as it
  // might be reused by another handle.
  //
  // Has to hold mutex_ before being called.
  void RecycleHandle(CacheHandle* handle, CleanupContext* context);

  // Delete keys and values in the to-be-deleted list. Call the method without
  // holding the mutex, as destructors can be expensive.
  void Cleanup(const CleanupContext& context);

  // Examine the handle for eviction. If the handle is in cache, its usage bit
  // is not set, and its reference count is 0, evict it from the cache.
  // Otherwise unset the usage bit.
  //
  // Has to hold mutex_ before being called.
  bool TryEvict(CacheHandle* value, CleanupContext* context);

  // Scan through the circular list and evict entries until we get enough
  // capacity for a new cache entry of the specified size. Return true on
  // success, false otherwise.
  //
  // Has to hold mutex_ before being called.
  bool EvictFromCache(size_t charge, CleanupContext* context);

  CacheHandle* Insert(const Slice& key, uint32_t hash, void* value,
                      size_t charge,
                      void (*deleter)(const Slice& key, void* value),
                      bool hold_reference, CleanupContext* context);

  // Guards list_, head_, and recycle_. In addition, updating table_ also has
  // to hold the mutex, to avoid the cache being in an inconsistent state.
  mutable port::Mutex mutex_;

  // The circular list of cache handles. Initially the list is empty. Once a
  // handle is needed for insertion and no more handles are available in the
  // recycle bin, one more handle is appended to the end.
  //
  // We use std::deque for the circular list because we want to make sure
  // pointers to handles are valid throughout the life-cycle of the cache
  // (in contrast to std::vector), and to be able to grow the list (in
  // contrast to statically allocated arrays).
  std::deque<CacheHandle> list_;

  // Index of the next handle in the circular list to be examined for
  // eviction.
  size_t head_;

  // Recycle bin of cache handles.
  autovector<CacheHandle*> recycle_;

  // Maximum cache size.
  std::atomic<size_t> capacity_;

  // Current total size of the cache.
  std::atomic<size_t> usage_;

  // Total un-released cache size.
  std::atomic<size_t> pinned_usage_;

  // Whether to allow inserting into the cache when the cache is full.
  std::atomic<bool> strict_capacity_limit_;

  // Hash table (tbb::concurrent_hash_map) for lookup.
  HashTable table_;
};

ClockCacheShard::ClockCacheShard()
    : head_(0), usage_(0), pinned_usage_(0), strict_capacity_limit_(false) {}

ClockCacheShard::~ClockCacheShard() {
  for (auto& handle : list_) {
    uint32_t flags = handle.flags.load(std::memory_order_relaxed);
    if (InCache(flags) || CountRefs(flags) > 0) {
      if (handle.deleter != nullptr) {
        (*handle.deleter)(handle.key, handle.value);
      }
      delete[] handle.key.data();
    }
  }
}

size_t ClockCacheShard::GetUsage() const {
  return usage_.load(std::memory_order_relaxed);
}

size_t ClockCacheShard::GetPinnedUsage() const {
  return pinned_usage_.load(std::memory_order_relaxed);
}

void ClockCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                             bool thread_safe) {
  if (thread_safe) {
    mutex_.Lock();
  }
  for (auto& handle : list_) {
    // Use relaxed semantics instead of acquire semantics since we are either
    // holding the mutex or don't have a thread-safety requirement.
    uint32_t flags = handle.flags.load(std::memory_order_relaxed);
    if (InCache(flags)) {
      callback(handle.value, handle.charge);
    }
  }
  if (thread_safe) {
    mutex_.Unlock();
  }
}

void ClockCacheShard::RecycleHandle(CacheHandle* handle,
                                    CleanupContext* context) {
  mutex_.AssertHeld();
  assert(!InCache(handle->flags) && CountRefs(handle->flags) == 0);
  context->to_delete_key.push_back(handle->key.data());
  context->to_delete_value.emplace_back(*handle);
  handle->key.clear();
  handle->value = nullptr;
  handle->deleter = nullptr;
  recycle_.push_back(handle);
  usage_.fetch_sub(handle->charge, std::memory_order_relaxed);
}

void ClockCacheShard::Cleanup(const CleanupContext& context) {
  for (const CacheHandle& handle : context.to_delete_value) {
    if (handle.deleter) {
      (*handle.deleter)(handle.key, handle.value);
    }
  }
  for (const char* key : context.to_delete_key) {
    delete[] key;
  }
}

bool ClockCacheShard::Ref(Cache::Handle* h) {
  auto handle = reinterpret_cast<CacheHandle*>(h);
  // CAS loop to increase reference count.
  uint32_t flags = handle->flags.load(std::memory_order_relaxed);
  while (InCache(flags)) {
    // Use acquire semantics on success, as further operations on the cache
    // entry have to be ordered after the reference count is increased.
    if (handle->flags.compare_exchange_weak(flags, flags + kOneRef,
                                            std::memory_order_acquire,
                                            std::memory_order_relaxed)) {
      if (CountRefs(flags) == 0) {
        // No reference count before the operation.
        pinned_usage_.fetch_add(handle->charge, std::memory_order_relaxed);
      }
      return true;
    }
  }
  return false;
}

bool ClockCacheShard::Unref(CacheHandle* handle, bool set_usage,
                            CleanupContext* context) {
  if (set_usage) {
    handle->flags.fetch_or(kUsageBit, std::memory_order_relaxed);
  }
  // Use acquire-release semantics as previous operations on the cache entry
  // have to be ordered before the reference count is decreased, and potential
  // cleanup of the entry has to be ordered after.
  uint32_t flags = handle->flags.fetch_sub(kOneRef, std::memory_order_acq_rel);
  assert(CountRefs(flags) > 0);
  if (CountRefs(flags) == 1) {
    // This is the last reference.
    pinned_usage_.fetch_sub(handle->charge, std::memory_order_relaxed);
    // Cleanup if it is the last reference.
    if (!InCache(flags)) {
      MutexLock l(&mutex_);
      RecycleHandle(handle, context);
    }
  }
  return context->to_delete_value.size();
}

bool ClockCacheShard::UnsetInCache(CacheHandle* handle,
                                   CleanupContext* context) {
  mutex_.AssertHeld();
  // Use acquire-release semantics as previous operations on the cache entry
  // have to be ordered before the in-cache bit is cleared, and potential
  // cleanup of the entry has to be ordered after.
  uint32_t flags =
      handle->flags.fetch_and(~kInCacheBit, std::memory_order_acq_rel);
  // Cleanup if it is the last reference.
  if (InCache(flags) && CountRefs(flags) == 0) {
    RecycleHandle(handle, context);
  }
  return context->to_delete_value.size();
}

bool ClockCacheShard::TryEvict(CacheHandle* handle, CleanupContext* context) {
  mutex_.AssertHeld();
  uint32_t flags = kInCacheBit;
  if (handle->flags.compare_exchange_strong(flags, 0, std::memory_order_acquire,
                                            std::memory_order_relaxed)) {
    bool erased __attribute__((__unused__)) =
        table_.erase(CacheKey(handle->key, handle->hash));
    assert(erased);
    RecycleHandle(handle, context);
    return true;
  }
  handle->flags.fetch_and(~kUsageBit, std::memory_order_relaxed);
  return false;
}

bool ClockCacheShard::EvictFromCache(size_t charge, CleanupContext* context) {
  size_t usage = usage_.load(std::memory_order_relaxed);
  size_t capacity = capacity_.load(std::memory_order_relaxed);
  if (usage == 0) {
    return charge <= capacity;
  }
  size_t new_head = head_;
  bool second_iteration = false;
  while (usage + charge > capacity) {
    assert(new_head < list_.size());
    if (TryEvict(&list_[new_head], context)) {
      usage = usage_.load(std::memory_order_relaxed);
    }
    new_head = (new_head + 1 >= list_.size()) ? 0 : new_head + 1;
    if (new_head == head_) {
      if (second_iteration) {
        return false;
      } else {
        second_iteration = true;
      }
    }
  }
  head_ = new_head;
  return true;
}

void ClockCacheShard::SetCapacity(size_t capacity) {
  CleanupContext context;
  {
    MutexLock l(&mutex_);
    capacity_.store(capacity, std::memory_order_relaxed);
    EvictFromCache(0, &context);
  }
  Cleanup(context);
}

void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
  strict_capacity_limit_.store(strict_capacity_limit,
                               std::memory_order_relaxed);
}

CacheHandle* ClockCacheShard::Insert(
    const Slice& key, uint32_t hash, void* value, size_t charge,
    void (*deleter)(const Slice& key, void* value), bool hold_reference,
    CleanupContext* context) {
  MutexLock l(&mutex_);
  bool success = EvictFromCache(charge, context);
  bool strict = strict_capacity_limit_.load(std::memory_order_relaxed);
  if (!success && (strict || !hold_reference)) {
    context->to_delete_key.push_back(key.data());
    if (!hold_reference) {
      context->to_delete_value.emplace_back(key, value, deleter);
    }
    return nullptr;
  }
  // Grab an available handle from the recycle bin. If the recycle bin is
  // empty, create a new handle and append it to the end of the circular list.
  CacheHandle* handle = nullptr;
  if (!recycle_.empty()) {
    handle = recycle_.back();
    recycle_.pop_back();
  } else {
    list_.emplace_back();
    handle = &list_.back();
  }
  // Fill the handle.
  handle->key = key;
  handle->hash = hash;
  handle->value = value;
  handle->charge = charge;
  handle->deleter = deleter;
  uint32_t flags = hold_reference ? kInCacheBit + kOneRef : kInCacheBit;
  handle->flags.store(flags, std::memory_order_relaxed);
  HashTable::accessor accessor;
  if (table_.find(accessor, CacheKey(key, hash))) {
    CacheHandle* existing_handle = accessor->second;
    table_.erase(accessor);
    UnsetInCache(existing_handle, context);
  }
  table_.insert(HashTable::value_type(CacheKey(key, hash), handle));
  if (hold_reference) {
    pinned_usage_.fetch_add(charge, std::memory_order_relaxed);
  }
  usage_.fetch_add(charge, std::memory_order_relaxed);
  return handle;
}

Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
                               size_t charge,
                               void (*deleter)(const Slice& key, void* value),
                               Cache::Handle** out_handle,
                               Cache::Priority /*priority*/) {
  CleanupContext context;
  HashTable::accessor accessor;
  char* key_data = new char[key.size()];
  memcpy(key_data, key.data(), key.size());
  Slice key_copy(key_data, key.size());
  CacheHandle* handle = Insert(key_copy, hash, value, charge, deleter,
                               out_handle != nullptr, &context);
  Status s;
  if (out_handle != nullptr) {
    if (handle == nullptr) {
      s = Status::Incomplete("Insert failed due to LRU cache being full.");
    } else {
      *out_handle = reinterpret_cast<Cache::Handle*>(handle);
    }
  }
  Cleanup(context);
  return s;
}

Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
  HashTable::const_accessor accessor;
  if (!table_.find(accessor, CacheKey(key, hash))) {
    return nullptr;
  }
  CacheHandle* handle = accessor->second;
  accessor.release();
  // Ref() could fail if another thread sneaks in and evicts/erases the cache
  // entry before we are able to hold a reference.
  if (!Ref(reinterpret_cast<Cache::Handle*>(handle))) {
    return nullptr;
  }
  // Double check the key, since the handle may now represent another key if
  // other threads sneak in, evict/erase the entry and re-use the handle for
  // another cache entry.
  if (hash != handle->hash || key != handle->key) {
    CleanupContext context;
    Unref(handle, false, &context);
    // It is possible that Unref() deleted the entry, so we need to clean up.
    Cleanup(context);
    return nullptr;
  }
  return reinterpret_cast<Cache::Handle*>(handle);
}

bool ClockCacheShard::Release(Cache::Handle* h, bool force_erase) {
  CleanupContext context;
  CacheHandle* handle = reinterpret_cast<CacheHandle*>(h);
  bool erased = Unref(handle, true, &context);
  if (force_erase && !erased) {
    erased = EraseAndConfirm(handle->key, handle->hash, &context);
  }
  Cleanup(context);
  return erased;
}

void ClockCacheShard::Erase(const Slice& key, uint32_t hash) {
  CleanupContext context;
  EraseAndConfirm(key, hash, &context);
  Cleanup(context);
}

bool ClockCacheShard::EraseAndConfirm(const Slice& key, uint32_t hash,
                                      CleanupContext* context) {
  MutexLock l(&mutex_);
  HashTable::accessor accessor;
  bool erased = false;
  if (table_.find(accessor, CacheKey(key, hash))) {
    CacheHandle* handle = accessor->second;
    table_.erase(accessor);
    erased = UnsetInCache(handle, context);
  }
  return erased;
}

void ClockCacheShard::EraseUnRefEntries() {
  CleanupContext context;
  {
    MutexLock l(&mutex_);
    table_.clear();
    for (auto& handle : list_) {
      UnsetInCache(&handle, &context);
    }
  }
  Cleanup(context);
}

class ClockCache final : public ShardedCache {
 public:
  ClockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit)
      : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
    int num_shards = 1 << num_shard_bits;
    shards_ = new ClockCacheShard[num_shards];
    SetCapacity(capacity);
    SetStrictCapacityLimit(strict_capacity_limit);
  }

  ~ClockCache() override { delete[] shards_; }

  const char* Name() const override { return "ClockCache"; }

  CacheShard* GetShard(int shard) override {
    return reinterpret_cast<CacheShard*>(&shards_[shard]);
  }

  const CacheShard* GetShard(int shard) const override {
    return reinterpret_cast<CacheShard*>(&shards_[shard]);
  }

  void* Value(Handle* handle) override {
    return reinterpret_cast<const CacheHandle*>(handle)->value;
  }

  size_t GetCharge(Handle* handle) const override {
    return reinterpret_cast<const CacheHandle*>(handle)->charge;
  }

  uint32_t GetHash(Handle* handle) const override {
    return reinterpret_cast<const CacheHandle*>(handle)->hash;
  }

  void DisownData() override { shards_ = nullptr; }

 private:
  ClockCacheShard* shards_;
};

}  // end anonymous namespace

std::shared_ptr<Cache> NewClockCache(size_t capacity, int num_shard_bits,
                                     bool strict_capacity_limit) {
  if (num_shard_bits < 0) {
    num_shard_bits = GetDefaultCacheShardBits(capacity);
  }
  return std::make_shared<ClockCache>(capacity, num_shard_bits,
                                      strict_capacity_limit);
}

}  // namespace rocksdb

#endif  // SUPPORT_CLOCK_CACHE