// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include "cache/lru_cache.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>

#include "util/mutexlock.h"

namespace rocksdb {

LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
  Resize();
}

LRUHandleTable::~LRUHandleTable() {
  ApplyToAllCacheEntries([](LRUHandle* h) {
    if (h->refs == 1) {
      h->Free();
    }
  });
  delete[] list_;
}

LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
  return *FindPointer(key, hash);
}

LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
  LRUHandle** ptr = FindPointer(h->key(), h->hash);
  LRUHandle* old = *ptr;
  h->next_hash = (old == nullptr ? nullptr : old->next_hash);
  *ptr = h;
  if (old == nullptr) {
    ++elems_;
    if (elems_ > length_) {
      // Since each cache entry is fairly large, we aim for a small
      // average linked list length (<= 1).
      Resize();
    }
  }
  return old;
}

LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = FindPointer(key, hash);
  LRUHandle* result = *ptr;
  if (result != nullptr) {
    *ptr = result->next_hash;
    --elems_;
  }
  return result;
}

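// Return a pointer to the slot that points to the cache entry matching
// key/hash. If there is no such entry, return a pointer to the trailing
// null slot of the corresponding hash chain.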
LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = &list_[hash & (length_ - 1)];
  while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
    ptr = &(*ptr)->next_hash;
  }
  return ptr;
}

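// Grow the bucket array (powers of two, at least 16) so the average hash
// chain stays short, then rehash every existing entry into the new array.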
void LRUHandleTable::Resize() {
  uint32_t new_length = 16;
  while (new_length < elems_ * 1.5) {
    new_length *= 2;
  }
  LRUHandle** new_list = new LRUHandle*[new_length];
  memset(new_list, 0, sizeof(new_list[0]) * new_length);
  uint32_t count = 0;
  for (uint32_t i = 0; i < length_; i++) {
    LRUHandle* h = list_[i];
    while (h != nullptr) {
      LRUHandle* next = h->next_hash;
      uint32_t hash = h->hash;
      LRUHandle** ptr = &new_list[hash & (new_length - 1)];
      h->next_hash = *ptr;
      *ptr = h;
      h = next;
      count++;
    }
  }
  assert(elems_ == count);
  delete[] list_;
  list_ = new_list;
  length_ = new_length;
}

LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
                             double high_pri_pool_ratio,
                             bool use_adaptive_mutex)
    : capacity_(0),
      high_pri_pool_usage_(0),
      strict_capacity_limit_(strict_capacity_limit),
      high_pri_pool_ratio_(high_pri_pool_ratio),
      high_pri_pool_capacity_(0),
      usage_(0),
      lru_usage_(0),
      mutex_(use_adaptive_mutex) {
  // Make empty circular linked list
  lru_.next = &lru_;
  lru_.prev = &lru_;
  lru_low_pri_ = &lru_;
  SetCapacity(capacity);
}

LRUCacheShard::~LRUCacheShard() {}

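// Decrement the entry's reference count; return true when the count drops
// to zero and the entry can be freed.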
bool LRUCacheShard::Unref(LRUHandle* e) {
  assert(e->refs > 0);
  e->refs--;
  return e->refs == 0;
}

// Call deleter and free

void LRUCacheShard::EraseUnRefEntries() {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    while (lru_.next != &lru_) {
      LRUHandle* old = lru_.next;
      assert(old->InCache());
      assert(old->refs ==
             1);  // LRU list contains elements which may be evicted
      LRU_Remove(old);
      table_.Remove(old->key(), old->hash);
      old->SetInCache(false);
      Unref(old);
      usage_ -= old->charge;
      last_reference_list.push_back(old);
    }
  }

  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                           bool thread_safe) {
  if (thread_safe) {
    mutex_.Lock();
  }
  table_.ApplyToAllCacheEntries(
      [callback](LRUHandle* h) { callback(h->value, h->charge); });
  if (thread_safe) {
    mutex_.Unlock();
  }
}

void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
  *lru = &lru_;
  *lru_low_pri = lru_low_pri_;
}

size_t LRUCacheShard::TEST_GetLRUSize() {
  LRUHandle* lru_handle = lru_.next;
  size_t lru_size = 0;
  while (lru_handle != &lru_) {
    lru_size++;
    lru_handle = lru_handle->next;
  }
  return lru_size;
}

double LRUCacheShard::GetHighPriPoolRatio() {
  MutexLock l(&mutex_);
  return high_pri_pool_ratio_;
}

void LRUCacheShard::LRU_Remove(LRUHandle* e) {
  assert(e->next != nullptr);
  assert(e->prev != nullptr);
  if (lru_low_pri_ == e) {
    lru_low_pri_ = e->prev;
  }
  e->next->prev = e->prev;
  e->prev->next = e->next;
  e->prev = e->next = nullptr;
  lru_usage_ -= e->charge;
  if (e->InHighPriPool()) {
    assert(high_pri_pool_usage_ >= e->charge);
    high_pri_pool_usage_ -= e->charge;
  }
}

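// Insert "e" into the LRU list: entries that are high priority or have been
// hit before go to the head of the high-pri pool (when the pool is enabled);
// everything else goes to the head of the low-pri pool.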
void LRUCacheShard::LRU_Insert(LRUHandle* e) {
  assert(e->next == nullptr);
  assert(e->prev == nullptr);
  if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
    // Insert "e" to head of LRU list.
    e->next = &lru_;
    e->prev = lru_.prev;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(true);
    high_pri_pool_usage_ += e->charge;
    MaintainPoolSize();
  } else {
    // Insert "e" to the head of low-pri pool. Note that when
    // high_pri_pool_ratio is 0, head of low-pri pool is also head of LRU list.
    e->next = lru_low_pri_->next;
    e->prev = lru_low_pri_;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(false);
    lru_low_pri_ = e;
  }
  lru_usage_ += e->charge;
}

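// Shrink the high-pri pool back within its capacity by moving the
// lru_low_pri_ boundary forward, demoting the oldest high-pri entries to
// the low-pri pool.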
void LRUCacheShard::MaintainPoolSize() {
  while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
    // Overflow last entry in high-pri pool to low-pri pool.
    lru_low_pri_ = lru_low_pri_->next;
    assert(lru_low_pri_ != &lru_);
    lru_low_pri_->SetInHighPriPool(false);
    high_pri_pool_usage_ -= lru_low_pri_->charge;
  }
}

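// Evict entries from the least-recently-used end of the list until "charge"
// additional bytes fit within capacity_ or the list is empty. Caller must
// hold mutex_; evicted handles are appended to *deleted so they can be
// freed after the mutex is released.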
void LRUCacheShard::EvictFromLRU(size_t charge,
                                 autovector<LRUHandle*>* deleted) {
  while (usage_ + charge > capacity_ && lru_.next != &lru_) {
    LRUHandle* old = lru_.next;
    assert(old->InCache());
    assert(old->refs == 1);  // LRU list contains elements which may be evicted
    LRU_Remove(old);
    table_.Remove(old->key(), old->hash);
    old->SetInCache(false);
    Unref(old);
    usage_ -= old->charge;
    deleted->push_back(old);
  }
}

void LRUCacheShard::SetCapacity(size_t capacity) {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    capacity_ = capacity;
    high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
    EvictFromLRU(0, &last_reference_list);
  }
  // we free the entries here outside of mutex for
  // performance reasons
  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
  MutexLock l(&mutex_);
  strict_capacity_limit_ = strict_capacity_limit;
}

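// Look up the entry for key/hash. On a hit the entry is pinned: it is taken
// off the LRU list if it was unreferenced, its reference count is bumped,
// and it is marked as hit so LRU_Insert can promote it later.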
Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
  MutexLock l(&mutex_);
  LRUHandle* e = table_.Lookup(key, hash);
  if (e != nullptr) {
    assert(e->InCache());
    if (e->refs == 1) {
      LRU_Remove(e);
    }
    e->refs++;
    e->SetHit();
  }
  return reinterpret_cast<Cache::Handle*>(e);
}

bool LRUCacheShard::Ref(Cache::Handle* h) {
  LRUHandle* handle = reinterpret_cast<LRUHandle*>(h);
  MutexLock l(&mutex_);
  if (handle->InCache() && handle->refs == 1) {
    LRU_Remove(handle);
  }
  handle->refs++;
  return true;
}

void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
  MutexLock l(&mutex_);
  high_pri_pool_ratio_ = high_pri_pool_ratio;
  high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
  MaintainPoolSize();
}

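// Drop one reference to "handle". If that was the last reference, or if the
// entry is otherwise unreferenced and the cache is over capacity (or
// force_erase is set), the entry is erased from the table and freed outside
// the mutex; otherwise it goes back onto the LRU list. Returns true iff the
// entry was freed.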
bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
  if (handle == nullptr) {
    return false;
  }
  LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    last_reference = Unref(e);
    if (last_reference) {
      usage_ -= e->charge;
    }
    if (e->refs == 1 && e->InCache()) {
      // The item is still in cache, and nobody else holds a reference to it
      if (usage_ > capacity_ || force_erase) {
        // the cache is full
        // The LRU list must be empty since the cache is full
        assert(!(usage_ > capacity_) || lru_.next == &lru_);
        // take this opportunity and remove the item
        table_.Remove(e->key(), e->hash);
        e->SetInCache(false);
        Unref(e);
        usage_ -= e->charge;
        last_reference = true;
      } else {
        // put the item on the list to be potentially freed
        LRU_Insert(e);
      }
    }
  }

  // free outside of mutex
  if (last_reference) {
    e->Free();
  }
  return last_reference;
}

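// Allocate a handle for key/value, evict unreferenced entries to make room,
// and insert it into the hash table. If the entry still does not fit, it is
// either dropped (no handle requested) or rejected with Status::Incomplete
// (strict_capacity_limit); otherwise the cache may temporarily grow beyond
// its capacity.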
Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
                             size_t charge,
                             void (*deleter)(const Slice& key, void* value),
                             Cache::Handle** handle, Cache::Priority priority) {
  // Allocate the memory here outside of the mutex.
  // If the cache is full, we'll have to release it;
  // it shouldn't happen very often though.
  LRUHandle* e = reinterpret_cast<LRUHandle*>(
      new char[sizeof(LRUHandle) - 1 + key.size()]);
  Status s;
  autovector<LRUHandle*> last_reference_list;

  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->flags = 0;
  e->hash = hash;
  e->refs = (handle == nullptr
                 ? 1
                 : 2);  // One from LRUCache, one for the returned handle
  e->next = e->prev = nullptr;
  e->SetInCache(true);
  e->SetPriority(priority);
  memcpy(e->key_data, key.data(), key.size());

  {
    MutexLock l(&mutex_);

    // Free the space following strict LRU policy until enough space
    // is freed or the LRU list is empty
    EvictFromLRU(charge, &last_reference_list);

    if (usage_ - lru_usage_ + charge > capacity_ &&
        (strict_capacity_limit_ || handle == nullptr)) {
      if (handle == nullptr) {
        // Don't insert the entry but still return ok, as if the entry were
        // inserted into the cache and evicted immediately.
        last_reference_list.push_back(e);
      } else {
        delete[] reinterpret_cast<char*>(e);
        *handle = nullptr;
        s = Status::Incomplete("Insert failed due to LRU cache being full.");
      }
    } else {
      // Insert into the cache. Note that the cache might get larger than its
      // capacity if not enough space was freed up.
      LRUHandle* old = table_.Insert(e);
      usage_ += e->charge;
      if (old != nullptr) {
        old->SetInCache(false);
        if (Unref(old)) {
          usage_ -= old->charge;
          // old is on the LRU list because it is in the cache and its
          // reference count was just 1 (Unref returned true)
          LRU_Remove(old);
          last_reference_list.push_back(old);
        }
      }
      if (handle == nullptr) {
        LRU_Insert(e);
      } else {
        *handle = reinterpret_cast<Cache::Handle*>(e);
      }
      s = Status::OK();
    }
  }

  // we free the entries here outside of mutex for
  // performance reasons
  for (auto entry : last_reference_list) {
    entry->Free();
  }

  return s;
}

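// Remove the entry for key/hash from the hash table. If nothing else holds
// a reference, the entry is freed immediately (outside the mutex); otherwise
// it is freed when the last handle is released.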
void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
  LRUHandle* e;
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    e = table_.Remove(key, hash);
    if (e != nullptr) {
      last_reference = Unref(e);
      if (last_reference) {
        usage_ -= e->charge;
      }
      if (last_reference && e->InCache()) {
        LRU_Remove(e);
      }
      e->SetInCache(false);
    }
  }

  // mutex not held here
  // last_reference will only be true if e != nullptr
  if (last_reference) {
    e->Free();
  }
}

size_t LRUCacheShard::GetUsage() const {
  MutexLock l(&mutex_);
  return usage_;
}

size_t LRUCacheShard::GetPinnedUsage() const {
  MutexLock l(&mutex_);
  assert(usage_ >= lru_usage_);
  return usage_ - lru_usage_;
}

std::string LRUCacheShard::GetPrintableOptions() const {
  const int kBufferSize = 200;
  char buffer[kBufferSize];
  {
    MutexLock l(&mutex_);
    snprintf(buffer, kBufferSize, "    high_pri_pool_ratio: %.3lf\n",
             high_pri_pool_ratio_);
  }
  return std::string(buffer);
}

LRUCache::LRUCache(size_t capacity, int num_shard_bits,
                   bool strict_capacity_limit, double high_pri_pool_ratio,
                   std::shared_ptr<MemoryAllocator> allocator,
                   bool use_adaptive_mutex)
    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit,
                   std::move(allocator)) {
  num_shards_ = 1 << num_shard_bits;
  shards_ = reinterpret_cast<LRUCacheShard*>(
      port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
  size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
  for (int i = 0; i < num_shards_; i++) {
    new (&shards_[i])
        LRUCacheShard(per_shard, strict_capacity_limit, high_pri_pool_ratio,
                      use_adaptive_mutex);
  }
}

LRUCache::~LRUCache() {
  if (shards_ != nullptr) {
    assert(num_shards_ > 0);
    for (int i = 0; i < num_shards_; i++) {
      shards_[i].~LRUCacheShard();
    }
    port::cacheline_aligned_free(shards_);
  }
}

CacheShard* LRUCache::GetShard(int shard) {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

const CacheShard* LRUCache::GetShard(int shard) const {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

void* LRUCache::Value(Handle* handle) {
  return reinterpret_cast<const LRUHandle*>(handle)->value;
}

size_t LRUCache::GetCharge(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->charge;
}

uint32_t LRUCache::GetHash(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->hash;
}

void LRUCache::DisownData() {
  // Do not drop data if compiled with ASAN, to suppress leak warnings.
#if defined(__clang__)
#if !defined(__has_feature) || !__has_feature(address_sanitizer)
  shards_ = nullptr;
  num_shards_ = 0;
#endif
#else  // __clang__
#ifndef __SANITIZE_ADDRESS__
  shards_ = nullptr;
  num_shards_ = 0;
#endif  // !__SANITIZE_ADDRESS__
#endif  // __clang__
}

size_t LRUCache::TEST_GetLRUSize() {
  size_t lru_size_of_all_shards = 0;
  for (int i = 0; i < num_shards_; i++) {
    lru_size_of_all_shards += shards_[i].TEST_GetLRUSize();
  }
  return lru_size_of_all_shards;
}

double LRUCache::GetHighPriPoolRatio() {
  double result = 0.0;
  if (num_shards_ > 0) {
    result = shards_[0].GetHighPriPoolRatio();
  }
  return result;
}

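// Typical usage (sketch only; it relies on the LRUCacheOptions fields
// consumed below: capacity, num_shard_bits, high_pri_pool_ratio):
//
//   rocksdb::LRUCacheOptions opts;
//   opts.capacity = 128 << 20;        // 128 MB total across all shards
//   opts.num_shard_bits = -1;         // let the cache pick a shard count
//   opts.high_pri_pool_ratio = 0.5;   // reserve half of each shard for
//                                     // high-priority entries
//   std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(opts);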
std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
  return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
                     cache_opts.strict_capacity_limit,
                     cache_opts.high_pri_pool_ratio,
                     cache_opts.memory_allocator,
                     cache_opts.use_adaptive_mutex);
}

std::shared_ptr<Cache> NewLRUCache(
    size_t capacity, int num_shard_bits, bool strict_capacity_limit,
    double high_pri_pool_ratio,
    std::shared_ptr<MemoryAllocator> memory_allocator,
    bool use_adaptive_mutex) {
  if (num_shard_bits >= 20) {
    return nullptr;  // the cache cannot be sharded into too many fine pieces
  }
  if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
    // invalid high_pri_pool_ratio
    return nullptr;
  }
  if (num_shard_bits < 0) {
    num_shard_bits = GetDefaultCacheShardBits(capacity);
  }
  return std::make_shared<LRUCache>(capacity, num_shard_bits,
                                    strict_capacity_limit, high_pri_pool_ratio,
                                    std::move(memory_allocator),
                                    use_adaptive_mutex);
}

}  // namespace rocksdb