// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "cache/lru_cache.h"

#include <assert.h>
#include <stdio.h>
#include <string>

#include "util/mutexlock.h"

namespace ROCKSDB_NAMESPACE {

LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
  Resize();
}

LRUHandleTable::~LRUHandleTable() {
  ApplyToAllCacheEntries([](LRUHandle* h) {
    if (!h->HasRefs()) {
      h->Free();
    }
  });
  delete[] list_;
}

LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
  return *FindPointer(key, hash);
}

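// Inserts "h" into the hash table. If an entry with the same key and hash is
// already present, it is unlinked from its bucket chain and returned so the
// caller can release it; otherwise nullptr is returned.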
LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
  LRUHandle** ptr = FindPointer(h->key(), h->hash);
  LRUHandle* old = *ptr;
  h->next_hash = (old == nullptr ? nullptr : old->next_hash);
  *ptr = h;
  if (old == nullptr) {
    ++elems_;
    if (elems_ > length_) {
      // Since each cache entry is fairly large, we aim for a small
      // average linked list length (<= 1).
      Resize();
    }
  }
  return old;
}

LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = FindPointer(key, hash);
  LRUHandle* result = *ptr;
  if (result != nullptr) {
    *ptr = result->next_hash;
    --elems_;
  }
  return result;
}

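// Returns a pointer to the slot that points to the entry matching key/hash.
// If no such entry exists, returns a pointer to the trailing slot of the
// corresponding bucket chain, which the caller can use to link in a new entry.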
LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = &list_[hash & (length_ - 1)];
  while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
    ptr = &(*ptr)->next_hash;
  }
  return ptr;
}

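// Grows the bucket array (doubling, starting from a minimum of 16) so the
// average bucket chain stays short, then rehashes every handle into the new
// buckets.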
void LRUHandleTable::Resize() {
  uint32_t new_length = 16;
  while (new_length < elems_ * 1.5) {
    new_length *= 2;
  }
  LRUHandle** new_list = new LRUHandle*[new_length];
  memset(new_list, 0, sizeof(new_list[0]) * new_length);
  uint32_t count = 0;
  for (uint32_t i = 0; i < length_; i++) {
    LRUHandle* h = list_[i];
    while (h != nullptr) {
      LRUHandle* next = h->next_hash;
      uint32_t hash = h->hash;
      LRUHandle** ptr = &new_list[hash & (new_length - 1)];
      h->next_hash = *ptr;
      *ptr = h;
      h = next;
      count++;
    }
  }
  assert(elems_ == count);
  delete[] list_;
  list_ = new_list;
  length_ = new_length;
}

LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
                             double high_pri_pool_ratio,
                             bool use_adaptive_mutex,
                             CacheMetadataChargePolicy metadata_charge_policy)
    : capacity_(0),
      high_pri_pool_usage_(0),
      strict_capacity_limit_(strict_capacity_limit),
      high_pri_pool_ratio_(high_pri_pool_ratio),
      high_pri_pool_capacity_(0),
      usage_(0),
      lru_usage_(0),
      mutex_(use_adaptive_mutex) {
  set_metadata_charge_policy(metadata_charge_policy);
  // Make empty circular linked list
  lru_.next = &lru_;
  lru_.prev = &lru_;
  lru_low_pri_ = &lru_;
  SetCapacity(capacity);
}

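// Frees every entry that is in the cache but has no external references, i.e.
// everything currently on the LRU list. Entries are unlinked under the mutex
// and freed after it is released.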
void LRUCacheShard::EraseUnRefEntries() {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    while (lru_.next != &lru_) {
      LRUHandle* old = lru_.next;
      // LRU list contains only elements which can be evicted
      assert(old->InCache() && !old->HasRefs());
      LRU_Remove(old);
      table_.Remove(old->key(), old->hash);
      old->SetInCache(false);
      size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_);
      assert(usage_ >= total_charge);
      usage_ -= total_charge;
      last_reference_list.push_back(old);
    }
  }

  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                           bool thread_safe) {
  const auto applyCallback = [&]() {
    table_.ApplyToAllCacheEntries(
        [callback](LRUHandle* h) { callback(h->value, h->charge); });
  };

  if (thread_safe) {
    MutexLock l(&mutex_);
    applyCallback();
  } else {
    applyCallback();
  }
}

void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
  MutexLock l(&mutex_);
  *lru = &lru_;
  *lru_low_pri = lru_low_pri_;
}

size_t LRUCacheShard::TEST_GetLRUSize() {
  MutexLock l(&mutex_);
  LRUHandle* lru_handle = lru_.next;
  size_t lru_size = 0;
  while (lru_handle != &lru_) {
    lru_size++;
    lru_handle = lru_handle->next;
  }
  return lru_size;
}

double LRUCacheShard::GetHighPriPoolRatio() {
  MutexLock l(&mutex_);
  return high_pri_pool_ratio_;
}

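// Unlinks "e" from the LRU list and subtracts its charge from the LRU (and,
// if applicable, high-pri pool) usage counters.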
void LRUCacheShard::LRU_Remove(LRUHandle* e) {
  assert(e->next != nullptr);
  assert(e->prev != nullptr);
  if (lru_low_pri_ == e) {
    lru_low_pri_ = e->prev;
  }
  e->next->prev = e->prev;
  e->prev->next = e->next;
  e->prev = e->next = nullptr;
  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
  assert(lru_usage_ >= total_charge);
  lru_usage_ -= total_charge;
  if (e->InHighPriPool()) {
    assert(high_pri_pool_usage_ >= total_charge);
    high_pri_pool_usage_ -= total_charge;
  }
}

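// Links "e" at the MRU end of the LRU list. Entries that are high priority or
// have been hit before go into the high-pri pool (when one is configured);
// everything else is placed at the head of the low-pri pool.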
void LRUCacheShard::LRU_Insert(LRUHandle* e) {
  assert(e->next == nullptr);
  assert(e->prev == nullptr);
  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
  if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
    // Insert "e" to head of LRU list.
    e->next = &lru_;
    e->prev = lru_.prev;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(true);
    high_pri_pool_usage_ += total_charge;
    MaintainPoolSize();
  } else {
    // Insert "e" to the head of low-pri pool. Note that when
    // high_pri_pool_ratio is 0, head of low-pri pool is also head of LRU list.
    e->next = lru_low_pri_->next;
    e->prev = lru_low_pri_;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(false);
    lru_low_pri_ = e;
  }
  lru_usage_ += total_charge;
}

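// Moves entries from the cold end of the high-pri pool into the low-pri pool
// until the high-pri pool usage fits within its capacity.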
void LRUCacheShard::MaintainPoolSize() {
  while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
    // Overflow last entry in high-pri pool to low-pri pool.
    lru_low_pri_ = lru_low_pri_->next;
    assert(lru_low_pri_ != &lru_);
    lru_low_pri_->SetInHighPriPool(false);
    size_t total_charge =
        lru_low_pri_->CalcTotalCharge(metadata_charge_policy_);
    assert(high_pri_pool_usage_ >= total_charge);
    high_pri_pool_usage_ -= total_charge;
  }
}

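// Evicts unreferenced entries from the cold end of the LRU list until
// usage_ + charge fits within capacity_ or the list is empty. Evicted handles
// are appended to *deleted so the caller can free them outside the mutex.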
void LRUCacheShard::EvictFromLRU(size_t charge,
                                 autovector<LRUHandle*>* deleted) {
  while ((usage_ + charge) > capacity_ && lru_.next != &lru_) {
    LRUHandle* old = lru_.next;
    // LRU list contains only elements which can be evicted
    assert(old->InCache() && !old->HasRefs());
    LRU_Remove(old);
    table_.Remove(old->key(), old->hash);
    old->SetInCache(false);
    size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_);
    assert(usage_ >= old_total_charge);
    usage_ -= old_total_charge;
    deleted->push_back(old);
  }
}

void LRUCacheShard::SetCapacity(size_t capacity) {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    capacity_ = capacity;
    high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
    EvictFromLRU(0, &last_reference_list);
  }

  // Free the entries outside of mutex for performance reasons
  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
  MutexLock l(&mutex_);
  strict_capacity_limit_ = strict_capacity_limit;
}

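// Looks up "key". On a hit the handle is referenced and taken off the LRU
// list, so it cannot be evicted until it is released.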
Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
  MutexLock l(&mutex_);
  LRUHandle* e = table_.Lookup(key, hash);
  if (e != nullptr) {
    assert(e->InCache());
    if (!e->HasRefs()) {
      // The entry is in LRU since it's in hash and has no external references
      LRU_Remove(e);
    }
    e->Ref();
    e->SetHit();
  }
  return reinterpret_cast<Cache::Handle*>(e);
}

bool LRUCacheShard::Ref(Cache::Handle* h) {
  LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
  MutexLock l(&mutex_);
  // To create another reference, the entry must already be externally
  // referenced.
  assert(e->HasRefs());
  e->Ref();
  return true;
}

void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
  MutexLock l(&mutex_);
  high_pri_pool_ratio_ = high_pri_pool_ratio;
  high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
  MaintainPoolSize();
}

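// Drops one reference to "handle". When this is the last reference, the entry
// is either returned to the LRU list (so it stays cached) or, if force_erase
// is set or the cache is over capacity, removed and freed. Returns true iff
// the entry was freed.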
bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
  if (handle == nullptr) {
    return false;
  }
  LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    last_reference = e->Unref();
    if (last_reference && e->InCache()) {
      // The item is still in cache, and nobody else holds a reference to it
      if (usage_ > capacity_ || force_erase) {
        // The LRU list must be empty since the cache is full
        assert(lru_.next == &lru_ || force_erase);
        // Take this opportunity and remove the item
        table_.Remove(e->key(), e->hash);
        e->SetInCache(false);
      } else {
        // Put the item back on the LRU list, and don't free it
        LRU_Insert(e);
        last_reference = false;
      }
    }
    if (last_reference) {
      size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
      assert(usage_ >= total_charge);
      usage_ -= total_charge;
    }
  }

  // Free the entry here outside of mutex for performance reasons
  if (last_reference) {
    e->Free();
  }
  return last_reference;
}

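// Inserts a copy of "key" with the given value/charge. If "handle" is
// non-null, the new entry is returned referenced through it; otherwise it is
// placed directly on the LRU list. An existing entry for the same key is
// displaced, and Status::Incomplete is returned when strict_capacity_limit_
// prevents the insertion.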
Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
                             size_t charge,
                             void (*deleter)(const Slice& key, void* value),
                             Cache::Handle** handle, Cache::Priority priority) {
  // Allocate the memory here outside of the mutex.
  // If the cache is full, we'll have to release it.
  // It shouldn't happen very often though.
  LRUHandle* e = reinterpret_cast<LRUHandle*>(
      new char[sizeof(LRUHandle) - 1 + key.size()]);
  Status s = Status::OK();
  autovector<LRUHandle*> last_reference_list;

  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->flags = 0;
  e->hash = hash;
  e->refs = 0;
  e->next = e->prev = nullptr;
  e->SetInCache(true);
  e->SetPriority(priority);
  memcpy(e->key_data, key.data(), key.size());
  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);

  {
    MutexLock l(&mutex_);

    // Free the space following strict LRU policy until enough space
    // is freed or the LRU list is empty.
    EvictFromLRU(total_charge, &last_reference_list);

    if ((usage_ + total_charge) > capacity_ &&
        (strict_capacity_limit_ || handle == nullptr)) {
      if (handle == nullptr) {
        // Don't insert the entry but still return ok, as if the entry were
        // inserted into the cache and evicted immediately.
        e->SetInCache(false);
        last_reference_list.push_back(e);
      } else {
        delete[] reinterpret_cast<char*>(e);
        *handle = nullptr;
        s = Status::Incomplete("Insert failed due to LRU cache being full.");
      }
    } else {
      // Insert into the cache. Note that the cache might get larger than its
      // capacity if not enough space was freed up.
      LRUHandle* old = table_.Insert(e);
      usage_ += total_charge;
      if (old != nullptr) {
        s = Status::OkOverwritten();
        assert(old->InCache());
        old->SetInCache(false);
        if (!old->HasRefs()) {
          // old is on LRU because it's in cache and its reference count is 0
          LRU_Remove(old);
          size_t old_total_charge =
              old->CalcTotalCharge(metadata_charge_policy_);
          assert(usage_ >= old_total_charge);
          usage_ -= old_total_charge;
          last_reference_list.push_back(old);
        }
      }
      if (handle == nullptr) {
        LRU_Insert(e);
      } else {
        e->Ref();
        *handle = reinterpret_cast<Cache::Handle*>(e);
      }
    }
  }

  // Free the entries here outside of mutex for performance reasons
  for (auto entry : last_reference_list) {
    entry->Free();
  }

  return s;
}

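// Removes the entry for "key" from the table. The entry is freed immediately
// only if nothing else references it; otherwise it is freed when the last
// reference is released.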
void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
  LRUHandle* e;
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    e = table_.Remove(key, hash);
    if (e != nullptr) {
      assert(e->InCache());
      e->SetInCache(false);
      if (!e->HasRefs()) {
        // The entry is in LRU since it's in hash and has no external
        // references
        LRU_Remove(e);
        size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
        assert(usage_ >= total_charge);
        usage_ -= total_charge;
        last_reference = true;
      }
    }
  }

  // Free the entry here outside of mutex for performance reasons
  // last_reference will only be true if e != nullptr
  if (last_reference) {
    e->Free();
  }
}

size_t LRUCacheShard::GetUsage() const {
  MutexLock l(&mutex_);
  return usage_;
}

size_t LRUCacheShard::GetPinnedUsage() const {
  MutexLock l(&mutex_);
  assert(usage_ >= lru_usage_);
  return usage_ - lru_usage_;
}

std::string LRUCacheShard::GetPrintableOptions() const {
  const int kBufferSize = 200;
  char buffer[kBufferSize];
  {
    MutexLock l(&mutex_);
    snprintf(buffer, kBufferSize, "    high_pri_pool_ratio: %.3lf\n",
             high_pri_pool_ratio_);
  }
  return std::string(buffer);
}

LRUCache::LRUCache(size_t capacity, int num_shard_bits,
                   bool strict_capacity_limit, double high_pri_pool_ratio,
                   std::shared_ptr<MemoryAllocator> allocator,
                   bool use_adaptive_mutex,
                   CacheMetadataChargePolicy metadata_charge_policy)
    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit,
                   std::move(allocator)) {
  num_shards_ = 1 << num_shard_bits;
  shards_ = reinterpret_cast<LRUCacheShard*>(
      port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
  size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
  for (int i = 0; i < num_shards_; i++) {
    new (&shards_[i])
        LRUCacheShard(per_shard, strict_capacity_limit, high_pri_pool_ratio,
                      use_adaptive_mutex, metadata_charge_policy);
  }
}

LRUCache::~LRUCache() {
  if (shards_ != nullptr) {
    assert(num_shards_ > 0);
    for (int i = 0; i < num_shards_; i++) {
      shards_[i].~LRUCacheShard();
    }
    port::cacheline_aligned_free(shards_);
  }
}

CacheShard* LRUCache::GetShard(int shard) {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

const CacheShard* LRUCache::GetShard(int shard) const {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

void* LRUCache::Value(Handle* handle) {
  return reinterpret_cast<const LRUHandle*>(handle)->value;
}

size_t LRUCache::GetCharge(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->charge;
}

uint32_t LRUCache::GetHash(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->hash;
}

void LRUCache::DisownData() {
  // Do not drop data if compiled with ASAN to suppress leak warning.
#if defined(__clang__)
#if !defined(__has_feature) || !__has_feature(address_sanitizer)
  shards_ = nullptr;
  num_shards_ = 0;
#endif
#else  // __clang__
#ifndef __SANITIZE_ADDRESS__
  shards_ = nullptr;
  num_shards_ = 0;
#endif  // !__SANITIZE_ADDRESS__
#endif  // __clang__
}

size_t LRUCache::TEST_GetLRUSize() {
  size_t lru_size_of_all_shards = 0;
  for (int i = 0; i < num_shards_; i++) {
    lru_size_of_all_shards += shards_[i].TEST_GetLRUSize();
  }
  return lru_size_of_all_shards;
}

double LRUCache::GetHighPriPoolRatio() {
  double result = 0.0;
  if (num_shards_ > 0) {
    result = shards_[0].GetHighPriPoolRatio();
  }
  return result;
}

std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
  return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
                     cache_opts.strict_capacity_limit,
                     cache_opts.high_pri_pool_ratio,
                     cache_opts.memory_allocator, cache_opts.use_adaptive_mutex,
                     cache_opts.metadata_charge_policy);
}

std::shared_ptr<Cache> NewLRUCache(
    size_t capacity, int num_shard_bits, bool strict_capacity_limit,
    double high_pri_pool_ratio,
    std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
    CacheMetadataChargePolicy metadata_charge_policy) {
  if (num_shard_bits >= 20) {
    return nullptr;  // the cache cannot be sharded into too many fine pieces
  }
  if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
    // invalid high_pri_pool_ratio
    return nullptr;
  }
  if (num_shard_bits < 0) {
    num_shard_bits = GetDefaultCacheShardBits(capacity);
  }
  return std::make_shared<LRUCache>(
      capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
      std::move(memory_allocator), use_adaptive_mutex, metadata_charge_policy);
}

}  // namespace ROCKSDB_NAMESPACE