// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include "cache/lru_cache.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>

#include "util/mutexlock.h"

namespace rocksdb {

LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
  Resize();
}

LRUHandleTable::~LRUHandleTable() {
  ApplyToAllCacheEntries([](LRUHandle* h) {
    if (h->refs == 1) {
      h->Free();
    }
  });
  delete[] list_;
}

LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
  return *FindPointer(key, hash);
}

LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
  LRUHandle** ptr = FindPointer(h->key(), h->hash);
  LRUHandle* old = *ptr;
  h->next_hash = (old == nullptr ? nullptr : old->next_hash);
  *ptr = h;
  if (old == nullptr) {
    ++elems_;
    if (elems_ > length_) {
      // Since each cache entry is fairly large, we aim for a small
      // average linked list length (<= 1).
      Resize();
    }
  }
  return old;
}

LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = FindPointer(key, hash);
  LRUHandle* result = *ptr;
  if (result != nullptr) {
    *ptr = result->next_hash;
    --elems_;
  }
  return result;
}

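// Return a pointer to the slot that points to the cache entry matching
// key/hash. If there is no such entry, return a pointer to the trailing
// null slot of the corresponding hash chain.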
LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = &list_[hash & (length_ - 1)];
  while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
    ptr = &(*ptr)->next_hash;
  }
  return ptr;
}

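// Grow the bucket array (powers of two, at least 16) so the average hash
// chain stays short, then rehash every existing entry into the new array.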
void LRUHandleTable::Resize() {
  uint32_t new_length = 16;
  while (new_length < elems_ * 1.5) {
    new_length *= 2;
  }
  LRUHandle** new_list = new LRUHandle*[new_length];
  memset(new_list, 0, sizeof(new_list[0]) * new_length);
  uint32_t count = 0;
  for (uint32_t i = 0; i < length_; i++) {
    LRUHandle* h = list_[i];
    while (h != nullptr) {
      LRUHandle* next = h->next_hash;
      uint32_t hash = h->hash;
      LRUHandle** ptr = &new_list[hash & (new_length - 1)];
      h->next_hash = *ptr;
      *ptr = h;
      h = next;
      count++;
    }
  }
  assert(elems_ == count);
  delete[] list_;
  list_ = new_list;
  length_ = new_length;
}

LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
                             double high_pri_pool_ratio,
                             bool use_adaptive_mutex)
    : capacity_(0),
      high_pri_pool_usage_(0),
      strict_capacity_limit_(strict_capacity_limit),
      high_pri_pool_ratio_(high_pri_pool_ratio),
      high_pri_pool_capacity_(0),
      usage_(0),
      lru_usage_(0),
      mutex_(use_adaptive_mutex) {
  // Make empty circular linked list
  lru_.next = &lru_;
  lru_.prev = &lru_;
  lru_low_pri_ = &lru_;
  SetCapacity(capacity);
}

LRUCacheShard::~LRUCacheShard() {}

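// Decrement the entry's reference count; return true when the count drops
// to zero and the entry can be freed.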
bool LRUCacheShard::Unref(LRUHandle* e) {
  assert(e->refs > 0);
  e->refs--;
  return e->refs == 0;
}

// Call deleter and free

void LRUCacheShard::EraseUnRefEntries() {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    while (lru_.next != &lru_) {
      LRUHandle* old = lru_.next;
      assert(old->InCache());
      assert(old->refs ==
             1);  // LRU list contains elements which may be evicted
      LRU_Remove(old);
      table_.Remove(old->key(), old->hash);
      old->SetInCache(false);
      Unref(old);
      usage_ -= old->charge;
      last_reference_list.push_back(old);
    }
  }

  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                           bool thread_safe) {
  if (thread_safe) {
    mutex_.Lock();
  }
  table_.ApplyToAllCacheEntries(
      [callback](LRUHandle* h) { callback(h->value, h->charge); });
  if (thread_safe) {
    mutex_.Unlock();
  }
}

void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
  *lru = &lru_;
  *lru_low_pri = lru_low_pri_;
}

size_t LRUCacheShard::TEST_GetLRUSize() {
  LRUHandle* lru_handle = lru_.next;
  size_t lru_size = 0;
  while (lru_handle != &lru_) {
    lru_size++;
    lru_handle = lru_handle->next;
  }
  return lru_size;
}

double LRUCacheShard::GetHighPriPoolRatio() {
  MutexLock l(&mutex_);
  return high_pri_pool_ratio_;
}

void LRUCacheShard::LRU_Remove(LRUHandle* e) {
  assert(e->next != nullptr);
  assert(e->prev != nullptr);
  if (lru_low_pri_ == e) {
    lru_low_pri_ = e->prev;
  }
  e->next->prev = e->prev;
  e->prev->next = e->next;
  e->prev = e->next = nullptr;
  lru_usage_ -= e->charge;
  if (e->InHighPriPool()) {
    assert(high_pri_pool_usage_ >= e->charge);
    high_pri_pool_usage_ -= e->charge;
  }
}

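// Insert "e" into the LRU list: entries that are high priority or have been
// hit before go to the head of the high-pri pool (when the pool is enabled);
// everything else goes to the head of the low-pri pool.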
void LRUCacheShard::LRU_Insert(LRUHandle* e) {
  assert(e->next == nullptr);
  assert(e->prev == nullptr);
  if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
    // Insert "e" to head of LRU list.
    e->next = &lru_;
    e->prev = lru_.prev;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(true);
    high_pri_pool_usage_ += e->charge;
    MaintainPoolSize();
  } else {
    // Insert "e" to the head of low-pri pool. Note that when
    // high_pri_pool_ratio is 0, head of low-pri pool is also head of LRU list.
    e->next = lru_low_pri_->next;
    e->prev = lru_low_pri_;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(false);
    lru_low_pri_ = e;
  }
  lru_usage_ += e->charge;
}

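// Shrink the high-pri pool back within its capacity by moving the
// lru_low_pri_ boundary forward, demoting the oldest high-pri entries to
// the low-pri pool.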
void LRUCacheShard::MaintainPoolSize() {
  while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
    // Overflow last entry in high-pri pool to low-pri pool.
    lru_low_pri_ = lru_low_pri_->next;
    assert(lru_low_pri_ != &lru_);
    lru_low_pri_->SetInHighPriPool(false);
    high_pri_pool_usage_ -= lru_low_pri_->charge;
  }
}

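// Evict entries from the least-recently-used end of the list until "charge"
// additional bytes fit within capacity_ or the list is empty. Caller must
// hold mutex_; evicted handles are appended to *deleted so they can be
// freed after the mutex is released.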
void LRUCacheShard::EvictFromLRU(size_t charge,
                                 autovector<LRUHandle*>* deleted) {
  while (usage_ + charge > capacity_ && lru_.next != &lru_) {
    LRUHandle* old = lru_.next;
    assert(old->InCache());
    assert(old->refs == 1);  // LRU list contains elements which may be evicted
    LRU_Remove(old);
    table_.Remove(old->key(), old->hash);
    old->SetInCache(false);
    Unref(old);
    usage_ -= old->charge;
    deleted->push_back(old);
  }
}

void LRUCacheShard::SetCapacity(size_t capacity) {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    capacity_ = capacity;
    high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
    EvictFromLRU(0, &last_reference_list);
  }
  // we free the entries here outside of mutex for
  // performance reasons
  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
  MutexLock l(&mutex_);
  strict_capacity_limit_ = strict_capacity_limit;
}

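// Look up the entry for key/hash. On a hit the entry is pinned: it is taken
// off the LRU list if it was unreferenced, its reference count is bumped,
// and it is marked as hit so LRU_Insert can promote it later.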
Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
  MutexLock l(&mutex_);
  LRUHandle* e = table_.Lookup(key, hash);
  if (e != nullptr) {
    assert(e->InCache());
    if (e->refs == 1) {
      LRU_Remove(e);
    }
    e->refs++;
    e->SetHit();
  }
  return reinterpret_cast<Cache::Handle*>(e);
}

bool LRUCacheShard::Ref(Cache::Handle* h) {
  LRUHandle* handle = reinterpret_cast<LRUHandle*>(h);
  MutexLock l(&mutex_);
  if (handle->InCache() && handle->refs == 1) {
    LRU_Remove(handle);
  }
  handle->refs++;
  return true;
}

void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
  MutexLock l(&mutex_);
  high_pri_pool_ratio_ = high_pri_pool_ratio;
  high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
  MaintainPoolSize();
}

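// Drop one reference to "handle". If that was the last reference, or if the
// entry is otherwise unreferenced and the cache is over capacity (or
// force_erase is set), the entry is erased from the table and freed outside
// the mutex; otherwise it goes back onto the LRU list. Returns true iff the
// entry was freed.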
bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
  if (handle == nullptr) {
    return false;
  }
  LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    last_reference = Unref(e);
    if (last_reference) {
      usage_ -= e->charge;
    }
    if (e->refs == 1 && e->InCache()) {
      // The item is still in cache, and nobody else holds a reference to it
      if (usage_ > capacity_ || force_erase) {
        // the cache is full
        // The LRU list must be empty since the cache is full
        assert(!(usage_ > capacity_) || lru_.next == &lru_);
        // take this opportunity and remove the item
        table_.Remove(e->key(), e->hash);
        e->SetInCache(false);
        Unref(e);
        usage_ -= e->charge;
        last_reference = true;
      } else {
        // put the item on the list to be potentially freed
        LRU_Insert(e);
      }
    }
  }

  // free outside of mutex
  if (last_reference) {
    e->Free();
  }
  return last_reference;
}

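// Allocate a handle for key/value, evict unreferenced entries to make room,
// and insert it into the hash table. If the entry still does not fit, it is
// either dropped (no handle requested) or rejected with Status::Incomplete
// (strict_capacity_limit); otherwise the cache may temporarily grow beyond
// its capacity.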
Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
                             size_t charge,
                             void (*deleter)(const Slice& key, void* value),
                             Cache::Handle** handle, Cache::Priority priority) {
  // Allocate the memory here outside of the mutex.
  // If the cache is full, we'll have to release it;
  // it shouldn't happen very often though.
  LRUHandle* e = reinterpret_cast<LRUHandle*>(
      new char[sizeof(LRUHandle) - 1 + key.size()]);
  Status s;
  autovector<LRUHandle*> last_reference_list;

  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->flags = 0;
  e->hash = hash;
  e->refs = (handle == nullptr
                 ? 1
                 : 2);  // One from LRUCache, one for the returned handle
  e->next = e->prev = nullptr;
  e->SetInCache(true);
  e->SetPriority(priority);
  memcpy(e->key_data, key.data(), key.size());

  {
    MutexLock l(&mutex_);

    // Free the space following strict LRU policy until enough space
    // is freed or the LRU list is empty
    EvictFromLRU(charge, &last_reference_list);

    if (usage_ - lru_usage_ + charge > capacity_ &&
        (strict_capacity_limit_ || handle == nullptr)) {
      if (handle == nullptr) {
        // Don't insert the entry but still return ok, as if the entry were
        // inserted into the cache and evicted immediately.
        last_reference_list.push_back(e);
      } else {
        delete[] reinterpret_cast<char*>(e);
        *handle = nullptr;
        s = Status::Incomplete("Insert failed due to LRU cache being full.");
      }
    } else {
      // Insert into the cache. Note that the cache might get larger than its
      // capacity if not enough space was freed up.
      LRUHandle* old = table_.Insert(e);
      usage_ += e->charge;
      if (old != nullptr) {
        old->SetInCache(false);
        if (Unref(old)) {
          usage_ -= old->charge;
          // old is on the LRU list because it is in the cache and its
          // reference count was just 1 (Unref returned true)
          LRU_Remove(old);
          last_reference_list.push_back(old);
        }
      }
      if (handle == nullptr) {
        LRU_Insert(e);
      } else {
        *handle = reinterpret_cast<Cache::Handle*>(e);
      }
      s = Status::OK();
    }
  }

  // we free the entries here outside of mutex for
  // performance reasons
  for (auto entry : last_reference_list) {
    entry->Free();
  }

  return s;
}

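// Remove the entry for key/hash from the hash table. If nothing else holds
// a reference, the entry is freed immediately (outside the mutex); otherwise
// it is freed when the last handle is released.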
void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
  LRUHandle* e;
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    e = table_.Remove(key, hash);
    if (e != nullptr) {
      last_reference = Unref(e);
      if (last_reference) {
        usage_ -= e->charge;
      }
      if (last_reference && e->InCache()) {
        LRU_Remove(e);
      }
      e->SetInCache(false);
    }
  }

  // mutex not held here
  // last_reference will only be true if e != nullptr
  if (last_reference) {
    e->Free();
  }
}

size_t LRUCacheShard::GetUsage() const {
  MutexLock l(&mutex_);
  return usage_;
}

size_t LRUCacheShard::GetPinnedUsage() const {
  MutexLock l(&mutex_);
  assert(usage_ >= lru_usage_);
  return usage_ - lru_usage_;
}

std::string LRUCacheShard::GetPrintableOptions() const {
  const int kBufferSize = 200;
  char buffer[kBufferSize];
  {
    MutexLock l(&mutex_);
    snprintf(buffer, kBufferSize, "    high_pri_pool_ratio: %.3lf\n",
             high_pri_pool_ratio_);
  }
  return std::string(buffer);
}

LRUCache::LRUCache(size_t capacity, int num_shard_bits,
                   bool strict_capacity_limit, double high_pri_pool_ratio,
                   std::shared_ptr<MemoryAllocator> allocator,
                   bool use_adaptive_mutex)
    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit,
                   std::move(allocator)) {
  num_shards_ = 1 << num_shard_bits;
  shards_ = reinterpret_cast<LRUCacheShard*>(
      port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
  size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
  for (int i = 0; i < num_shards_; i++) {
    new (&shards_[i])
        LRUCacheShard(per_shard, strict_capacity_limit, high_pri_pool_ratio,
                      use_adaptive_mutex);
  }
}

LRUCache::~LRUCache() {
  if (shards_ != nullptr) {
    assert(num_shards_ > 0);
    for (int i = 0; i < num_shards_; i++) {
      shards_[i].~LRUCacheShard();
    }
    port::cacheline_aligned_free(shards_);
  }
}

CacheShard* LRUCache::GetShard(int shard) {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

const CacheShard* LRUCache::GetShard(int shard) const {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

void* LRUCache::Value(Handle* handle) {
  return reinterpret_cast<const LRUHandle*>(handle)->value;
}

size_t LRUCache::GetCharge(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->charge;
}

uint32_t LRUCache::GetHash(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->hash;
}

void LRUCache::DisownData() {
  // Do not drop data if compiled with ASAN, to suppress leak warnings.
#if defined(__clang__)
#if !defined(__has_feature) || !__has_feature(address_sanitizer)
  shards_ = nullptr;
  num_shards_ = 0;
#endif
#else  // __clang__
#ifndef __SANITIZE_ADDRESS__
  shards_ = nullptr;
  num_shards_ = 0;
#endif  // !__SANITIZE_ADDRESS__
#endif  // __clang__
}

size_t LRUCache::TEST_GetLRUSize() {
  size_t lru_size_of_all_shards = 0;
  for (int i = 0; i < num_shards_; i++) {
    lru_size_of_all_shards += shards_[i].TEST_GetLRUSize();
  }
  return lru_size_of_all_shards;
}

double LRUCache::GetHighPriPoolRatio() {
  double result = 0.0;
  if (num_shards_ > 0) {
    result = shards_[0].GetHighPriPoolRatio();
  }
  return result;
}

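// Typical usage (sketch only; it relies on the LRUCacheOptions fields
// consumed below: capacity, num_shard_bits, high_pri_pool_ratio):
//
//   rocksdb::LRUCacheOptions opts;
//   opts.capacity = 128 << 20;        // 128 MB total across all shards
//   opts.num_shard_bits = -1;         // let the cache pick a shard count
//   opts.high_pri_pool_ratio = 0.5;   // reserve half of each shard for
//                                     // high-priority entries
//   std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(opts);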
std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
  return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
                     cache_opts.strict_capacity_limit,
                     cache_opts.high_pri_pool_ratio,
                     cache_opts.memory_allocator,
                     cache_opts.use_adaptive_mutex);
}

std::shared_ptr<Cache> NewLRUCache(
    size_t capacity, int num_shard_bits, bool strict_capacity_limit,
    double high_pri_pool_ratio,
    std::shared_ptr<MemoryAllocator> memory_allocator,
    bool use_adaptive_mutex) {
  if (num_shard_bits >= 20) {
    return nullptr;  // the cache cannot be sharded into too many fine pieces
  }
  if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
    // invalid high_pri_pool_ratio
    return nullptr;
  }
  if (num_shard_bits < 0) {
    num_shard_bits = GetDefaultCacheShardBits(capacity);
  }
  return std::make_shared<LRUCache>(capacity, num_shard_bits,
                                    strict_capacity_limit, high_pri_pool_ratio,
                                    std::move(memory_allocator),
                                    use_adaptive_mutex);
}

}  // namespace rocksdb