// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "cache/lru_cache.h"

#include <assert.h>
#include <stdio.h>
#include <string>

#include "util/mutexlock.h"

namespace ROCKSDB_NAMESPACE {

LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
  Resize();
}

LRUHandleTable::~LRUHandleTable() {
  ApplyToAllCacheEntries([](LRUHandle* h) {
    if (!h->HasRefs()) {
      h->Free();
    }
  });
  delete[] list_;
}

LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
  return *FindPointer(key, hash);
}

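// Inserts "h" into the hash table. If an entry with the same key and hash is
// already present, it is unlinked from its bucket chain and returned so the
// caller can release it; otherwise nullptr is returned.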
LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
  LRUHandle** ptr = FindPointer(h->key(), h->hash);
  LRUHandle* old = *ptr;
  h->next_hash = (old == nullptr ? nullptr : old->next_hash);
  *ptr = h;
  if (old == nullptr) {
    ++elems_;
    if (elems_ > length_) {
      // Since each cache entry is fairly large, we aim for a small
      // average linked list length (<= 1).
      Resize();
    }
  }
  return old;
}

LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = FindPointer(key, hash);
  LRUHandle* result = *ptr;
  if (result != nullptr) {
    *ptr = result->next_hash;
    --elems_;
  }
  return result;
}

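// Returns a pointer to the slot that points to the entry matching key/hash.
// If no such entry exists, returns a pointer to the trailing slot of the
// corresponding bucket chain, which the caller can use to link in a new entry.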
LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = &list_[hash & (length_ - 1)];
  while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
    ptr = &(*ptr)->next_hash;
  }
  return ptr;
}

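// Grows the bucket array (doubling, starting from a minimum of 16) so the
// average bucket chain stays short, then rehashes every handle into the new
// buckets.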
void LRUHandleTable::Resize() {
  uint32_t new_length = 16;
  while (new_length < elems_ * 1.5) {
    new_length *= 2;
  }
  LRUHandle** new_list = new LRUHandle*[new_length];
  memset(new_list, 0, sizeof(new_list[0]) * new_length);
  uint32_t count = 0;
  for (uint32_t i = 0; i < length_; i++) {
    LRUHandle* h = list_[i];
    while (h != nullptr) {
      LRUHandle* next = h->next_hash;
      uint32_t hash = h->hash;
      LRUHandle** ptr = &new_list[hash & (new_length - 1)];
      h->next_hash = *ptr;
      *ptr = h;
      h = next;
      count++;
    }
  }
  assert(elems_ == count);
  delete[] list_;
  list_ = new_list;
  length_ = new_length;
}

LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
                             double high_pri_pool_ratio,
                             bool use_adaptive_mutex,
                             CacheMetadataChargePolicy metadata_charge_policy)
    : capacity_(0),
      high_pri_pool_usage_(0),
      strict_capacity_limit_(strict_capacity_limit),
      high_pri_pool_ratio_(high_pri_pool_ratio),
      high_pri_pool_capacity_(0),
      usage_(0),
      lru_usage_(0),
      mutex_(use_adaptive_mutex) {
  set_metadata_charge_policy(metadata_charge_policy);
  // Make empty circular linked list
  lru_.next = &lru_;
  lru_.prev = &lru_;
  lru_low_pri_ = &lru_;
  SetCapacity(capacity);
}

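// Frees every entry that is in the cache but has no external references, i.e.
// everything currently on the LRU list. Entries are unlinked under the mutex
// and freed after it is released.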
void LRUCacheShard::EraseUnRefEntries() {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    while (lru_.next != &lru_) {
      LRUHandle* old = lru_.next;
      // LRU list contains only elements which can be evicted
      assert(old->InCache() && !old->HasRefs());
      LRU_Remove(old);
      table_.Remove(old->key(), old->hash);
      old->SetInCache(false);
      size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_);
      assert(usage_ >= total_charge);
      usage_ -= total_charge;
      last_reference_list.push_back(old);
    }
  }

  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                           bool thread_safe) {
  const auto applyCallback = [&]() {
    table_.ApplyToAllCacheEntries(
        [callback](LRUHandle* h) { callback(h->value, h->charge); });
  };

  if (thread_safe) {
    MutexLock l(&mutex_);
    applyCallback();
  } else {
    applyCallback();
  }
}

void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
  MutexLock l(&mutex_);
  *lru = &lru_;
  *lru_low_pri = lru_low_pri_;
}

size_t LRUCacheShard::TEST_GetLRUSize() {
  MutexLock l(&mutex_);
  LRUHandle* lru_handle = lru_.next;
  size_t lru_size = 0;
  while (lru_handle != &lru_) {
    lru_size++;
    lru_handle = lru_handle->next;
  }
  return lru_size;
}

double LRUCacheShard::GetHighPriPoolRatio() {
  MutexLock l(&mutex_);
  return high_pri_pool_ratio_;
}

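// Unlinks "e" from the LRU list and subtracts its charge from the LRU (and,
// if applicable, high-pri pool) usage counters.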
void LRUCacheShard::LRU_Remove(LRUHandle* e) {
  assert(e->next != nullptr);
  assert(e->prev != nullptr);
  if (lru_low_pri_ == e) {
    lru_low_pri_ = e->prev;
  }
  e->next->prev = e->prev;
  e->prev->next = e->next;
  e->prev = e->next = nullptr;
  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
  assert(lru_usage_ >= total_charge);
  lru_usage_ -= total_charge;
  if (e->InHighPriPool()) {
    assert(high_pri_pool_usage_ >= total_charge);
    high_pri_pool_usage_ -= total_charge;
  }
}

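// Links "e" at the MRU end of the LRU list. Entries that are high priority or
// have been hit before go into the high-pri pool (when one is configured);
// everything else is placed at the head of the low-pri pool.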
void LRUCacheShard::LRU_Insert(LRUHandle* e) {
  assert(e->next == nullptr);
  assert(e->prev == nullptr);
  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
  if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
    // Insert "e" to head of LRU list.
    e->next = &lru_;
    e->prev = lru_.prev;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(true);
    high_pri_pool_usage_ += total_charge;
    MaintainPoolSize();
  } else {
    // Insert "e" to the head of low-pri pool. Note that when
    // high_pri_pool_ratio is 0, head of low-pri pool is also head of LRU list.
    e->next = lru_low_pri_->next;
    e->prev = lru_low_pri_;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(false);
    lru_low_pri_ = e;
  }
  lru_usage_ += total_charge;
}

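// Moves entries from the cold end of the high-pri pool into the low-pri pool
// until the high-pri pool usage fits within its capacity.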
void LRUCacheShard::MaintainPoolSize() {
  while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
    // Overflow last entry in high-pri pool to low-pri pool.
    lru_low_pri_ = lru_low_pri_->next;
    assert(lru_low_pri_ != &lru_);
    lru_low_pri_->SetInHighPriPool(false);
    size_t total_charge =
        lru_low_pri_->CalcTotalCharge(metadata_charge_policy_);
    assert(high_pri_pool_usage_ >= total_charge);
    high_pri_pool_usage_ -= total_charge;
  }
}

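// Evicts unreferenced entries from the cold end of the LRU list until
// usage_ + charge fits within capacity_ or the list is empty. Evicted handles
// are appended to *deleted so the caller can free them outside the mutex.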
void LRUCacheShard::EvictFromLRU(size_t charge,
                                 autovector<LRUHandle*>* deleted) {
  while ((usage_ + charge) > capacity_ && lru_.next != &lru_) {
    LRUHandle* old = lru_.next;
    // LRU list contains only elements which can be evicted
    assert(old->InCache() && !old->HasRefs());
    LRU_Remove(old);
    table_.Remove(old->key(), old->hash);
    old->SetInCache(false);
    size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_);
    assert(usage_ >= old_total_charge);
    usage_ -= old_total_charge;
    deleted->push_back(old);
  }
}

void LRUCacheShard::SetCapacity(size_t capacity) {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    capacity_ = capacity;
    high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
    EvictFromLRU(0, &last_reference_list);
  }

  // Free the entries outside of mutex for performance reasons
  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
  MutexLock l(&mutex_);
  strict_capacity_limit_ = strict_capacity_limit;
}

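// Looks up "key". On a hit the handle is referenced and taken off the LRU
// list, so it cannot be evicted until it is released.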
Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
  MutexLock l(&mutex_);
  LRUHandle* e = table_.Lookup(key, hash);
  if (e != nullptr) {
    assert(e->InCache());
    if (!e->HasRefs()) {
      // The entry is in LRU since it's in hash and has no external references
      LRU_Remove(e);
    }
    e->Ref();
    e->SetHit();
  }
  return reinterpret_cast<Cache::Handle*>(e);
}

bool LRUCacheShard::Ref(Cache::Handle* h) {
  LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
  MutexLock l(&mutex_);
  // To create another reference, the entry must already be externally
  // referenced.
  assert(e->HasRefs());
  e->Ref();
  return true;
}

void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
  MutexLock l(&mutex_);
  high_pri_pool_ratio_ = high_pri_pool_ratio;
  high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
  MaintainPoolSize();
}

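// Drops one reference to "handle". When this is the last reference, the entry
// is either returned to the LRU list (so it stays cached) or, if force_erase
// is set or the cache is over capacity, removed and freed. Returns true iff
// the entry was freed.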
bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
  if (handle == nullptr) {
    return false;
  }
  LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    last_reference = e->Unref();
    if (last_reference && e->InCache()) {
      // The item is still in cache, and nobody else holds a reference to it
      if (usage_ > capacity_ || force_erase) {
        // The LRU list must be empty since the cache is full
        assert(lru_.next == &lru_ || force_erase);
        // Take this opportunity and remove the item
        table_.Remove(e->key(), e->hash);
        e->SetInCache(false);
      } else {
        // Put the item back on the LRU list, and don't free it
        LRU_Insert(e);
        last_reference = false;
      }
    }
    if (last_reference) {
      size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
      assert(usage_ >= total_charge);
      usage_ -= total_charge;
    }
  }

  // Free the entry here outside of mutex for performance reasons
  if (last_reference) {
    e->Free();
  }
  return last_reference;
}

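// Inserts a copy of "key" with the given value/charge. If "handle" is
// non-null, the new entry is returned referenced through it; otherwise it is
// placed directly on the LRU list. An existing entry for the same key is
// displaced, and Status::Incomplete is returned when strict_capacity_limit_
// prevents the insertion.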
Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
                             size_t charge,
                             void (*deleter)(const Slice& key, void* value),
                             Cache::Handle** handle, Cache::Priority priority) {
  // Allocate the memory here outside of the mutex.
  // If the cache is full, we'll have to release it.
  // It shouldn't happen very often though.
  LRUHandle* e = reinterpret_cast<LRUHandle*>(
      new char[sizeof(LRUHandle) - 1 + key.size()]);
  Status s = Status::OK();
  autovector<LRUHandle*> last_reference_list;

  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->flags = 0;
  e->hash = hash;
  e->refs = 0;
  e->next = e->prev = nullptr;
  e->SetInCache(true);
  e->SetPriority(priority);
  memcpy(e->key_data, key.data(), key.size());
  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);

  {
    MutexLock l(&mutex_);

    // Free the space following strict LRU policy until enough space
    // is freed or the LRU list is empty.
    EvictFromLRU(total_charge, &last_reference_list);

    if ((usage_ + total_charge) > capacity_ &&
        (strict_capacity_limit_ || handle == nullptr)) {
      if (handle == nullptr) {
        // Don't insert the entry but still return ok, as if the entry were
        // inserted into the cache and evicted immediately.
        e->SetInCache(false);
        last_reference_list.push_back(e);
      } else {
        delete[] reinterpret_cast<char*>(e);
        *handle = nullptr;
        s = Status::Incomplete("Insert failed due to LRU cache being full.");
      }
    } else {
      // Insert into the cache. Note that the cache might get larger than its
      // capacity if not enough space was freed up.
      LRUHandle* old = table_.Insert(e);
      usage_ += total_charge;
      if (old != nullptr) {
        s = Status::OkOverwritten();
        assert(old->InCache());
        old->SetInCache(false);
        if (!old->HasRefs()) {
          // old is on LRU because it's in cache and its reference count is 0
          LRU_Remove(old);
          size_t old_total_charge =
              old->CalcTotalCharge(metadata_charge_policy_);
          assert(usage_ >= old_total_charge);
          usage_ -= old_total_charge;
          last_reference_list.push_back(old);
        }
      }
      if (handle == nullptr) {
        LRU_Insert(e);
      } else {
        e->Ref();
        *handle = reinterpret_cast<Cache::Handle*>(e);
      }
    }
  }

  // Free the entries here outside of mutex for performance reasons
  for (auto entry : last_reference_list) {
    entry->Free();
  }

  return s;
}

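// Removes the entry for "key" from the table. The entry is freed immediately
// only if nothing else references it; otherwise it is freed when the last
// reference is released.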
void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
  LRUHandle* e;
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    e = table_.Remove(key, hash);
    if (e != nullptr) {
      assert(e->InCache());
      e->SetInCache(false);
      if (!e->HasRefs()) {
        // The entry is in LRU since it's in hash and has no external
        // references
        LRU_Remove(e);
        size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
        assert(usage_ >= total_charge);
        usage_ -= total_charge;
        last_reference = true;
      }
    }
  }

  // Free the entry here outside of mutex for performance reasons
  // last_reference will only be true if e != nullptr
  if (last_reference) {
    e->Free();
  }
}

size_t LRUCacheShard::GetUsage() const {
  MutexLock l(&mutex_);
  return usage_;
}

size_t LRUCacheShard::GetPinnedUsage() const {
  MutexLock l(&mutex_);
  assert(usage_ >= lru_usage_);
  return usage_ - lru_usage_;
}

std::string LRUCacheShard::GetPrintableOptions() const {
  const int kBufferSize = 200;
  char buffer[kBufferSize];
  {
    MutexLock l(&mutex_);
    snprintf(buffer, kBufferSize, "    high_pri_pool_ratio: %.3lf\n",
             high_pri_pool_ratio_);
  }
  return std::string(buffer);
}

LRUCache::LRUCache(size_t capacity, int num_shard_bits,
                   bool strict_capacity_limit, double high_pri_pool_ratio,
                   std::shared_ptr<MemoryAllocator> allocator,
                   bool use_adaptive_mutex,
                   CacheMetadataChargePolicy metadata_charge_policy)
    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit,
                   std::move(allocator)) {
  num_shards_ = 1 << num_shard_bits;
  shards_ = reinterpret_cast<LRUCacheShard*>(
      port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
  size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
  for (int i = 0; i < num_shards_; i++) {
    new (&shards_[i])
        LRUCacheShard(per_shard, strict_capacity_limit, high_pri_pool_ratio,
                      use_adaptive_mutex, metadata_charge_policy);
  }
}

LRUCache::~LRUCache() {
  if (shards_ != nullptr) {
    assert(num_shards_ > 0);
    for (int i = 0; i < num_shards_; i++) {
      shards_[i].~LRUCacheShard();
    }
    port::cacheline_aligned_free(shards_);
  }
}

CacheShard* LRUCache::GetShard(int shard) {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

const CacheShard* LRUCache::GetShard(int shard) const {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

void* LRUCache::Value(Handle* handle) {
  return reinterpret_cast<const LRUHandle*>(handle)->value;
}

size_t LRUCache::GetCharge(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->charge;
}

uint32_t LRUCache::GetHash(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->hash;
}

void LRUCache::DisownData() {
  // Do not drop data if compiled with ASAN to suppress leak warning.
#if defined(__clang__)
#if !defined(__has_feature) || !__has_feature(address_sanitizer)
  shards_ = nullptr;
  num_shards_ = 0;
#endif
#else  // __clang__
#ifndef __SANITIZE_ADDRESS__
  shards_ = nullptr;
  num_shards_ = 0;
#endif  // !__SANITIZE_ADDRESS__
#endif  // __clang__
}

size_t LRUCache::TEST_GetLRUSize() {
  size_t lru_size_of_all_shards = 0;
  for (int i = 0; i < num_shards_; i++) {
    lru_size_of_all_shards += shards_[i].TEST_GetLRUSize();
  }
  return lru_size_of_all_shards;
}

double LRUCache::GetHighPriPoolRatio() {
  double result = 0.0;
  if (num_shards_ > 0) {
    result = shards_[0].GetHighPriPoolRatio();
  }
  return result;
}

std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
  return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
                     cache_opts.strict_capacity_limit,
                     cache_opts.high_pri_pool_ratio,
                     cache_opts.memory_allocator, cache_opts.use_adaptive_mutex,
                     cache_opts.metadata_charge_policy);
}

std::shared_ptr<Cache> NewLRUCache(
    size_t capacity, int num_shard_bits, bool strict_capacity_limit,
    double high_pri_pool_ratio,
    std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
    CacheMetadataChargePolicy metadata_charge_policy) {
  if (num_shard_bits >= 20) {
    return nullptr;  // the cache cannot be sharded into too many fine pieces
  }
  if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
    // invalid high_pri_pool_ratio
    return nullptr;
  }
  if (num_shard_bits < 0) {
    num_shard_bits = GetDefaultCacheShardBits(capacity);
  }
  return std::make_shared<LRUCache>(
      capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
      std::move(memory_allocator), use_adaptive_mutex, metadata_charge_policy);
}

}  // namespace ROCKSDB_NAMESPACE