// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include "cache/lru_cache.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>

#include "util/mutexlock.h"

namespace rocksdb {

LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
  Resize();
}

LRUHandleTable::~LRUHandleTable() {
  ApplyToAllCacheEntries([](LRUHandle* h) {
    if (h->refs == 1) {
      h->Free();
    }
  });
  delete[] list_;
}

LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
  return *FindPointer(key, hash);
}

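// Inserts h into the table. If an entry with the same key and hash already
// exists, it is unlinked and returned so the caller can release it; otherwise
// nullptr is returned and the table grows when the load factor exceeds 1.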
LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
  LRUHandle** ptr = FindPointer(h->key(), h->hash);
  LRUHandle* old = *ptr;
  h->next_hash = (old == nullptr ? nullptr : old->next_hash);
  *ptr = h;
  if (old == nullptr) {
    ++elems_;
    if (elems_ > length_) {
      // Since each cache entry is fairly large, we aim for a small
      // average linked list length (<= 1).
      Resize();
    }
  }
  return old;
}

LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = FindPointer(key, hash);
  LRUHandle* result = *ptr;
  if (result != nullptr) {
    *ptr = result->next_hash;
    --elems_;
  }
  return result;
}

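// Returns a pointer to the slot that points to the entry matching key/hash.
// If there is no such entry, returns a pointer to the trailing null slot in
// the corresponding bucket's chain.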
LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = &list_[hash & (length_ - 1)];
  while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
    ptr = &(*ptr)->next_hash;
  }
  return ptr;
}

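// Grows the bucket array to the next power of two that keeps the average
// chain length around one, then rehashes every existing entry into the new
// buckets.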
void LRUHandleTable::Resize() {
  uint32_t new_length = 16;
  while (new_length < elems_ * 1.5) {
    new_length *= 2;
  }
  LRUHandle** new_list = new LRUHandle*[new_length];
  memset(new_list, 0, sizeof(new_list[0]) * new_length);
  uint32_t count = 0;
  for (uint32_t i = 0; i < length_; i++) {
    LRUHandle* h = list_[i];
    while (h != nullptr) {
      LRUHandle* next = h->next_hash;
      uint32_t hash = h->hash;
      LRUHandle** ptr = &new_list[hash & (new_length - 1)];
      h->next_hash = *ptr;
      *ptr = h;
      h = next;
      count++;
    }
  }
  assert(elems_ == count);
  delete[] list_;
  list_ = new_list;
  length_ = new_length;
}

LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
                             double high_pri_pool_ratio)
    : capacity_(0),
      high_pri_pool_usage_(0),
      strict_capacity_limit_(strict_capacity_limit),
      high_pri_pool_ratio_(high_pri_pool_ratio),
      high_pri_pool_capacity_(0),
      usage_(0),
      lru_usage_(0) {
  // Make empty circular linked list
  lru_.next = &lru_;
  lru_.prev = &lru_;
  lru_low_pri_ = &lru_;
  SetCapacity(capacity);
}

LRUCacheShard::~LRUCacheShard() {}

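// Drops one reference from e. Returns true if that was the last reference,
// in which case the caller is responsible for freeing the handle.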
bool LRUCacheShard::Unref(LRUHandle* e) {
  assert(e->refs > 0);
  e->refs--;
  return e->refs == 0;
}

// Remove all entries that are only referenced by the cache itself, calling
// each entry's deleter and freeing it.
void LRUCacheShard::EraseUnRefEntries() {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    while (lru_.next != &lru_) {
      LRUHandle* old = lru_.next;
      assert(old->InCache());
      assert(old->refs == 1);  // LRU list contains elements which may be evicted
      LRU_Remove(old);
      table_.Remove(old->key(), old->hash);
      old->SetInCache(false);
      Unref(old);
      usage_ -= old->charge;
      last_reference_list.push_back(old);
    }
  }

  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                           bool thread_safe) {
  if (thread_safe) {
    mutex_.Lock();
  }
  table_.ApplyToAllCacheEntries(
      [callback](LRUHandle* h) { callback(h->value, h->charge); });
  if (thread_safe) {
    mutex_.Unlock();
  }
}

void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
  *lru = &lru_;
  *lru_low_pri = lru_low_pri_;
}

size_t LRUCacheShard::TEST_GetLRUSize() {
  LRUHandle* lru_handle = lru_.next;
  size_t lru_size = 0;
  while (lru_handle != &lru_) {
    lru_size++;
    lru_handle = lru_handle->next;
  }
  return lru_size;
}

double LRUCacheShard::GetHighPriPoolRatio() {
  MutexLock l(&mutex_);
  return high_pri_pool_ratio_;
}

void LRUCacheShard::LRU_Remove(LRUHandle* e) {
  assert(e->next != nullptr);
  assert(e->prev != nullptr);
  if (lru_low_pri_ == e) {
    lru_low_pri_ = e->prev;
  }
  e->next->prev = e->prev;
  e->prev->next = e->next;
  e->prev = e->next = nullptr;
  lru_usage_ -= e->charge;
  if (e->InHighPriPool()) {
    assert(high_pri_pool_usage_ >= e->charge);
    high_pri_pool_usage_ -= e->charge;
  }
}

void LRUCacheShard::LRU_Insert(LRUHandle* e) {
  assert(e->next == nullptr);
  assert(e->prev == nullptr);
  if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
    // Insert "e" to head of LRU list.
    e->next = &lru_;
    e->prev = lru_.prev;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(true);
    high_pri_pool_usage_ += e->charge;
    MaintainPoolSize();
  } else {
    // Insert "e" to the head of low-pri pool. Note that when
    // high_pri_pool_ratio is 0, head of low-pri pool is also head of LRU list.
    e->next = lru_low_pri_->next;
    e->prev = lru_low_pri_;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(false);
    lru_low_pri_ = e;
  }
  lru_usage_ += e->charge;
}

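// Moves the oldest entries of the high-pri pool into the low-pri pool until
// the high-pri pool's usage is within high_pri_pool_capacity_.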
void LRUCacheShard::MaintainPoolSize() {
  while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
    // Overflow last entry in high-pri pool to low-pri pool.
    lru_low_pri_ = lru_low_pri_->next;
    assert(lru_low_pri_ != &lru_);
    lru_low_pri_->SetInHighPriPool(false);
    high_pri_pool_usage_ -= lru_low_pri_->charge;
  }
}

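// Evicts entries from the cold end of the LRU list until an additional
// `charge` bytes fit within capacity_, or the list is empty. Evicted handles
// are appended to *deleted so the caller can free them outside the mutex.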
void LRUCacheShard::EvictFromLRU(size_t charge,
                                 autovector<LRUHandle*>* deleted) {
  while (usage_ + charge > capacity_ && lru_.next != &lru_) {
    LRUHandle* old = lru_.next;
    assert(old->InCache());
    assert(old->refs == 1);  // LRU list contains elements which may be evicted
    LRU_Remove(old);
    table_.Remove(old->key(), old->hash);
    old->SetInCache(false);
    Unref(old);
    usage_ -= old->charge;
    deleted->push_back(old);
  }
}

void LRUCacheShard::SetCapacity(size_t capacity) {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    capacity_ = capacity;
    high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
    EvictFromLRU(0, &last_reference_list);
  }
  // Free the entries here outside of the mutex for performance reasons.
  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
  MutexLock l(&mutex_);
  strict_capacity_limit_ = strict_capacity_limit;
}

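// Looks up key in this shard. On a hit the entry is pinned: its reference
// count is bumped and, if it was on the LRU list, it is removed from the list
// so it cannot be evicted while the caller holds the returned handle.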
Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
  MutexLock l(&mutex_);
  LRUHandle* e = table_.Lookup(key, hash);
  if (e != nullptr) {
    assert(e->InCache());
    if (e->refs == 1) {
      LRU_Remove(e);
    }
    e->refs++;
    e->SetHit();
  }
  return reinterpret_cast<Cache::Handle*>(e);
}

bool LRUCacheShard::Ref(Cache::Handle* h) {
  LRUHandle* handle = reinterpret_cast<LRUHandle*>(h);
  MutexLock l(&mutex_);
  if (handle->InCache() && handle->refs == 1) {
    LRU_Remove(handle);
  }
  handle->refs++;
  return true;
}

void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
  MutexLock l(&mutex_);
  high_pri_pool_ratio_ = high_pri_pool_ratio;
  high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
  MaintainPoolSize();
}

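// Releases a handle obtained from Lookup() or Insert(). Returns true if the
// release caused the entry to be freed, which happens when this was the last
// reference to an entry that is no longer in the cache, or when force_erase
// is set / the cache is over capacity and no one else references the entry.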
bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
  if (handle == nullptr) {
    return false;
  }
  LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    last_reference = Unref(e);
    if (last_reference) {
      usage_ -= e->charge;
    }
    if (e->refs == 1 && e->InCache()) {
      // The item is still in cache, and nobody else holds a reference to it
      if (usage_ > capacity_ || force_erase) {
        // the cache is full
        // The LRU list must be empty since the cache is full
        assert(!(usage_ > capacity_) || lru_.next == &lru_);
        // take this opportunity and remove the item
        table_.Remove(e->key(), e->hash);
        e->SetInCache(false);
        Unref(e);
        usage_ -= e->charge;
        last_reference = true;
      } else {
        // put the item on the list to be potentially freed
        LRU_Insert(e);
      }
    }
  }

  // free outside of mutex
  if (last_reference) {
    e->Free();
  }
  return last_reference;
}

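// Inserts a key/value pair with the given charge; the handle and the key
// bytes are allocated in a single block. If there is not enough room even
// after evicting from the LRU list, the outcome depends on
// strict_capacity_limit_ and whether the caller asked for a handle: the new
// entry may be dropped immediately (still returning OK) or the insert may
// fail with Status::Incomplete.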
Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
                             size_t charge,
                             void (*deleter)(const Slice& key, void* value),
                             Cache::Handle** handle, Cache::Priority priority) {
  // Allocate the memory here outside of the mutex
  // If the cache is full, we'll have to release it
  // It shouldn't happen very often though.
  LRUHandle* e = reinterpret_cast<LRUHandle*>(
      new char[sizeof(LRUHandle) - 1 + key.size()]);
  Status s;
  autovector<LRUHandle*> last_reference_list;

  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->flags = 0;
  e->hash = hash;
  e->refs = (handle == nullptr
                 ? 1
                 : 2);  // One from LRUCache, one for the returned handle
  e->next = e->prev = nullptr;
  e->SetInCache(true);
  e->SetPriority(priority);
  memcpy(e->key_data, key.data(), key.size());

  {
    MutexLock l(&mutex_);

    // Free the space following strict LRU policy until enough space
    // is freed or the lru list is empty
    EvictFromLRU(charge, &last_reference_list);

    if (usage_ - lru_usage_ + charge > capacity_ &&
        (strict_capacity_limit_ || handle == nullptr)) {
      if (handle == nullptr) {
        // Don't insert the entry but still return ok, as if the entry were
        // inserted into the cache and then evicted immediately.
        last_reference_list.push_back(e);
      } else {
        delete[] reinterpret_cast<char*>(e);
        *handle = nullptr;
        s = Status::Incomplete("Insert failed due to LRU cache being full.");
      }
    } else {
      // Insert into the cache. Note that the cache might get larger than its
      // capacity if not enough space was freed up.
      LRUHandle* old = table_.Insert(e);
      usage_ += e->charge;
      if (old != nullptr) {
        old->SetInCache(false);
        if (Unref(old)) {
          usage_ -= old->charge;
          // old is on the LRU list because it was in the cache and the cache
          // held its only remaining reference (Unref just dropped refs to 0)
          LRU_Remove(old);
          last_reference_list.push_back(old);
        }
      }
      if (handle == nullptr) {
        LRU_Insert(e);
      } else {
        *handle = reinterpret_cast<Cache::Handle*>(e);
      }
      s = Status::OK();
    }
  }

  // Free the entries here outside of the mutex for performance reasons.
  for (auto entry : last_reference_list) {
    entry->Free();
  }

  return s;
}

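// Erases the entry for key from the hash table. If no client holds a
// reference, the entry is freed immediately; otherwise it is only marked as
// no longer in the cache and is freed when the last handle is released.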
void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
  LRUHandle* e;
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    e = table_.Remove(key, hash);
    if (e != nullptr) {
      last_reference = Unref(e);
      if (last_reference) {
        usage_ -= e->charge;
      }
      if (last_reference && e->InCache()) {
        LRU_Remove(e);
      }
      e->SetInCache(false);
    }
  }

  // mutex not held here
  // last_reference will only be true if e != nullptr
  if (last_reference) {
    e->Free();
  }
}

size_t LRUCacheShard::GetUsage() const {
  MutexLock l(&mutex_);
  return usage_;
}

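// Pinned usage is the memory charged to entries that clients currently hold
// handles to, i.e. total usage minus the usage of entries sitting on the
// (evictable) LRU list.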
size_t LRUCacheShard::GetPinnedUsage() const {
  MutexLock l(&mutex_);
  assert(usage_ >= lru_usage_);
  return usage_ - lru_usage_;
}

std::string LRUCacheShard::GetPrintableOptions() const {
  const int kBufferSize = 200;
  char buffer[kBufferSize];
  {
    MutexLock l(&mutex_);
    snprintf(buffer, kBufferSize, "    high_pri_pool_ratio: %.3lf\n",
             high_pri_pool_ratio_);
  }
  return std::string(buffer);
}

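// The top-level LRUCache splits its capacity evenly across 2^num_shard_bits
// shards; each shard has its own mutex, hash table and LRU list, which keeps
// lock contention low under concurrent access.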
LRUCache::LRUCache(size_t capacity, int num_shard_bits,
                   bool strict_capacity_limit, double high_pri_pool_ratio)
    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
  num_shards_ = 1 << num_shard_bits;
  shards_ = reinterpret_cast<LRUCacheShard*>(
      port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
  size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
  for (int i = 0; i < num_shards_; i++) {
    new (&shards_[i])
        LRUCacheShard(per_shard, strict_capacity_limit, high_pri_pool_ratio);
  }
}

LRUCache::~LRUCache() {
  if (shards_ != nullptr) {
    assert(num_shards_ > 0);
    for (int i = 0; i < num_shards_; i++) {
      shards_[i].~LRUCacheShard();
    }
    port::cacheline_aligned_free(shards_);
  }
}

CacheShard* LRUCache::GetShard(int shard) {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

const CacheShard* LRUCache::GetShard(int shard) const {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

void* LRUCache::Value(Handle* handle) {
  return reinterpret_cast<const LRUHandle*>(handle)->value;
}

size_t LRUCache::GetCharge(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->charge;
}

uint32_t LRUCache::GetHash(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->hash;
}

void LRUCache::DisownData() {
  // Do not drop the data if compiled with ASAN, to suppress the leak warning.
#if defined(__clang__)
#if !defined(__has_feature) || !__has_feature(address_sanitizer)
  shards_ = nullptr;
  num_shards_ = 0;
#endif
#else  // __clang__
#ifndef __SANITIZE_ADDRESS__
  shards_ = nullptr;
  num_shards_ = 0;
#endif  // !__SANITIZE_ADDRESS__
#endif  // __clang__
}

size_t LRUCache::TEST_GetLRUSize() {
  size_t lru_size_of_all_shards = 0;
  for (int i = 0; i < num_shards_; i++) {
    lru_size_of_all_shards += shards_[i].TEST_GetLRUSize();
  }
  return lru_size_of_all_shards;
}

double LRUCache::GetHighPriPoolRatio() {
  double result = 0.0;
  if (num_shards_ > 0) {
    result = shards_[0].GetHighPriPoolRatio();
  }
  return result;
}

std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
  return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
                     cache_opts.strict_capacity_limit,
                     cache_opts.high_pri_pool_ratio);
}

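// Validates the parameters and constructs a sharded LRUCache. Returns nullptr
// if num_shard_bits is unreasonably large (>= 20) or high_pri_pool_ratio is
// outside [0.0, 1.0]. A negative num_shard_bits selects a default based on
// the capacity.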
std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
                                   bool strict_capacity_limit,
                                   double high_pri_pool_ratio) {
  if (num_shard_bits >= 20) {
    return nullptr;  // the cache cannot be sharded into too many fine pieces
  }
  if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
    // invalid high_pri_pool_ratio
    return nullptr;
  }
  if (num_shard_bits < 0) {
    num_shard_bits = GetDefaultCacheShardBits(capacity);
  }
  return std::make_shared<LRUCache>(capacity, num_shard_bits,
                                    strict_capacity_limit, high_pri_pool_ratio);
}

}  // namespace rocksdb