git.proxmox.com Git - ceph.git/blobdiff - ceph/src/rocksdb/cache/lru_cache.cc
update source to Ceph Pacific 16.2.2
index fdcbb4e86cb8769ff9e3d647ee78b46d2483a68c..9874178063796ec846cf792f7a8e0aa4e51b8751 100644
@@ -7,10 +7,6 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 
-#ifndef __STDC_FORMAT_MACROS
-#define __STDC_FORMAT_MACROS
-#endif
-
 #include "cache/lru_cache.h"
 
 #include <assert.h>
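
The dropped __STDC_FORMAT_MACROS guard only matters for translation units that
use the <inttypes.h> PRI* format macros; presumably this file no longer does.
A minimal illustration of what the guard enables (hypothetical, not from this
file):

    // Hypothetical example: on older toolchains the guard must precede
    // <cinttypes> for the PRI* macros to be visible in C++.
    #define __STDC_FORMAT_MACROS
    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t v = 42;
      std::printf("%" PRIu64 "\n", v);
      return 0;
    }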
@@ -20,7 +16,7 @@
 
 #include "util/mutexlock.h"
 
-namespace rocksdb {
+namespace ROCKSDB_NAMESPACE {
 
 LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
   Resize();
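
Note: ROCKSDB_NAMESPACE is a preprocessor macro rather than a literal
namespace name, so embedders such as Ceph can rebuild RocksDB under a private
namespace and avoid symbol clashes with another RocksDB in the same process.
It is expected to be defined along these lines (see
include/rocksdb/rocksdb_namespace.h):

    // Default definition; a build may override it,
    // e.g. -DROCKSDB_NAMESPACE=ceph_rocksdb.
    #ifndef ROCKSDB_NAMESPACE
    #define ROCKSDB_NAMESPACE rocksdb
    #endif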
@@ -28,7 +24,7 @@ LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
 
 LRUHandleTable::~LRUHandleTable() {
   ApplyToAllCacheEntries([](LRUHandle* h) {
-    if (h->refs == 1) {
+    if (!h->HasRefs()) {
       h->Free();
     }
   });
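
The refcounting model changes here: previously refs included the cache's own
reference (so refs == 1 meant "held only by the cache"), whereas now refs
counts external references only and cache ownership is tracked by the in-cache
flag. A sketch of the handle helpers this diff relies on (the real definitions
live in cache/lru_cache.h):

    #include <cassert>
    #include <cstdint>

    struct HandleRefsSketch {
      uint32_t refs = 0;  // external references only; cache ownership is a flag
      void Ref() { ++refs; }
      bool Unref() {  // returns true when the last reference is dropped
        assert(refs > 0);
        --refs;
        return refs == 0;
      }
      bool HasRefs() const { return refs > 0; }
    };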
@@ -101,7 +97,8 @@ void LRUHandleTable::Resize() {
 
 LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
                              double high_pri_pool_ratio,
-                             bool use_adaptive_mutex)
+                             bool use_adaptive_mutex,
+                             CacheMetadataChargePolicy metadata_charge_policy)
     : capacity_(0),
       high_pri_pool_usage_(0),
       strict_capacity_limit_(strict_capacity_limit),
@@ -110,6 +107,7 @@ LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
       usage_(0),
       lru_usage_(0),
       mutex_(use_adaptive_mutex) {
+  set_metadata_charge_policy(metadata_charge_policy);
   // Make empty circular linked list
   lru_.next = &lru_;
   lru_.prev = &lru_;
@@ -117,30 +115,20 @@ LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
   SetCapacity(capacity);
 }
 
-LRUCacheShard::~LRUCacheShard() {}
-
-bool LRUCacheShard::Unref(LRUHandle* e) {
-  assert(e->refs > 0);
-  e->refs--;
-  return e->refs == 0;
-}
-
-// Call deleter and free
-
 void LRUCacheShard::EraseUnRefEntries() {
   autovector<LRUHandle*> last_reference_list;
   {
     MutexLock l(&mutex_);
     while (lru_.next != &lru_) {
       LRUHandle* old = lru_.next;
-      assert(old->InCache());
-      assert(old->refs ==
-             1);  // LRU list contains elements which may be evicted
+      // LRU list contains only elements which can be evicted
+      assert(old->InCache() && !old->HasRefs());
       LRU_Remove(old);
       table_.Remove(old->key(), old->hash);
       old->SetInCache(false);
-      Unref(old);
-      usage_ -= old->charge;
+      size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_);
+      assert(usage_ >= total_charge);
+      usage_ -= total_charge;
       last_reference_list.push_back(old);
     }
   }
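
Charges are now computed through CalcTotalCharge(), which can include the
handle's own metadata footprint depending on the new
CacheMetadataChargePolicy. A simplified sketch of the computation (the real
version may measure the actual allocation via malloc_usable_size when
available):

    #include <cstddef>

    enum CacheMetadataChargePolicy {
      kDontChargeCacheMetadata,
      kFullChargeCacheMetadata
    };

    // Simplified: the handle allocation is sizeof(LRUHandle) - 1 + key_length
    // bytes, charged on top of the value's charge when the policy asks for it.
    size_t CalcTotalChargeSketch(size_t charge, size_t handle_alloc_bytes,
                                 CacheMetadataChargePolicy policy) {
      size_t meta = (policy == kFullChargeCacheMetadata) ? handle_alloc_bytes : 0;
      return charge + meta;
    }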
@@ -152,22 +140,27 @@ void LRUCacheShard::EraseUnRefEntries() {
 
 void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                            bool thread_safe) {
+  const auto applyCallback = [&]() {
+    table_.ApplyToAllCacheEntries(
+        [callback](LRUHandle* h) { callback(h->value, h->charge); });
+  };
+
   if (thread_safe) {
-    mutex_.Lock();
-  }
-  table_.ApplyToAllCacheEntries(
-      [callback](LRUHandle* h) { callback(h->value, h->charge); });
-  if (thread_safe) {
-    mutex_.Unlock();
+    MutexLock l(&mutex_);
+    applyCallback();
+  } else {
+    applyCallback();
   }
 }
 
 void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
+  MutexLock l(&mutex_);
   *lru = &lru_;
   *lru_low_pri = lru_low_pri_;
 }
 
 size_t LRUCacheShard::TEST_GetLRUSize() {
+  MutexLock l(&mutex_);
   LRUHandle* lru_handle = lru_.next;
   size_t lru_size = 0;
   while (lru_handle != &lru_) {
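
Replacing the manual Lock()/Unlock() pair with a scoped MutexLock guarantees
the mutex is released on every exit path, and the TEST_ helpers now take the
lock as well. Roughly the guard from util/mutexlock.h, sketched here over
std::mutex for self-containment (the real one wraps port::Mutex):

    #include <mutex>

    class MutexLockSketch {
     public:
      explicit MutexLockSketch(std::mutex* mu) : mu_(mu) { mu_->lock(); }
      ~MutexLockSketch() { mu_->unlock(); }  // runs on every exit path
      MutexLockSketch(const MutexLockSketch&) = delete;
      MutexLockSketch& operator=(const MutexLockSketch&) = delete;
     private:
      std::mutex* const mu_;
    };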
@@ -191,16 +184,19 @@ void LRUCacheShard::LRU_Remove(LRUHandle* e) {
   e->next->prev = e->prev;
   e->prev->next = e->next;
   e->prev = e->next = nullptr;
-  lru_usage_ -= e->charge;
+  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
+  assert(lru_usage_ >= total_charge);
+  lru_usage_ -= total_charge;
   if (e->InHighPriPool()) {
-    assert(high_pri_pool_usage_ >= e->charge);
-    high_pri_pool_usage_ -= e->charge;
+    assert(high_pri_pool_usage_ >= total_charge);
+    high_pri_pool_usage_ -= total_charge;
   }
 }
 
 void LRUCacheShard::LRU_Insert(LRUHandle* e) {
   assert(e->next == nullptr);
   assert(e->prev == nullptr);
+  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
   if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
    // Insert "e" to head of LRU list.
     e->next = &lru_;
@@ -208,7 +204,7 @@ void LRUCacheShard::LRU_Insert(LRUHandle* e) {
     e->prev->next = e;
     e->next->prev = e;
     e->SetInHighPriPool(true);
-    high_pri_pool_usage_ += e->charge;
+    high_pri_pool_usage_ += total_charge;
     MaintainPoolSize();
   } else {
     // Insert "e" to the head of low-pri pool. Note that when
@@ -220,7 +216,7 @@ void LRUCacheShard::LRU_Insert(LRUHandle* e) {
     e->SetInHighPriPool(false);
     lru_low_pri_ = e;
   }
-  lru_usage_ += e->charge;
+  lru_usage_ += total_charge;
 }
 
 void LRUCacheShard::MaintainPoolSize() {
@@ -229,21 +225,25 @@ void LRUCacheShard::MaintainPoolSize() {
     lru_low_pri_ = lru_low_pri_->next;
     assert(lru_low_pri_ != &lru_);
     lru_low_pri_->SetInHighPriPool(false);
-    high_pri_pool_usage_ -= lru_low_pri_->charge;
+    size_t total_charge =
+        lru_low_pri_->CalcTotalCharge(metadata_charge_policy_);
+    assert(high_pri_pool_usage_ >= total_charge);
+    high_pri_pool_usage_ -= total_charge;
   }
 }
 
 void LRUCacheShard::EvictFromLRU(size_t charge,
                                  autovector<LRUHandle*>* deleted) {
-  while (usage_ + charge > capacity_ && lru_.next != &lru_) {
+  while ((usage_ + charge) > capacity_ && lru_.next != &lru_) {
     LRUHandle* old = lru_.next;
-    assert(old->InCache());
-    assert(old->refs == 1);  // LRU list contains elements which may be evicted
+    // LRU list contains only elements which can be evicted
+    assert(old->InCache() && !old->HasRefs());
     LRU_Remove(old);
     table_.Remove(old->key(), old->hash);
     old->SetInCache(false);
-    Unref(old);
-    usage_ -= old->charge;
+    size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_);
+    assert(usage_ >= old_total_charge);
+    usage_ -= old_total_charge;
     deleted->push_back(old);
   }
 }
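
Worked example of the eviction guard: with capacity_ = 100, usage_ = 95, and
an incoming total charge of 20, the loop pops entries from the cold end
(lru_.next) until (usage_ + 20) <= 100 or the list is empty. The rewritten
asserts encode the list's invariant: it holds exactly the entries with
InCache() and no external references, so everything on it is evictable.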
@@ -256,8 +256,8 @@ void LRUCacheShard::SetCapacity(size_t capacity) {
     high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
     EvictFromLRU(0, &last_reference_list);
   }
-  // we free the entries here outside of mutex for
-  // performance reasons
+
+  // Free the entries outside of mutex for performance reasons
   for (auto entry : last_reference_list) {
     entry->Free();
   }
@@ -273,22 +273,22 @@ Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
   LRUHandle* e = table_.Lookup(key, hash);
   if (e != nullptr) {
     assert(e->InCache());
-    if (e->refs == 1) {
+    if (!e->HasRefs()) {
+      // The entry is in LRU since it's in hash and has no external references
       LRU_Remove(e);
     }
-    e->refs++;
+    e->Ref();
     e->SetHit();
   }
   return reinterpret_cast<Cache::Handle*>(e);
 }
 
 bool LRUCacheShard::Ref(Cache::Handle* h) {
-  LRUHandle* handle = reinterpret_cast<LRUHandle*>(h);
+  LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
   MutexLock l(&mutex_);
-  if (handle->InCache() && handle->refs == 1) {
-    LRU_Remove(handle);
-  }
-  handle->refs++;
+  // To create another reference - entry must be already externally referenced
+  assert(e->HasRefs());
+  e->Ref();
   return true;
 }
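
These changes tighten the Lookup()/Ref()/Release() contract: a looked-up entry
is pinned (off the LRU list, refs > 0) until released, and Ref() may only be
called on an already-referenced handle. A caller-side sketch through the
public Cache API, assuming the default rocksdb namespace:

    #include "rocksdb/cache.h"

    int main() {
      std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(8 << 20);
      rocksdb::Cache::Handle* h = cache->Lookup("key");
      if (h != nullptr) {
        void* value = cache->Value(h);  // pinned: refs > 0, off the LRU list
        (void)value;
        cache->Release(h);  // refs drops to 0: back on the LRU list, or freed
      }
      return 0;
    }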
 
@@ -307,30 +307,29 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
   bool last_reference = false;
   {
     MutexLock l(&mutex_);
-    last_reference = Unref(e);
-    if (last_reference) {
-      usage_ -= e->charge;
-    }
-    if (e->refs == 1 && e->InCache()) {
+    last_reference = e->Unref();
+    if (last_reference && e->InCache()) {
       // The item is still in cache, and nobody else holds a reference to it
       if (usage_ > capacity_ || force_erase) {
-        // the cache is full
         // The LRU list must be empty since the cache is full
-        assert(!(usage_ > capacity_) || lru_.next == &lru_);
-        // take this opportunity and remove the item
+        assert(lru_.next == &lru_ || force_erase);
+        // Take this opportunity and remove the item
         table_.Remove(e->key(), e->hash);
         e->SetInCache(false);
-        Unref(e);
-        usage_ -= e->charge;
-        last_reference = true;
       } else {
-        // put the item on the list to be potentially freed
+        // Put the item back on the LRU list, and don't free it
         LRU_Insert(e);
+        last_reference = false;
       }
     }
+    if (last_reference) {
+      size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
+      assert(usage_ >= total_charge);
+      usage_ -= total_charge;
+    }
   }
 
-  // free outside of mutex
+  // Free the entry here outside of mutex for performance reasons
   if (last_reference) {
     e->Free();
   }
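
Note the recurring pattern: Release(), like EraseUnRefEntries(),
SetCapacity(), Insert(), and Erase() below, collects dead entries under mutex_
but calls Free() (and thus the user-supplied deleter) only after unlocking,
since deleters can be arbitrarily expensive and would otherwise serialize the
whole shard.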
@@ -346,7 +345,7 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
   // It shouldn't happen very often though.
   LRUHandle* e = reinterpret_cast<LRUHandle*>(
       new char[sizeof(LRUHandle) - 1 + key.size()]);
-  Status s;
+  Status s = Status::OK();
   autovector<LRUHandle*> last_reference_list;
 
   e->value = value;
@@ -355,26 +354,26 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
   e->key_length = key.size();
   e->flags = 0;
   e->hash = hash;
-  e->refs = (handle == nullptr
-                 ? 1
-                 : 2);  // One from LRUCache, one for the returned handle
+  e->refs = 0;
   e->next = e->prev = nullptr;
   e->SetInCache(true);
   e->SetPriority(priority);
   memcpy(e->key_data, key.data(), key.size());
+  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
 
   {
     MutexLock l(&mutex_);
 
     // Free the space following strict LRU policy until enough space
     // is freed or the lru list is empty
-    EvictFromLRU(charge, &last_reference_list);
+    EvictFromLRU(total_charge, &last_reference_list);
 
-    if (usage_ - lru_usage_ + charge > capacity_ &&
+    if ((usage_ + total_charge) > capacity_ &&
         (strict_capacity_limit_ || handle == nullptr)) {
       if (handle == nullptr) {
         // Don't insert the entry but still return ok, as if the entry inserted
         // into cache and get evicted immediately.
+        e->SetInCache(false);
         last_reference_list.push_back(e);
       } else {
         delete[] reinterpret_cast<char*>(e);
@@ -382,32 +381,33 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
         s = Status::Incomplete("Insert failed due to LRU cache being full.");
       }
     } else {
-      // insert into the cache
-      // note that the cache might get larger than its capacity if not enough
-      // space was freed
+      // Insert into the cache. Note that the cache might get larger than its
+      // capacity if not enough space was freed up.
       LRUHandle* old = table_.Insert(e);
-      usage_ += e->charge;
+      usage_ += total_charge;
       if (old != nullptr) {
+        assert(old->InCache());
         old->SetInCache(false);
-        if (Unref(old)) {
-          usage_ -= old->charge;
-          // old is on LRU because it's in cache and its reference count
-          // was just 1 (Unref returned 0)
+        if (!old->HasRefs()) {
+          // old is on LRU because it's in cache and its reference count is 0
           LRU_Remove(old);
+          size_t old_total_charge =
+              old->CalcTotalCharge(metadata_charge_policy_);
+          assert(usage_ >= old_total_charge);
+          usage_ -= old_total_charge;
           last_reference_list.push_back(old);
         }
       }
       if (handle == nullptr) {
         LRU_Insert(e);
       } else {
+        e->Ref();
         *handle = reinterpret_cast<Cache::Handle*>(e);
       }
-      s = Status::OK();
     }
   }
 
-  // we free the entries here outside of mutex for
-  // performance reasons
+  // Free the entries here outside of mutex for performance reasons
   for (auto entry : last_reference_list) {
     entry->Free();
   }
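
New entries now start at refs == 0 and are Ref()'d only if a handle is handed
back to the caller; with strict_capacity_limit and a requested handle, a full
cache fails with Status::Incomplete instead of over-committing. A caller-side
sketch (DeleteValue is a hypothetical deleter, not from this diff):

    #include <string>
    #include "rocksdb/cache.h"

    // Hypothetical deleter, invoked when the entry is evicted or erased.
    static void DeleteValue(const rocksdb::Slice& /*key*/, void* value) {
      delete static_cast<std::string*>(value);
    }

    int main() {
      std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(1 << 20);
      auto* value = new std::string("payload");
      rocksdb::Cache::Handle* handle = nullptr;
      rocksdb::Status s =
          cache->Insert("key", value, value->size(), &DeleteValue, &handle);
      if (s.ok() && handle != nullptr) {
        cache->Release(handle);  // drop the reference Insert handed back
      } else {
        delete value;  // on failure the deleter is not called; caller cleans up
      }
      return 0;
    }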
@@ -422,18 +422,20 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
     MutexLock l(&mutex_);
     e = table_.Remove(key, hash);
     if (e != nullptr) {
-      last_reference = Unref(e);
-      if (last_reference) {
-        usage_ -= e->charge;
-      }
-      if (last_reference && e->InCache()) {
+      assert(e->InCache());
+      e->SetInCache(false);
+      if (!e->HasRefs()) {
+        // The entry is in LRU since it's in hash and has no external references
         LRU_Remove(e);
+        size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
+        assert(usage_ >= total_charge);
+        usage_ -= total_charge;
+        last_reference = true;
       }
-      e->SetInCache(false);
     }
   }
 
-  // mutex not held here
+  // Free the entry here outside of mutex for performance reasons
   // last_reference will only be true if e != nullptr
   if (last_reference) {
     e->Free();
@@ -465,7 +467,8 @@ std::string LRUCacheShard::GetPrintableOptions() const {
 LRUCache::LRUCache(size_t capacity, int num_shard_bits,
                    bool strict_capacity_limit, double high_pri_pool_ratio,
                    std::shared_ptr<MemoryAllocator> allocator,
-                   bool use_adaptive_mutex)
+                   bool use_adaptive_mutex,
+                   CacheMetadataChargePolicy metadata_charge_policy)
     : ShardedCache(capacity, num_shard_bits, strict_capacity_limit,
                    std::move(allocator)) {
   num_shards_ = 1 << num_shard_bits;
@@ -475,7 +478,7 @@ LRUCache::LRUCache(size_t capacity, int num_shard_bits,
   for (int i = 0; i < num_shards_; i++) {
     new (&shards_[i])
         LRUCacheShard(per_shard, strict_capacity_limit, high_pri_pool_ratio,
-            use_adaptive_mutex);
+                      use_adaptive_mutex, metadata_charge_policy);
   }
 }
 
@@ -544,15 +547,15 @@ std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
   return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
                      cache_opts.strict_capacity_limit,
                      cache_opts.high_pri_pool_ratio,
-                     cache_opts.memory_allocator,
-                     cache_opts.use_adaptive_mutex);
+                     cache_opts.memory_allocator, cache_opts.use_adaptive_mutex,
+                     cache_opts.metadata_charge_policy);
 }
 
 std::shared_ptr<Cache> NewLRUCache(
     size_t capacity, int num_shard_bits, bool strict_capacity_limit,
     double high_pri_pool_ratio,
-    std::shared_ptr<MemoryAllocator> memory_allocator,
-    bool use_adaptive_mutex) {
+    std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
+    CacheMetadataChargePolicy metadata_charge_policy) {
   if (num_shard_bits >= 20) {
     return nullptr;  // the cache cannot be sharded into too many fine pieces
   }
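
Sharding context: the cache is split into 2^num_shard_bits independently
locked shards (hence the >= 20 guard against absurdly fine sharding), each
receiving roughly capacity / num_shards. For example, capacity = 8 MiB with
num_shard_bits = 4 yields 16 shards of 512 KiB each. The shard is assumed to
be selected from the top bits of the 32-bit key hash, along these lines:

    #include <cstdint>

    // Sketch of ShardedCache's assumed shard selection (see sharded_cache.cc).
    uint32_t ShardFor(uint32_t hash, int num_shard_bits) {
      return (num_shard_bits > 0) ? (hash >> (32 - num_shard_bits)) : 0;
    }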
@@ -563,10 +566,9 @@ std::shared_ptr<Cache> NewLRUCache(
   if (num_shard_bits < 0) {
     num_shard_bits = GetDefaultCacheShardBits(capacity);
   }
-  return std::make_shared<LRUCache>(capacity, num_shard_bits,
-                                    strict_capacity_limit, high_pri_pool_ratio,
-                                    std::move(memory_allocator),
-                                    use_adaptive_mutex);
+  return std::make_shared<LRUCache>(
+      capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
+      std::move(memory_allocator), use_adaptive_mutex, metadata_charge_policy);
 }
 
-}  // namespace rocksdb
+}  // namespace ROCKSDB_NAMESPACE