]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/kv/rocksdb_cache/BinnedLRUCache.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / kv / rocksdb_cache / BinnedLRUCache.cc
index 0d657883e92debb85ac1d9d9b2757fbfcf6eb080..fce26c7b07dec0597ca6ba9fabca9b5d39e74b78 100644 (file)
@@ -110,7 +110,9 @@ BinnedLRUCacheShard::BinnedLRUCacheShard(CephContext *c, size_t capacity, bool s
       high_pri_pool_ratio_(high_pri_pool_ratio),
       high_pri_pool_capacity_(0),
       usage_(0),
-      lru_usage_(0) {
+      lru_usage_(0),
+      age_bins(1) {
+  shift_bins();
   // Make empty circular linked list
   lru_.next = &lru_;
   lru_.prev = &lru_;
@@ -151,13 +153,20 @@ void BinnedLRUCacheShard::EraseUnRefEntries() {
   }
 }
 
-void BinnedLRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
-                                           bool thread_safe) {
+// Walk every entry in this shard's hash table and invoke `callback` with the
+// entry's key, value, charge and deleter.  The callback signature is widened
+// from the old (value, charge) form so callers can see the key and the
+// per-entry deleter as well.
+// When `thread_safe` is true the shard mutex is held for the entire walk;
+// otherwise the caller must provide external synchronization.
+void BinnedLRUCacheShard::ApplyToAllCacheEntries(
+  const std::function<void(const rocksdb::Slice& key,
+                           void* value,
+                           size_t charge,
+                           DeleterFn)>& callback,
+  bool thread_safe)
+{
   if (thread_safe) {
     mutex_.lock();
   }
   table_.ApplyToAllCacheEntries(
-      [callback](BinnedLRUHandle* h) { callback(h->value, h->charge); });
+    [callback](BinnedLRUHandle* h) {
+      // Adapt the table's per-handle visitor to the public callback shape.
+      callback(h->key(), h->value, h->charge, h->deleter);
+    });
   if (thread_safe) {
     mutex_.unlock();
   }
@@ -201,12 +210,17 @@ void BinnedLRUCacheShard::LRU_Remove(BinnedLRUHandle* e) {
   if (e->InHighPriPool()) {
     ceph_assert(high_pri_pool_usage_ >= e->charge);
     high_pri_pool_usage_ -= e->charge;
+  } else {
+    ceph_assert(*(e->age_bin) >= e->charge);
+    *(e->age_bin) -= e->charge;
   }
 }
 
 void BinnedLRUCacheShard::LRU_Insert(BinnedLRUHandle* e) {
   ceph_assert(e->next == nullptr);
   ceph_assert(e->prev == nullptr);
+  e->age_bin = age_bins.front();
+
   if (high_pri_pool_ratio_ > 0 && e->IsHighPri()) {
     // Inset "e" to head of LRU list.
     e->next = &lru_;
@@ -225,10 +239,25 @@ void BinnedLRUCacheShard::LRU_Insert(BinnedLRUHandle* e) {
     e->next->prev = e;
     e->SetInHighPriPool(false);
     lru_low_pri_ = e;
+    *(e->age_bin) += e->charge;
   }
   lru_usage_ += e->charge;
 }
 
+// Sum the byte counts recorded in age bins [start, end) for this shard.
+// Bin 0 is the most recently opened bin (see shift_bins()); `end` is clamped
+// to the current number of bins, and a `start` beyond the last bin yields 0.
+// Takes the shard mutex, so it is safe to call concurrently with inserts.
+uint64_t BinnedLRUCacheShard::sum_bins(uint32_t start, uint32_t end) const {
+  std::lock_guard<std::mutex> l(mutex_);
+  auto size = age_bins.size();
+  if (size < start) {
+    return 0;
+  }
+  uint64_t bytes = 0;
+  end = (size < end) ? size : end;
+  for (auto i = start; i < end; i++) {
+    bytes += *(age_bins[i]);
+  }
+  return bytes;
+}
+
 void BinnedLRUCacheShard::MaintainPoolSize() {
   while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
     // Overflow last entry in high-pri pool to low-pri pool.
@@ -236,6 +265,7 @@ void BinnedLRUCacheShard::MaintainPoolSize() {
     ceph_assert(lru_low_pri_ != &lru_);
     lru_low_pri_->SetInHighPriPool(false);
     high_pri_pool_usage_ -= lru_low_pri_->charge;
+    *(lru_low_pri_->age_bin) += lru_low_pri_->charge;
   }
 }
 
@@ -345,7 +375,7 @@ bool BinnedLRUCacheShard::Release(rocksdb::Cache::Handle* handle, bool force_era
 
 rocksdb::Status BinnedLRUCacheShard::Insert(const rocksdb::Slice& key, uint32_t hash, void* value,
                              size_t charge,
-                             void (*deleter)(const rocksdb::Slice& key, void* value),
+                             DeleterFn deleter,
                              rocksdb::Cache::Handle** handle, rocksdb::Cache::Priority priority) {
   auto e = new BinnedLRUHandle();
   rocksdb::Status s;
@@ -453,6 +483,21 @@ size_t BinnedLRUCacheShard::GetPinnedUsage() const {
   return usage_ - lru_usage_;
 }
 
+// Open a new, empty age bin at the front (index 0); existing bins age by one
+// position.  NOTE(review): age_bins appears to be a fixed-capacity circular
+// buffer (see set_bin_count()), so when full this presumably evicts the
+// oldest bin — confirm against the container's declaration in the header.
+void BinnedLRUCacheShard::shift_bins() {
+  std::lock_guard<std::mutex> l(mutex_);
+  age_bins.push_front(std::make_shared<uint64_t>(0));
+}
+
+// Return the configured number of age bins for this shard.  Reports the
+// container's capacity (as set by set_bin_count()), not how many bins are
+// currently populated.
+uint32_t BinnedLRUCacheShard::get_bin_count() const {
+  std::lock_guard<std::mutex> l(mutex_);
+  return age_bins.capacity();
+}
+
+// Resize the age-bin ring to hold `count` bins.  Held under the shard mutex
+// so it does not race with shift_bins()/sum_bins().
+void BinnedLRUCacheShard::set_bin_count(uint32_t count) {
+  std::lock_guard<std::mutex> l(mutex_);
+  age_bins.set_capacity(count);
+}
+
 std::string BinnedLRUCacheShard::GetPrintableOptions() const {
   const int kBufferSize = 200;
   char buffer[kBufferSize];
@@ -464,6 +509,12 @@ std::string BinnedLRUCacheShard::GetPrintableOptions() const {
   return std::string(buffer);
 }
 
+// Return the deleter associated with a cache handle.  Handles produced by
+// this shard are always BinnedLRUHandle instances, so the cast is safe.
+DeleterFn BinnedLRUCacheShard::GetDeleter(rocksdb::Cache::Handle* h) const
+{
+  auto* handle = reinterpret_cast<BinnedLRUHandle*>(h);
+  return handle->deleter;
+}
+
 BinnedLRUCache::BinnedLRUCache(CephContext *c, 
                                size_t capacity, 
                                int num_shard_bits,
@@ -519,6 +570,13 @@ void BinnedLRUCache::DisownData() {
 #endif  // !__SANITIZE_ADDRESS__
 }
 
+// Cache::GetDeleter() exists in the rocksdb::Cache interface from
+// RocksDB 6.22 onward, so only override it for new enough versions.
+// Fix: the original predicate (MAJOR >= 6 && MINOR >= 22) wrongly evaluated
+// false for any major release after 6 whose minor number resets below 22
+// (e.g. 7.0); compare the major version first.
+#if (ROCKSDB_MAJOR > 6 || (ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR >= 22))
+DeleterFn BinnedLRUCache::GetDeleter(Handle* handle) const
+{
+  // All handles handed out by this cache are BinnedLRUHandle instances.
+  return reinterpret_cast<const BinnedLRUHandle*>(handle)->deleter;
+}
+#endif
+
 size_t BinnedLRUCache::TEST_GetLRUSize() {
   size_t lru_size_of_all_shards = 0;
   for (int i = 0; i < num_shards_; i++) {
@@ -557,22 +615,33 @@ int64_t BinnedLRUCache::request_cache_bytes(PriorityCache::Priority pri, uint64_
   int64_t assigned = get_cache_bytes(pri);
   int64_t request = 0;
 
-  switch (pri) {
+  switch(pri) {
   // PRI0 is for rocksdb's high priority items (indexes/filters)
   case PriorityCache::Priority::PRI0:
     {
-      request = GetHighPriPoolUsage();
+      // Because we want the high pri cache to grow independently of the low
+      // pri cache, request a chunky allocation independent of the other
+      // priorities.
+      request = PriorityCache::get_chunk(GetHighPriPoolUsage(), total_cache);
       break;
     }
-  // All other cache items are currently shoved into the PRI1 priority. 
-  case PriorityCache::Priority::PRI1:
+  case PriorityCache::Priority::LAST:
     {
+      auto max = get_bin_count();
       request = GetUsage();
       request -= GetHighPriPoolUsage();
+      request -= sum_bins(0, max);
       break;
     }
   default:
-    break;
+    {
+      ceph_assert(pri > 0 && pri < PriorityCache::Priority::LAST);
+      auto prev_pri = static_cast<PriorityCache::Priority>(pri - 1);
+      uint64_t start = get_bins(prev_pri);
+      uint64_t end = get_bins(pri);
+      request = sum_bins(start, end);
+      break;
+    }
   }
   request = (request > assigned) ? request - assigned : 0;
   ldout(cct, 10) << __func__ << " Priority: " << static_cast<uint32_t>(pri)
@@ -592,15 +661,41 @@ int64_t BinnedLRUCache::commit_cache_size(uint64_t total_bytes)
   double ratio = 0;
   if (new_bytes > 0) {
     int64_t pri0_bytes = get_cache_bytes(PriorityCache::Priority::PRI0);
-    // Add 10% of the "reserved" bytes so the ratio can't get stuck at 0 
-    pri0_bytes += (new_bytes - get_cache_bytes()) / 10;
     ratio = (double) pri0_bytes / new_bytes;
   }
-  ldout(cct, 10) << __func__ << " High Pri Pool Ratio set to " << ratio << dendl;
+  ldout(cct, 5) << __func__ << " High Pri Pool Ratio set to " << ratio << dendl;
   SetHighPriPoolRatio(ratio);
   return new_bytes;
 }
 
+// Open a fresh age bin on every shard; each shard's existing bins age by one.
+void BinnedLRUCache::shift_bins() {
+  for (int s = 0; s < num_shards_; s++) {
+    shards_[s].shift_bins();
+  }
+}
+
+// Total bytes recorded in age bins [start, end) across all shards.
+// Each shard takes its own mutex; the total is not an atomic snapshot of the
+// whole cache, only of each shard individually.
+uint64_t BinnedLRUCache::sum_bins(uint32_t start, uint32_t end) const {
+  uint64_t bytes = 0;
+  for (int s = 0; s < num_shards_; s++) {
+    bytes += shards_[s].sum_bins(start, end);
+  }
+  return bytes;
+}
+
+// Configured bin count for the cache.  set_bin_count() applies the same
+// count to every shard, so shard 0 is representative; returns 0 when there
+// are no shards.
+uint32_t BinnedLRUCache::get_bin_count() const {
+  uint32_t result = 0;
+  if (num_shards_ > 0) {
+    result = shards_[0].get_bin_count();
+  }
+  return result;
+}
+
+// Apply the same age-bin capacity to every shard (keeps get_bin_count()'s
+// shard-0 shortcut valid).
+void BinnedLRUCache::set_bin_count(uint32_t count) {
+  for (int s = 0; s < num_shards_; s++) {
+    shards_[s].set_bin_count(count);
+  }
+}
+
 std::shared_ptr<rocksdb::Cache> NewBinnedLRUCache(
     CephContext *c, 
     size_t capacity,