high_pri_pool_ratio_(high_pri_pool_ratio),
high_pri_pool_capacity_(0),
usage_(0),
- lru_usage_(0) {
+ lru_usage_(0),
+ age_bins(1) {
+ shift_bins();
// Make empty circular linked list
lru_.next = &lru_;
lru_.prev = &lru_;
}
}
-void BinnedLRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
- bool thread_safe) {
+void BinnedLRUCacheShard::ApplyToAllCacheEntries(
+ const std::function<void(const rocksdb::Slice& key,
+ void* value,
+ size_t charge,
+ DeleterFn)>& callback,
+ bool thread_safe)
+{
if (thread_safe) {
mutex_.lock();
}
table_.ApplyToAllCacheEntries(
- [callback](BinnedLRUHandle* h) { callback(h->value, h->charge); });
+ [callback](BinnedLRUHandle* h) {
+ callback(h->key(), h->value, h->charge, h->deleter);
+ });
if (thread_safe) {
mutex_.unlock();
}
if (e->InHighPriPool()) {
ceph_assert(high_pri_pool_usage_ >= e->charge);
high_pri_pool_usage_ -= e->charge;
+ } else {
+ ceph_assert(*(e->age_bin) >= e->charge);
+ *(e->age_bin) -= e->charge;
}
}
void BinnedLRUCacheShard::LRU_Insert(BinnedLRUHandle* e) {
ceph_assert(e->next == nullptr);
ceph_assert(e->prev == nullptr);
+ e->age_bin = age_bins.front();
+
if (high_pri_pool_ratio_ > 0 && e->IsHighPri()) {
// Inset "e" to head of LRU list.
e->next = &lru_;
e->next->prev = e;
e->SetInHighPriPool(false);
lru_low_pri_ = e;
+ *(e->age_bin) += e->charge;
}
lru_usage_ += e->charge;
}
+uint64_t BinnedLRUCacheShard::sum_bins(uint32_t start, uint32_t end) const {
+  // Total the charges recorded in age bins [start, end), clamping the
+  // requested range to the number of bins that currently exist.
+  std::lock_guard<std::mutex> guard(mutex_);
+  const auto nbins = age_bins.size();
+  if (nbins < start) {
+    return 0;
+  }
+  if (nbins < end) {
+    end = nbins;
+  }
+  uint64_t total = 0;
+  for (uint32_t i = start; i < end; ++i) {
+    total += *(age_bins[i]);
+  }
+  return total;
+}
+
void BinnedLRUCacheShard::MaintainPoolSize() {
while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
// Overflow last entry in high-pri pool to low-pri pool.
ceph_assert(lru_low_pri_ != &lru_);
lru_low_pri_->SetInHighPriPool(false);
high_pri_pool_usage_ -= lru_low_pri_->charge;
+ *(lru_low_pri_->age_bin) += lru_low_pri_->charge;
}
}
rocksdb::Status BinnedLRUCacheShard::Insert(const rocksdb::Slice& key, uint32_t hash, void* value,
size_t charge,
- void (*deleter)(const rocksdb::Slice& key, void* value),
+ DeleterFn deleter,
rocksdb::Cache::Handle** handle, rocksdb::Cache::Priority priority) {
auto e = new BinnedLRUHandle();
rocksdb::Status s;
return usage_ - lru_usage_;
}
+void BinnedLRUCacheShard::shift_bins() {
+  // Open a fresh "current" bin at the front; entries inserted from now on
+  // are accounted against it (see LRU_Insert, which reads age_bins.front()).
+  auto fresh = std::make_shared<uint64_t>(0);
+  std::lock_guard<std::mutex> guard(mutex_);
+  age_bins.push_front(std::move(fresh));
+}
+
+uint32_t BinnedLRUCacheShard::get_bin_count() const {
+  // The bin count is the circular buffer's capacity, not its current size.
+  std::lock_guard<std::mutex> guard(mutex_);
+  return age_bins.capacity();
+}
+
+void BinnedLRUCacheShard::set_bin_count(uint32_t count) {
+  std::lock_guard<std::mutex> l(mutex_);
+  // A capacity of 0 would leave age_bins empty, and LRU_Insert dereferences
+  // age_bins.front(); the constructor starts with one bin, so require >= 1.
+  ceph_assert(count > 0);
+  age_bins.set_capacity(count);
+}
+
std::string BinnedLRUCacheShard::GetPrintableOptions() const {
const int kBufferSize = 200;
char buffer[kBufferSize];
return std::string(buffer);
}
+DeleterFn BinnedLRUCacheShard::GetDeleter(rocksdb::Cache::Handle* h) const
+{
+  // Handles handed out by this shard are BinnedLRUHandle underneath.
+  return reinterpret_cast<const BinnedLRUHandle*>(h)->deleter;
+}
+
BinnedLRUCache::BinnedLRUCache(CephContext *c,
size_t capacity,
int num_shard_bits,
#endif // !__SANITIZE_ADDRESS__
}
+#if (ROCKSDB_MAJOR > 6 || (ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR >= 22))
+// Cache::GetDeleter() is part of the rocksdb::Cache interface starting with
+// rocksdb 6.22.  The guard must also cover every later major release: the
+// previous check (MAJOR >= 6 && MINOR >= 22) wrongly excluded e.g. 7.0-7.21.
+DeleterFn BinnedLRUCache::GetDeleter(Handle* handle) const
+{
+  return reinterpret_cast<const BinnedLRUHandle*>(handle)->deleter;
+}
+#endif
+
size_t BinnedLRUCache::TEST_GetLRUSize() {
size_t lru_size_of_all_shards = 0;
for (int i = 0; i < num_shards_; i++) {
int64_t assigned = get_cache_bytes(pri);
int64_t request = 0;
- switch (pri) {
+ switch(pri) {
// PRI0 is for rocksdb's high priority items (indexes/filters)
case PriorityCache::Priority::PRI0:
{
- request = GetHighPriPoolUsage();
+ // Because we want the high pri cache to grow independently of the low
+ // pri cache, request a chunky allocation independent of the other
+ // priorities.
+ request = PriorityCache::get_chunk(GetHighPriPoolUsage(), total_cache);
break;
}
- // All other cache items are currently shoved into the PRI1 priority.
- case PriorityCache::Priority::PRI1:
+ case PriorityCache::Priority::LAST:
{
+ auto max = get_bin_count();
request = GetUsage();
request -= GetHighPriPoolUsage();
+ request -= sum_bins(0, max);
break;
}
default:
- break;
+ {
+ ceph_assert(pri > 0 && pri < PriorityCache::Priority::LAST);
+ auto prev_pri = static_cast<PriorityCache::Priority>(pri - 1);
+ uint64_t start = get_bins(prev_pri);
+ uint64_t end = get_bins(pri);
+ request = sum_bins(start, end);
+ break;
+ }
}
request = (request > assigned) ? request - assigned : 0;
ldout(cct, 10) << __func__ << " Priority: " << static_cast<uint32_t>(pri)
double ratio = 0;
if (new_bytes > 0) {
int64_t pri0_bytes = get_cache_bytes(PriorityCache::Priority::PRI0);
- // Add 10% of the "reserved" bytes so the ratio can't get stuck at 0
- pri0_bytes += (new_bytes - get_cache_bytes()) / 10;
ratio = (double) pri0_bytes / new_bytes;
}
- ldout(cct, 10) << __func__ << " High Pri Pool Ratio set to " << ratio << dendl;
+ ldout(cct, 5) << __func__ << " High Pri Pool Ratio set to " << ratio << dendl;
SetHighPriPoolRatio(ratio);
return new_bytes;
}
+void BinnedLRUCache::shift_bins() {
+  // Roll every shard forward to a new current age bin.
+  for (int shard = 0; shard < num_shards_; ++shard) {
+    shards_[shard].shift_bins();
+  }
+}
+
+uint64_t BinnedLRUCache::sum_bins(uint32_t start, uint32_t end) const {
+  // Aggregate the per-shard byte totals for the bin range [start, end).
+  uint64_t total = 0;
+  for (int shard = 0; shard < num_shards_; ++shard) {
+    total += shards_[shard].sum_bins(start, end);
+  }
+  return total;
+}
+
+uint32_t BinnedLRUCache::get_bin_count() const {
+  // Every shard is configured with the same bin count (see set_bin_count),
+  // so shard 0 is representative; report 0 when there are no shards.
+  if (num_shards_ <= 0) {
+    return 0;
+  }
+  return shards_[0].get_bin_count();
+}
+
+void BinnedLRUCache::set_bin_count(uint32_t count) {
+  // Apply the new bin count uniformly across every shard.
+  for (int shard = 0; shard < num_shards_; ++shard) {
+    shards_[shard].set_bin_count(count);
+  }
+}
+
std::shared_ptr<rocksdb::Cache> NewBinnedLRUCache(
CephContext *c,
size_t capacity,