// Copyright (c) 2018-Present Red Hat Inc. All rights reserved.
//
// Copyright (c) 2011-2018, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 and Apache 2.0 License
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include "BinnedLRUCache.h"

#include <stdio.h>
#include <stdlib.h>
#include <string>

#define dout_context cct
#define dout_subsys ceph_subsys_rocksdb
#undef dout_prefix
#define dout_prefix *_dout << "rocksdb: "

namespace rocksdb_cache {

BinnedLRUHandleTable::BinnedLRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
  Resize();
}

BinnedLRUHandleTable::~BinnedLRUHandleTable() {
  ApplyToAllCacheEntries([](BinnedLRUHandle* h) {
    if (h->refs == 1) {
      h->Free();
    }
  });
  delete[] list_;
}

BinnedLRUHandle* BinnedLRUHandleTable::Lookup(const rocksdb::Slice& key, uint32_t hash) {
  return *FindPointer(key, hash);
}

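// Inserts h into the hash table, replacing any existing handle stored under
// the same key/hash. Returns the replaced handle (nullptr if there was none)
// so the caller can unreference and free it.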
BinnedLRUHandle* BinnedLRUHandleTable::Insert(BinnedLRUHandle* h) {
  BinnedLRUHandle** ptr = FindPointer(h->key(), h->hash);
  BinnedLRUHandle* old = *ptr;
  h->next_hash = (old == nullptr ? nullptr : old->next_hash);
  *ptr = h;
  if (old == nullptr) {
    ++elems_;
    if (elems_ > length_) {
      // Since each cache entry is fairly large, we aim for a small
      // average linked list length (<= 1).
      Resize();
    }
  }
  return old;
}

BinnedLRUHandle* BinnedLRUHandleTable::Remove(const rocksdb::Slice& key, uint32_t hash) {
  BinnedLRUHandle** ptr = FindPointer(key, hash);
  BinnedLRUHandle* result = *ptr;
  if (result != nullptr) {
    *ptr = result->next_hash;
    --elems_;
  }
  return result;
}

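// Returns a pointer to the slot that points to a cache entry matching
// key/hash. If there is no such cache entry, returns a pointer to the
// trailing nullptr slot of the corresponding bucket chain.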
BinnedLRUHandle** BinnedLRUHandleTable::FindPointer(const rocksdb::Slice& key, uint32_t hash) {
  BinnedLRUHandle** ptr = &list_[hash & (length_ - 1)];
  while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
    ptr = &(*ptr)->next_hash;
  }
  return ptr;
}

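// Grows the bucket array (doubling from a minimum of 16) until it holds at
// least 1.5x as many buckets as elements, then rehashes every handle into
// the new array.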
void BinnedLRUHandleTable::Resize() {
  uint32_t new_length = 16;
  while (new_length < elems_ * 1.5) {
    new_length *= 2;
  }
  BinnedLRUHandle** new_list = new BinnedLRUHandle*[new_length];
  memset(new_list, 0, sizeof(new_list[0]) * new_length);
  uint32_t count = 0;
  for (uint32_t i = 0; i < length_; i++) {
    BinnedLRUHandle* h = list_[i];
    while (h != nullptr) {
      BinnedLRUHandle* next = h->next_hash;
      uint32_t hash = h->hash;
      BinnedLRUHandle** ptr = &new_list[hash & (new_length - 1)];
      h->next_hash = *ptr;
      *ptr = h;
      h = next;
      count++;
    }
  }
  ceph_assert(elems_ == count);
  delete[] list_;
  list_ = new_list;
  length_ = new_length;
}

BinnedLRUCacheShard::BinnedLRUCacheShard(CephContext *c, size_t capacity, bool strict_capacity_limit,
                                         double high_pri_pool_ratio)
    : cct(c),
      capacity_(0),
      high_pri_pool_usage_(0),
      strict_capacity_limit_(strict_capacity_limit),
      high_pri_pool_ratio_(high_pri_pool_ratio),
      high_pri_pool_capacity_(0),
      usage_(0),
      lru_usage_(0),
      age_bins(1) {
  shift_bins();
  // Make empty circular linked list
  lru_.next = &lru_;
  lru_.prev = &lru_;
  lru_low_pri_ = &lru_;
  SetCapacity(capacity);
}

BinnedLRUCacheShard::~BinnedLRUCacheShard() {}

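// Drops one reference from e and returns true if that was the last one.
// The caller is responsible for freeing the handle when true is returned.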
bool BinnedLRUCacheShard::Unref(BinnedLRUHandle* e) {
  ceph_assert(e->refs > 0);
  e->refs--;
  return e->refs == 0;
}

// Call deleter and free

void BinnedLRUCacheShard::EraseUnRefEntries() {
  ceph::autovector<BinnedLRUHandle*> last_reference_list;
  {
    std::lock_guard<std::mutex> l(mutex_);
    while (lru_.next != &lru_) {
      BinnedLRUHandle* old = lru_.next;
      ceph_assert(old->InCache());
      ceph_assert(old->refs ==
                  1);  // LRU list contains elements which may be evicted
      LRU_Remove(old);
      table_.Remove(old->key(), old->hash);
      old->SetInCache(false);
      Unref(old);
      usage_ -= old->charge;
      last_reference_list.push_back(old);
    }
  }

  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

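// Invokes callback for every entry in the shard's hash table. When
// thread_safe is true, the shard mutex is held across the whole traversal.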
void BinnedLRUCacheShard::ApplyToAllCacheEntries(
  const std::function<void(const rocksdb::Slice& key,
                           void* value,
                           size_t charge,
                           DeleterFn)>& callback,
  bool thread_safe)
{
  if (thread_safe) {
    mutex_.lock();
  }
  table_.ApplyToAllCacheEntries(
    [callback](BinnedLRUHandle* h) {
      callback(h->key(), h->value, h->charge, h->deleter);
    });
  if (thread_safe) {
    mutex_.unlock();
  }
}

void BinnedLRUCacheShard::TEST_GetLRUList(BinnedLRUHandle** lru, BinnedLRUHandle** lru_low_pri) {
  *lru = &lru_;
  *lru_low_pri = lru_low_pri_;
}

size_t BinnedLRUCacheShard::TEST_GetLRUSize() {
  BinnedLRUHandle* lru_handle = lru_.next;
  size_t lru_size = 0;
  while (lru_handle != &lru_) {
    lru_size++;
    lru_handle = lru_handle->next;
  }
  return lru_size;
}

double BinnedLRUCacheShard::GetHighPriPoolRatio() const {
  std::lock_guard<std::mutex> l(mutex_);
  return high_pri_pool_ratio_;
}

size_t BinnedLRUCacheShard::GetHighPriPoolUsage() const {
  std::lock_guard<std::mutex> l(mutex_);
  return high_pri_pool_usage_;
}

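// Unlinks e from the LRU list and updates lru_usage_ plus either the
// high-pri pool usage or the age bin the entry was charged to.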
void BinnedLRUCacheShard::LRU_Remove(BinnedLRUHandle* e) {
  ceph_assert(e->next != nullptr);
  ceph_assert(e->prev != nullptr);
  if (lru_low_pri_ == e) {
    lru_low_pri_ = e->prev;
  }
  e->next->prev = e->prev;
  e->prev->next = e->next;
  e->prev = e->next = nullptr;
  lru_usage_ -= e->charge;
  if (e->InHighPriPool()) {
    ceph_assert(high_pri_pool_usage_ >= e->charge);
    high_pri_pool_usage_ -= e->charge;
  } else {
    ceph_assert(*(e->age_bin) >= e->charge);
    *(e->age_bin) -= e->charge;
  }
}

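// Inserts e at the hot end of the LRU list. High-priority entries (when the
// high-pri pool is enabled) are charged to the high-pri pool; everything else
// goes to the head of the low-pri pool and is charged to the youngest age bin.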
void BinnedLRUCacheShard::LRU_Insert(BinnedLRUHandle* e) {
  ceph_assert(e->next == nullptr);
  ceph_assert(e->prev == nullptr);
  e->age_bin = age_bins.front();

  if (high_pri_pool_ratio_ > 0 && e->IsHighPri()) {
    // Insert "e" at the head of the LRU list.
    e->next = &lru_;
    e->prev = lru_.prev;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(true);
    high_pri_pool_usage_ += e->charge;
    MaintainPoolSize();
  } else {
    // Insert "e" at the head of the low-pri pool. Note that when
    // high_pri_pool_ratio is 0, the head of the low-pri pool is also the head
    // of the LRU list.
    e->next = lru_low_pri_->next;
    e->prev = lru_low_pri_;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(false);
    lru_low_pri_ = e;
    *(e->age_bin) += e->charge;
  }
  lru_usage_ += e->charge;
}

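// Returns the number of bytes currently charged to age bins [start, end),
// clamping the range to the bins that actually exist.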
uint64_t BinnedLRUCacheShard::sum_bins(uint32_t start, uint32_t end) const {
  std::lock_guard<std::mutex> l(mutex_);
  auto size = age_bins.size();
  if (size < start) {
    return 0;
  }
  uint64_t bytes = 0;
  end = (size < end) ? size : end;
  for (auto i = start; i < end; i++) {
    bytes += *(age_bins[i]);
  }
  return bytes;
}

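// Demotes entries from the cold end of the high-pri pool into the low-pri
// pool until high-pri usage fits within high_pri_pool_capacity_.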
void BinnedLRUCacheShard::MaintainPoolSize() {
  while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
    // Overflow last entry in high-pri pool to low-pri pool.
    lru_low_pri_ = lru_low_pri_->next;
    ceph_assert(lru_low_pri_ != &lru_);
    lru_low_pri_->SetInHighPriPool(false);
    high_pri_pool_usage_ -= lru_low_pri_->charge;
    *(lru_low_pri_->age_bin) += lru_low_pri_->charge;
  }
}

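// Evicts entries from the cold end of the LRU list until usage_ + charge
// fits within capacity_ or the list is empty. Evicted handles are collected
// in *deleted so they can be freed outside the shard mutex.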
void BinnedLRUCacheShard::EvictFromLRU(size_t charge,
                                       ceph::autovector<BinnedLRUHandle*>* deleted) {
  while (usage_ + charge > capacity_ && lru_.next != &lru_) {
    BinnedLRUHandle* old = lru_.next;
    ceph_assert(old->InCache());
    ceph_assert(old->refs == 1);  // LRU list contains elements which may be evicted
    LRU_Remove(old);
    table_.Remove(old->key(), old->hash);
    old->SetInCache(false);
    Unref(old);
    usage_ -= old->charge;
    deleted->push_back(old);
  }
}

void BinnedLRUCacheShard::SetCapacity(size_t capacity) {
  ceph::autovector<BinnedLRUHandle*> last_reference_list;
  {
    std::lock_guard<std::mutex> l(mutex_);
    capacity_ = capacity;
    high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
    EvictFromLRU(0, &last_reference_list);
  }
  // We free the evicted entries here, outside of the mutex, for
  // performance reasons.
  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void BinnedLRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
  std::lock_guard<std::mutex> l(mutex_);
  strict_capacity_limit_ = strict_capacity_limit;
}

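// Looks up key in this shard. On a hit the entry is pinned: it takes an
// extra reference (leaving the LRU list if it was previously unreferenced)
// and is marked as hit; the caller releases it later via Release().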
rocksdb::Cache::Handle* BinnedLRUCacheShard::Lookup(const rocksdb::Slice& key, uint32_t hash) {
  std::lock_guard<std::mutex> l(mutex_);
  BinnedLRUHandle* e = table_.Lookup(key, hash);
  if (e != nullptr) {
    ceph_assert(e->InCache());
    if (e->refs == 1) {
      LRU_Remove(e);
    }
    e->refs++;
    e->SetHit();
  }
  return reinterpret_cast<rocksdb::Cache::Handle*>(e);
}

bool BinnedLRUCacheShard::Ref(rocksdb::Cache::Handle* h) {
  BinnedLRUHandle* handle = reinterpret_cast<BinnedLRUHandle*>(h);
  std::lock_guard<std::mutex> l(mutex_);
  if (handle->InCache() && handle->refs == 1) {
    LRU_Remove(handle);
  }
  handle->refs++;
  return true;
}

void BinnedLRUCacheShard::SetHighPriPoolRatio(double high_pri_pool_ratio) {
  std::lock_guard<std::mutex> l(mutex_);
  high_pri_pool_ratio_ = high_pri_pool_ratio;
  high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
  MaintainPoolSize();
}

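// Drops the caller's reference. If only the cache's own reference remains,
// the entry is either put back on the LRU list or, when force_erase is set
// or the cache is over capacity, erased as well. Returns true if the entry
// ended up being freed.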
bool BinnedLRUCacheShard::Release(rocksdb::Cache::Handle* handle, bool force_erase) {
  if (handle == nullptr) {
    return false;
  }
  BinnedLRUHandle* e = reinterpret_cast<BinnedLRUHandle*>(handle);
  bool last_reference = false;
  {
    std::lock_guard<std::mutex> l(mutex_);
    last_reference = Unref(e);
    if (last_reference) {
      usage_ -= e->charge;
    }
    if (e->refs == 1 && e->InCache()) {
      // The item is still in cache, and nobody else holds a reference to it.
      if (usage_ > capacity_ || force_erase) {
        // The cache is over capacity (or the caller asked to erase the entry).
        // If it is over capacity, the LRU list must already be empty.
        ceph_assert(!(usage_ > capacity_) || lru_.next == &lru_);
        // Take this opportunity and remove the item.
        table_.Remove(e->key(), e->hash);
        e->SetInCache(false);
        Unref(e);
        usage_ -= e->charge;
        last_reference = true;
      } else {
        // Put the item back on the LRU list so it can be evicted later.
        LRU_Insert(e);
      }
    }
  }

  // Free outside of the mutex.
  if (last_reference) {
    e->Free();
  }
  return last_reference;
}

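// Inserts key/value with the given charge. Space is first reclaimed from the
// LRU list; if pinned usage plus the new charge would still exceed capacity,
// the insert either fails (strict limit with a handle requested) or the new
// entry is dropped immediately while still returning OK (no handle
// requested). Otherwise any previous entry under the same key is displaced.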
rocksdb::Status BinnedLRUCacheShard::Insert(const rocksdb::Slice& key, uint32_t hash, void* value,
                                            size_t charge,
                                            DeleterFn deleter,
                                            rocksdb::Cache::Handle** handle, rocksdb::Cache::Priority priority) {
  auto e = new BinnedLRUHandle();
  rocksdb::Status s;
  ceph::autovector<BinnedLRUHandle*> last_reference_list;

  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->key_data = new char[e->key_length];
  e->flags = 0;
  e->hash = hash;
  e->refs = (handle == nullptr
                 ? 1
                 : 2);  // One from BinnedLRUCache, one for the returned handle
  e->next = e->prev = nullptr;
  e->SetInCache(true);
  e->SetPriority(priority);
  std::copy_n(key.data(), e->key_length, e->key_data);

  {
    std::lock_guard<std::mutex> l(mutex_);
    // Free space following the strict LRU policy until enough space
    // is freed or the LRU list is empty.
    EvictFromLRU(charge, &last_reference_list);

    if (usage_ - lru_usage_ + charge > capacity_ &&
        (strict_capacity_limit_ || handle == nullptr)) {
      if (handle == nullptr) {
        // Don't insert the entry but still return ok, as if the entry were
        // inserted into the cache and evicted immediately.
        last_reference_list.push_back(e);
      } else {
        delete e;
        *handle = nullptr;
        s = rocksdb::Status::Incomplete("Insert failed due to LRU cache being full.");
      }
    } else {
      // Insert into the cache. Note that the cache might get larger than its
      // capacity if not enough space was freed up.
      BinnedLRUHandle* old = table_.Insert(e);
      usage_ += e->charge;
      if (old != nullptr) {
        old->SetInCache(false);
        if (Unref(old)) {
          usage_ -= old->charge;
          // old is on the LRU list because it was in the cache and its
          // reference count was just 1 (Unref dropped it to 0).
          LRU_Remove(old);
          last_reference_list.push_back(old);
        }
      }
      if (handle == nullptr) {
        LRU_Insert(e);
      } else {
        *handle = reinterpret_cast<rocksdb::Cache::Handle*>(e);
      }
      s = rocksdb::Status::OK();
    }
  }

  // We free the entries here, outside of the mutex, for performance reasons.
  for (auto entry : last_reference_list) {
    entry->Free();
  }

  return s;
}

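// Removes key from this shard. The entry is freed immediately if the cache
// held the only reference; otherwise it is freed when the last user releases
// its handle.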
void BinnedLRUCacheShard::Erase(const rocksdb::Slice& key, uint32_t hash) {
  BinnedLRUHandle* e;
  bool last_reference = false;
  {
    std::lock_guard<std::mutex> l(mutex_);
    e = table_.Remove(key, hash);
    if (e != nullptr) {
      last_reference = Unref(e);
      if (last_reference) {
        usage_ -= e->charge;
      }
      if (last_reference && e->InCache()) {
        LRU_Remove(e);
      }
      e->SetInCache(false);
    }
  }

  // mutex not held here
  // last_reference will only be true if e != nullptr
  if (last_reference) {
    e->Free();
  }
}

size_t BinnedLRUCacheShard::GetUsage() const {
  std::lock_guard<std::mutex> l(mutex_);
  return usage_;
}

size_t BinnedLRUCacheShard::GetPinnedUsage() const {
  std::lock_guard<std::mutex> l(mutex_);
  ceph_assert(usage_ >= lru_usage_);
  return usage_ - lru_usage_;
}

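// Age-bin bookkeeping: shift_bins() pushes a fresh, empty bin to the front of
// age_bins so subsequent insertions are charged to it; get_bin_count() and
// set_bin_count() expose and adjust how many bins are retained.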
void BinnedLRUCacheShard::shift_bins() {
  std::lock_guard<std::mutex> l(mutex_);
  age_bins.push_front(std::make_shared<uint64_t>(0));
}

uint32_t BinnedLRUCacheShard::get_bin_count() const {
  std::lock_guard<std::mutex> l(mutex_);
  return age_bins.capacity();
}

void BinnedLRUCacheShard::set_bin_count(uint32_t count) {
  std::lock_guard<std::mutex> l(mutex_);
  age_bins.set_capacity(count);
}

std::string BinnedLRUCacheShard::GetPrintableOptions() const {
  const int kBufferSize = 200;
  char buffer[kBufferSize];
  {
    std::lock_guard<std::mutex> l(mutex_);
    snprintf(buffer, kBufferSize, " high_pri_pool_ratio: %.3lf\n",
             high_pri_pool_ratio_);
  }
  return std::string(buffer);
}

DeleterFn BinnedLRUCacheShard::GetDeleter(rocksdb::Cache::Handle* h) const
{
  auto* handle = reinterpret_cast<BinnedLRUHandle*>(h);
  return handle->deleter;
}

BinnedLRUCache::BinnedLRUCache(CephContext *c,
                               size_t capacity,
                               int num_shard_bits,
                               bool strict_capacity_limit,
                               double high_pri_pool_ratio)
    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit), cct(c) {
  num_shards_ = 1 << num_shard_bits;
  // TODO: Switch over to use mempool
  int rc = posix_memalign((void**) &shards_,
                          CACHE_LINE_SIZE,
                          sizeof(BinnedLRUCacheShard) * num_shards_);
  if (rc != 0) {
    throw std::bad_alloc();
  }
  size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
  for (int i = 0; i < num_shards_; i++) {
    new (&shards_[i])
        BinnedLRUCacheShard(c, per_shard, strict_capacity_limit, high_pri_pool_ratio);
  }
}

BinnedLRUCache::~BinnedLRUCache() {
  for (int i = 0; i < num_shards_; i++) {
    shards_[i].~BinnedLRUCacheShard();
  }
  aligned_free(shards_);
}

CacheShard* BinnedLRUCache::GetShard(int shard) {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

const CacheShard* BinnedLRUCache::GetShard(int shard) const {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

void* BinnedLRUCache::Value(Handle* handle) {
  return reinterpret_cast<const BinnedLRUHandle*>(handle)->value;
}

size_t BinnedLRUCache::GetCharge(Handle* handle) const {
  return reinterpret_cast<const BinnedLRUHandle*>(handle)->charge;
}

uint32_t BinnedLRUCache::GetHash(Handle* handle) const {
  return reinterpret_cast<const BinnedLRUHandle*>(handle)->hash;
}

void BinnedLRUCache::DisownData() {
// Do not drop data if compiled with ASAN, to suppress the leak warning.
#ifndef __SANITIZE_ADDRESS__
  shards_ = nullptr;
#endif  // !__SANITIZE_ADDRESS__
}

#if (ROCKSDB_MAJOR >= 7 || (ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR >= 22))
DeleterFn BinnedLRUCache::GetDeleter(Handle* handle) const
{
  return reinterpret_cast<const BinnedLRUHandle*>(handle)->deleter;
}
#endif

size_t BinnedLRUCache::TEST_GetLRUSize() {
  size_t lru_size_of_all_shards = 0;
  for (int i = 0; i < num_shards_; i++) {
    lru_size_of_all_shards += shards_[i].TEST_GetLRUSize();
  }
  return lru_size_of_all_shards;
}

void BinnedLRUCache::SetHighPriPoolRatio(double high_pri_pool_ratio) {
  for (int i = 0; i < num_shards_; i++) {
    shards_[i].SetHighPriPoolRatio(high_pri_pool_ratio);
  }
}

double BinnedLRUCache::GetHighPriPoolRatio() const {
  double result = 0.0;
  if (num_shards_ > 0) {
    result = shards_[0].GetHighPriPoolRatio();
  }
  return result;
}

size_t BinnedLRUCache::GetHighPriPoolUsage() const {
  // We will not lock the cache when getting the usage from shards.
  size_t usage = 0;
  for (int s = 0; s < num_shards_; s++) {
    usage += shards_[s].GetHighPriPoolUsage();
  }
  return usage;
}

// PriCache

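// Returns how many additional bytes this cache would like for the given
// priority. PRI0 requests a chunk sized to the high-pri (index/filter) pool,
// LAST covers usage older than the tracked age bins, and every other priority
// is sized from the bytes sitting in its slice of age bins.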
int64_t BinnedLRUCache::request_cache_bytes(PriorityCache::Priority pri, uint64_t total_cache) const
{
  int64_t assigned = get_cache_bytes(pri);
  int64_t request = 0;

  switch(pri) {
  // PRI0 is for rocksdb's high priority items (indexes/filters)
  case PriorityCache::Priority::PRI0:
    {
      // Because we want the high pri cache to grow independently of the low
      // pri cache, request a chunky allocation independent of the other
      // priorities.
      request = PriorityCache::get_chunk(GetHighPriPoolUsage(), total_cache);
      break;
    }
  case PriorityCache::Priority::LAST:
    {
      auto max = get_bin_count();
      request = GetUsage();
      request -= GetHighPriPoolUsage();
      request -= sum_bins(0, max);
      break;
    }
  default:
    {
      ceph_assert(pri > 0 && pri < PriorityCache::Priority::LAST);
      auto prev_pri = static_cast<PriorityCache::Priority>(pri - 1);
      uint64_t start = get_bins(prev_pri);
      uint64_t end = get_bins(pri);
      request = sum_bins(start, end);
      break;
    }
  }
  request = (request > assigned) ? request - assigned : 0;
  ldout(cct, 10) << __func__ << " Priority: " << static_cast<uint32_t>(pri)
                 << " Request: " << request << dendl;
  return request;
}

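// Applies the priority cache's sizing decision: resizes the cache to its
// chunked share of total_bytes and re-derives the high-pri pool ratio from
// the PRI0 allocation.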
int64_t BinnedLRUCache::commit_cache_size(uint64_t total_bytes)
{
  size_t old_bytes = GetCapacity();
  int64_t new_bytes = PriorityCache::get_chunk(
    get_cache_bytes(), total_bytes);
  ldout(cct, 10) << __func__ << " old: " << old_bytes
                 << " new: " << new_bytes << dendl;
  SetCapacity((size_t) new_bytes);

  double ratio = 0;
  if (new_bytes > 0) {
    int64_t pri0_bytes = get_cache_bytes(PriorityCache::Priority::PRI0);
    ratio = (double) pri0_bytes / new_bytes;
  }
  ldout(cct, 5) << __func__ << " High Pri Pool Ratio set to " << ratio << dendl;
  SetHighPriPoolRatio(ratio);
  return new_bytes;
}

void BinnedLRUCache::shift_bins() {
  for (int s = 0; s < num_shards_; s++) {
    shards_[s].shift_bins();
  }
}

uint64_t BinnedLRUCache::sum_bins(uint32_t start, uint32_t end) const {
  uint64_t bytes = 0;
  for (int s = 0; s < num_shards_; s++) {
    bytes += shards_[s].sum_bins(start, end);
  }
  return bytes;
}

uint32_t BinnedLRUCache::get_bin_count() const {
  uint32_t result = 0;
  if (num_shards_ > 0) {
    result = shards_[0].get_bin_count();
  }
  return result;
}

void BinnedLRUCache::set_bin_count(uint32_t count) {
  for (int s = 0; s < num_shards_; s++) {
    shards_[s].set_bin_count(count);
  }
}

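// Factory function: validates the arguments (num_shard_bits must be below 20,
// high_pri_pool_ratio within [0.0, 1.0]) and builds a sharded BinnedLRUCache;
// a negative num_shard_bits selects a default based on capacity.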
std::shared_ptr<rocksdb::Cache> NewBinnedLRUCache(
    CephContext *c,
    size_t capacity,
    int num_shard_bits,
    bool strict_capacity_limit,
    double high_pri_pool_ratio) {
  if (num_shard_bits >= 20) {
    return nullptr;  // the cache cannot be sharded into too many fine pieces
  }
  if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
    // invalid high_pri_pool_ratio
    return nullptr;
  }
  if (num_shard_bits < 0) {
    num_shard_bits = GetDefaultCacheShardBits(capacity);
  }
  return std::make_shared<BinnedLRUCache>(
    c, capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio);
}

} // namespace rocksdb_cache