// Copyright (c) 2018-Present Red Hat Inc.  All rights reserved.
//
// Copyright (c) 2011-2018, Facebook, Inc.  All rights reserved.
// This source code is licensed under both the GPLv2 and Apache 2.0 License
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include "BinnedLRUCache.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>   // memset() in BinnedLRUHandleTable::Resize()
#include <algorithm>  // std::copy_n() in BinnedLRUCacheShard::Insert()
#include <string>

#define dout_context cct
#define dout_subsys ceph_subsys_rocksdb
#undef dout_prefix
#define dout_prefix *_dout << "rocksdb: "

namespace rocksdb_cache {

BinnedLRUHandleTable::BinnedLRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
  Resize();
}

BinnedLRUHandleTable::~BinnedLRUHandleTable() {
  ApplyToAllCacheEntries([](BinnedLRUHandle* h) {
    if (h->refs == 1) {
      h->Free();
    }
  });
  delete[] list_;
}

BinnedLRUHandle* BinnedLRUHandleTable::Lookup(const rocksdb::Slice& key, uint32_t hash) {
  return *FindPointer(key, hash);
}

BinnedLRUHandle* BinnedLRUHandleTable::Insert(BinnedLRUHandle* h) {
  BinnedLRUHandle** ptr = FindPointer(h->key(), h->hash);
  BinnedLRUHandle* old = *ptr;
  h->next_hash = (old == nullptr ? nullptr : old->next_hash);
  *ptr = h;
  if (old == nullptr) {
    ++elems_;
    if (elems_ > length_) {
      // Since each cache entry is fairly large, we aim for a small
      // average linked list length (<= 1).
      Resize();
    }
  }
  return old;
}

BinnedLRUHandle* BinnedLRUHandleTable::Remove(const rocksdb::Slice& key, uint32_t hash) {
  BinnedLRUHandle** ptr = FindPointer(key, hash);
  BinnedLRUHandle* result = *ptr;
  if (result != nullptr) {
    *ptr = result->next_hash;
    --elems_;
  }
  return result;
}

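// FindPointer returns the address of the slot that points at the handle
// matching (key, hash), or, if there is no match, the address of the null
// slot at the end of the corresponding hash chain. Returning a slot
// address rather than the handle itself lets Insert and Remove splice the
// singly linked chain without special-casing the bucket head.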
BinnedLRUHandle** BinnedLRUHandleTable::FindPointer(const rocksdb::Slice& key, uint32_t hash) {
  BinnedLRUHandle** ptr = &list_[hash & (length_ - 1)];
  while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
    ptr = &(*ptr)->next_hash;
  }
  return ptr;
}

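// Grow the bucket array to the next power of two that is at least 1.5x
// the element count (keeping the average chain length below one), then
// rehash every handle into the new array. Power-of-two sizing keeps
// bucket selection a cheap mask: hash & (length - 1).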
void BinnedLRUHandleTable::Resize() {
  uint32_t new_length = 16;
  while (new_length < elems_ * 1.5) {
    new_length *= 2;
  }
  BinnedLRUHandle** new_list = new BinnedLRUHandle*[new_length];
  memset(new_list, 0, sizeof(new_list[0]) * new_length);
  uint32_t count = 0;
  for (uint32_t i = 0; i < length_; i++) {
    BinnedLRUHandle* h = list_[i];
    while (h != nullptr) {
      BinnedLRUHandle* next = h->next_hash;
      uint32_t hash = h->hash;
      BinnedLRUHandle** ptr = &new_list[hash & (new_length - 1)];
      h->next_hash = *ptr;
      *ptr = h;
      h = next;
      count++;
    }
  }
  ceph_assert(elems_ == count);
  delete[] list_;
  list_ = new_list;
  length_ = new_length;
}

BinnedLRUCacheShard::BinnedLRUCacheShard(CephContext *c, size_t capacity, bool strict_capacity_limit,
                                         double high_pri_pool_ratio)
  : cct(c),
    capacity_(0),
    high_pri_pool_usage_(0),
    strict_capacity_limit_(strict_capacity_limit),
    high_pri_pool_ratio_(high_pri_pool_ratio),
    high_pri_pool_capacity_(0),
    usage_(0),
    lru_usage_(0),
    age_bins(1) {
  shift_bins();
  // Make empty circular linked list
  lru_.next = &lru_;
  lru_.prev = &lru_;
  lru_low_pri_ = &lru_;
  SetCapacity(capacity);
}

BinnedLRUCacheShard::~BinnedLRUCacheShard() {}

bool BinnedLRUCacheShard::Unref(BinnedLRUHandle* e) {
  ceph_assert(e->refs > 0);
  e->refs--;
  return e->refs == 0;
}

// Remove every evictable (unreferenced) entry from the shard, calling
// each entry's deleter and freeing it.
void BinnedLRUCacheShard::EraseUnRefEntries() {
  ceph::autovector<BinnedLRUHandle*> last_reference_list;
  {
    std::lock_guard<std::mutex> l(mutex_);
    while (lru_.next != &lru_) {
      BinnedLRUHandle* old = lru_.next;
      ceph_assert(old->InCache());
      ceph_assert(old->refs == 1);  // LRU list contains elements which may be evicted
      LRU_Remove(old);
      table_.Remove(old->key(), old->hash);
      old->SetInCache(false);
      Unref(old);
      usage_ -= old->charge;
      last_reference_list.push_back(old);
    }
  }

  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void BinnedLRUCacheShard::ApplyToAllCacheEntries(
  const std::function<void(const rocksdb::Slice& key,
                           void* value,
                           size_t charge,
                           DeleterFn)>& callback,
  bool thread_safe)
{
  if (thread_safe) {
    mutex_.lock();
  }
  table_.ApplyToAllCacheEntries(
    [callback](BinnedLRUHandle* h) {
      callback(h->key(), h->value, h->charge, h->deleter);
    });
  if (thread_safe) {
    mutex_.unlock();
  }
}

void BinnedLRUCacheShard::TEST_GetLRUList(BinnedLRUHandle** lru, BinnedLRUHandle** lru_low_pri) {
  *lru = &lru_;
  *lru_low_pri = lru_low_pri_;
}

size_t BinnedLRUCacheShard::TEST_GetLRUSize() {
  BinnedLRUHandle* lru_handle = lru_.next;
  size_t lru_size = 0;
  while (lru_handle != &lru_) {
    lru_size++;
    lru_handle = lru_handle->next;
  }
  return lru_size;
}

double BinnedLRUCacheShard::GetHighPriPoolRatio() const {
  std::lock_guard<std::mutex> l(mutex_);
  return high_pri_pool_ratio_;
}

size_t BinnedLRUCacheShard::GetHighPriPoolUsage() const {
  std::lock_guard<std::mutex> l(mutex_);
  return high_pri_pool_usage_;
}

void BinnedLRUCacheShard::LRU_Remove(BinnedLRUHandle* e) {
  ceph_assert(e->next != nullptr);
  ceph_assert(e->prev != nullptr);
  if (lru_low_pri_ == e) {
    lru_low_pri_ = e->prev;
  }
  e->next->prev = e->prev;
  e->prev->next = e->next;
  e->prev = e->next = nullptr;
  lru_usage_ -= e->charge;
  if (e->InHighPriPool()) {
    ceph_assert(high_pri_pool_usage_ >= e->charge);
    high_pri_pool_usage_ -= e->charge;
  } else {
    ceph_assert(*(e->age_bin) >= e->charge);
    *(e->age_bin) -= e->charge;
  }
}

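// Insert "e" at the front of the LRU order for its priority class. The
// list is a single circular chain: high-pri entries sit between
// lru_low_pri_ and the lru_ sentinel, low-pri entries before them, so
// eviction from lru_.next always drains the low-pri pool first. Each
// entry records the current (front) age bin; the bins accumulate the
// charge of entries residing in the low-pri pool, grouped by the
// interval in which they were last inserted.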
void BinnedLRUCacheShard::LRU_Insert(BinnedLRUHandle* e) {
  ceph_assert(e->next == nullptr);
  ceph_assert(e->prev == nullptr);
  e->age_bin = age_bins.front();

  if (high_pri_pool_ratio_ > 0 && e->IsHighPri()) {
    // Insert "e" at the head of the LRU list.
    e->next = &lru_;
    e->prev = lru_.prev;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(true);
    high_pri_pool_usage_ += e->charge;
    MaintainPoolSize();
  } else {
    // Insert "e" at the head of the low-pri pool. Note that when
    // high_pri_pool_ratio is 0, the head of the low-pri pool is also the
    // head of the LRU list.
    e->next = lru_low_pri_->next;
    e->prev = lru_low_pri_;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(false);
    lru_low_pri_ = e;
    *(e->age_bin) += e->charge;
  }
  lru_usage_ += e->charge;
}

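// Sum the bytes recorded in age bins [start, end), clamping end to the
// number of bins actually present. Bin 0 is the newest interval; higher
// indices hold progressively older data.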
uint64_t BinnedLRUCacheShard::sum_bins(uint32_t start, uint32_t end) const {
  std::lock_guard<std::mutex> l(mutex_);
  auto size = age_bins.size();
  if (size < start) {
    return 0;
  }
  uint64_t bytes = 0;
  end = (size < end) ? size : end;
  for (auto i = start; i < end; i++) {
    bytes += *(age_bins[i]);
  }
  return bytes;
}

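// Shrink the high-pri pool back under its capacity by demoting entries
// from the cold end of the high-pri range into the low-pri pool.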
void BinnedLRUCacheShard::MaintainPoolSize() {
  while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
    // Overflow last entry in high-pri pool to low-pri pool.
    lru_low_pri_ = lru_low_pri_->next;
    ceph_assert(lru_low_pri_ != &lru_);
    lru_low_pri_->SetInHighPriPool(false);
    high_pri_pool_usage_ -= lru_low_pri_->charge;
    *(lru_low_pri_->age_bin) += lru_low_pri_->charge;
  }
}

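// Evict from the cold end of the LRU list until the shard can absorb an
// additional "charge" bytes, or until nothing evictable remains. Evicted
// handles are handed back in "deleted" so the caller can free them after
// dropping the shard mutex.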
void BinnedLRUCacheShard::EvictFromLRU(size_t charge,
                                       ceph::autovector<BinnedLRUHandle*>* deleted) {
  while (usage_ + charge > capacity_ && lru_.next != &lru_) {
    BinnedLRUHandle* old = lru_.next;
    ceph_assert(old->InCache());
    ceph_assert(old->refs == 1);  // LRU list contains elements which may be evicted
    LRU_Remove(old);
    table_.Remove(old->key(), old->hash);
    old->SetInCache(false);
    Unref(old);
    usage_ -= old->charge;
    deleted->push_back(old);
  }
}

void BinnedLRUCacheShard::SetCapacity(size_t capacity) {
  ceph::autovector<BinnedLRUHandle*> last_reference_list;
  {
    std::lock_guard<std::mutex> l(mutex_);
    capacity_ = capacity;
    high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
    EvictFromLRU(0, &last_reference_list);
  }
  // We free the evicted entries outside of the mutex for performance
  // reasons.
  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void BinnedLRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
  std::lock_guard<std::mutex> l(mutex_);
  strict_capacity_limit_ = strict_capacity_limit;
}

rocksdb::Cache::Handle* BinnedLRUCacheShard::Lookup(const rocksdb::Slice& key, uint32_t hash) {
  std::lock_guard<std::mutex> l(mutex_);
  BinnedLRUHandle* e = table_.Lookup(key, hash);
  if (e != nullptr) {
    ceph_assert(e->InCache());
    if (e->refs == 1) {
      // Only the cache itself referenced the entry; take it off the LRU
      // list while an external reference is outstanding.
      LRU_Remove(e);
    }
    e->refs++;
    e->SetHit();
  }
  return reinterpret_cast<rocksdb::Cache::Handle*>(e);
}

bool BinnedLRUCacheShard::Ref(rocksdb::Cache::Handle* h) {
  BinnedLRUHandle* handle = reinterpret_cast<BinnedLRUHandle*>(h);
  std::lock_guard<std::mutex> l(mutex_);
  if (handle->InCache() && handle->refs == 1) {
    LRU_Remove(handle);
  }
  handle->refs++;
  return true;
}

void BinnedLRUCacheShard::SetHighPriPoolRatio(double high_pri_pool_ratio) {
  std::lock_guard<std::mutex> l(mutex_);
  high_pri_pool_ratio_ = high_pri_pool_ratio;
  high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
  MaintainPoolSize();
}

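// Drop one reference to "handle". If, after the drop, the cache holds
// the only remaining reference and either usage exceeds capacity or
// force_erase is set, the entry is erased and freed; otherwise a
// cache-only entry returns to the LRU list, where it becomes eligible
// for eviction. Returns true when the entry was freed.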
bool BinnedLRUCacheShard::Release(rocksdb::Cache::Handle* handle, bool force_erase) {
  if (handle == nullptr) {
    return false;
  }
  BinnedLRUHandle* e = reinterpret_cast<BinnedLRUHandle*>(handle);
  bool last_reference = false;
  {
    std::lock_guard<std::mutex> l(mutex_);
    last_reference = Unref(e);
    if (last_reference) {
      usage_ -= e->charge;
    }
    if (e->refs == 1 && e->InCache()) {
      // The item is still in cache, and nobody else holds a reference to it.
      if (usage_ > capacity_ || force_erase) {
        // The cache is over capacity (or erase was forced). If usage
        // exceeds capacity, the LRU list must already be empty.
        ceph_assert(!(usage_ > capacity_) || lru_.next == &lru_);
        // Take this opportunity and remove the item.
        table_.Remove(e->key(), e->hash);
        e->SetInCache(false);
        Unref(e);
        usage_ -= e->charge;
        last_reference = true;
      } else {
        // Put the item back on the LRU list, where it may be evicted later.
        LRU_Insert(e);
      }
    }
  }

  // Free outside of mutex.
  if (last_reference) {
    e->Free();
  }
  return last_reference;
}

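// Insert (key, value) with the given charge. On success the new handle
// either goes straight onto the LRU list (handle == nullptr, i.e. the
// caller keeps no reference) or is returned pinned through *handle. If
// evicting everything evictable still cannot make room, the insert is
// dropped: silently, with Status::OK, when the caller keeps no
// reference, or with Status::Incomplete under the strict capacity limit.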
rocksdb::Status BinnedLRUCacheShard::Insert(const rocksdb::Slice& key, uint32_t hash, void* value,
                                            size_t charge,
                                            DeleterFn deleter,
                                            rocksdb::Cache::Handle** handle, rocksdb::Cache::Priority priority) {
  auto e = new BinnedLRUHandle();
  rocksdb::Status s;
  ceph::autovector<BinnedLRUHandle*> last_reference_list;

  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->key_data = new char[e->key_length];
  e->flags = 0;
  e->hash = hash;
  e->refs = (handle == nullptr
                 ? 1
                 : 2);  // One from BinnedLRUCache, one for the returned handle
  e->next = e->prev = nullptr;
  e->SetInCache(true);
  e->SetPriority(priority);
  std::copy_n(key.data(), e->key_length, e->key_data);

  {
    std::lock_guard<std::mutex> l(mutex_);
    // Free the space following strict LRU policy until enough space
    // is freed or the LRU list is empty.
    EvictFromLRU(charge, &last_reference_list);

    if (usage_ - lru_usage_ + charge > capacity_ &&
        (strict_capacity_limit_ || handle == nullptr)) {
      if (handle == nullptr) {
        // Don't insert the entry but still return ok, as if the entry had
        // been inserted into the cache and evicted immediately.
        last_reference_list.push_back(e);
      } else {
        delete e;
        *handle = nullptr;
        s = rocksdb::Status::Incomplete("Insert failed due to LRU cache being full.");
      }
    } else {
      // Insert into the cache. Note that the cache might get larger than
      // its capacity if not enough space was freed up.
      BinnedLRUHandle* old = table_.Insert(e);
      usage_ += e->charge;
      if (old != nullptr) {
        old->SetInCache(false);
        if (Unref(old)) {
          usage_ -= old->charge;
          // old is on the LRU list because it is in the cache and its
          // reference count was just 1 (Unref returned true).
          LRU_Remove(old);
          last_reference_list.push_back(old);
        }
      }
      if (handle == nullptr) {
        LRU_Insert(e);
      } else {
        *handle = reinterpret_cast<rocksdb::Cache::Handle*>(e);
      }
      s = rocksdb::Status::OK();
    }
  }

  // We free the evicted entries outside of the mutex for performance
  // reasons.
  for (auto entry : last_reference_list) {
    entry->Free();
  }

  return s;
}

void BinnedLRUCacheShard::Erase(const rocksdb::Slice& key, uint32_t hash) {
  BinnedLRUHandle* e;
  bool last_reference = false;
  {
    std::lock_guard<std::mutex> l(mutex_);
    e = table_.Remove(key, hash);
    if (e != nullptr) {
      last_reference = Unref(e);
      if (last_reference) {
        usage_ -= e->charge;
      }
      if (last_reference && e->InCache()) {
        LRU_Remove(e);
      }
      e->SetInCache(false);
    }
  }

  // Mutex not held here.
  // last_reference will only be true if e != nullptr.
  if (last_reference) {
    e->Free();
  }
}

size_t BinnedLRUCacheShard::GetUsage() const {
  std::lock_guard<std::mutex> l(mutex_);
  return usage_;
}

size_t BinnedLRUCacheShard::GetPinnedUsage() const {
  std::lock_guard<std::mutex> l(mutex_);
  ceph_assert(usage_ >= lru_usage_);
  return usage_ - lru_usage_;
}

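// Age-bin maintenance. age_bins is a bounded, front-pushed sequence of
// per-interval byte counters: shift_bins() starts a new interval by
// pushing a fresh counter at the front, and set_bin_count() bounds how
// many historical intervals are retained.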
void BinnedLRUCacheShard::shift_bins() {
  std::lock_guard<std::mutex> l(mutex_);
  age_bins.push_front(std::make_shared<uint64_t>(0));
}

uint32_t BinnedLRUCacheShard::get_bin_count() const {
  std::lock_guard<std::mutex> l(mutex_);
  return age_bins.capacity();
}

void BinnedLRUCacheShard::set_bin_count(uint32_t count) {
  std::lock_guard<std::mutex> l(mutex_);
  age_bins.set_capacity(count);
}

std::string BinnedLRUCacheShard::GetPrintableOptions() const {
  const int kBufferSize = 200;
  char buffer[kBufferSize];
  {
    std::lock_guard<std::mutex> l(mutex_);
    snprintf(buffer, kBufferSize, "    high_pri_pool_ratio: %.3lf\n",
             high_pri_pool_ratio_);
  }
  return std::string(buffer);
}

DeleterFn BinnedLRUCacheShard::GetDeleter(rocksdb::Cache::Handle* h) const
{
  auto* handle = reinterpret_cast<BinnedLRUHandle*>(h);
  return handle->deleter;
}

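// The top-level cache fans out to 2^num_shard_bits shards, each holding
// an equal slice of the total capacity. Shards are placement-new'ed into
// a single cache-line-aligned allocation to avoid false sharing between
// their mutexes.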
BinnedLRUCache::BinnedLRUCache(CephContext *c,
                               size_t capacity,
                               int num_shard_bits,
                               bool strict_capacity_limit,
                               double high_pri_pool_ratio)
  : ShardedCache(capacity, num_shard_bits, strict_capacity_limit), cct(c) {
  num_shards_ = 1 << num_shard_bits;
  // TODO: Switch over to use mempool
  int rc = posix_memalign((void**) &shards_,
                          CACHE_LINE_SIZE,
                          sizeof(BinnedLRUCacheShard) * num_shards_);
  if (rc != 0) {
    throw std::bad_alloc();
  }
  size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
  for (int i = 0; i < num_shards_; i++) {
    new (&shards_[i])
        BinnedLRUCacheShard(c, per_shard, strict_capacity_limit, high_pri_pool_ratio);
  }
}

BinnedLRUCache::~BinnedLRUCache() {
  for (int i = 0; i < num_shards_; i++) {
    shards_[i].~BinnedLRUCacheShard();
  }
  aligned_free(shards_);
}

CacheShard* BinnedLRUCache::GetShard(int shard) {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

const CacheShard* BinnedLRUCache::GetShard(int shard) const {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

void* BinnedLRUCache::Value(Handle* handle) {
  return reinterpret_cast<const BinnedLRUHandle*>(handle)->value;
}

size_t BinnedLRUCache::GetCharge(Handle* handle) const {
  return reinterpret_cast<const BinnedLRUHandle*>(handle)->charge;
}

uint32_t BinnedLRUCache::GetHash(Handle* handle) const {
  return reinterpret_cast<const BinnedLRUHandle*>(handle)->hash;
}

void BinnedLRUCache::DisownData() {
  // Deliberately leak the shard data by dropping the pointer. When
  // compiled with ASAN, keep ownership so the leak warning is not
  // triggered.
#ifndef __SANITIZE_ADDRESS__
  shards_ = nullptr;
#endif  // !__SANITIZE_ADDRESS__
}

#if (ROCKSDB_MAJOR >= 7 || (ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR >= 22))
DeleterFn BinnedLRUCache::GetDeleter(Handle* handle) const
{
  return reinterpret_cast<const BinnedLRUHandle*>(handle)->deleter;
}
#endif

size_t BinnedLRUCache::TEST_GetLRUSize() {
  size_t lru_size_of_all_shards = 0;
  for (int i = 0; i < num_shards_; i++) {
    lru_size_of_all_shards += shards_[i].TEST_GetLRUSize();
  }
  return lru_size_of_all_shards;
}

void BinnedLRUCache::SetHighPriPoolRatio(double high_pri_pool_ratio) {
  for (int i = 0; i < num_shards_; i++) {
    shards_[i].SetHighPriPoolRatio(high_pri_pool_ratio);
  }
}

double BinnedLRUCache::GetHighPriPoolRatio() const {
  double result = 0.0;
  if (num_shards_ > 0) {
    result = shards_[0].GetHighPriPoolRatio();
  }
  return result;
}

size_t BinnedLRUCache::GetHighPriPoolUsage() const {
  // We will not lock the cache when getting the usage from shards.
  size_t usage = 0;
  for (int s = 0; s < num_shards_; s++) {
    usage += shards_[s].GetHighPriPoolUsage();
  }
  return usage;
}

// PriCache

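// Request cache bytes on behalf of the PriorityCache manager. PRI0
// requests cover the high-pri (index/filter) pool; intermediate
// priorities map onto ranges of age bins via get_bins(); LAST sweeps up
// whatever usage the earlier priorities did not account for. The request
// is returned net of what this priority has already been assigned.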
int64_t BinnedLRUCache::request_cache_bytes(PriorityCache::Priority pri, uint64_t total_cache) const
{
  int64_t assigned = get_cache_bytes(pri);
  int64_t request = 0;

  switch(pri) {
  // PRI0 is for rocksdb's high priority items (indexes/filters)
  case PriorityCache::Priority::PRI0:
    {
      // Because we want the high pri cache to grow independently of the low
      // pri cache, request a chunky allocation independent of the other
      // priorities.
      request = PriorityCache::get_chunk(GetHighPriPoolUsage(), total_cache);
      break;
    }
  case PriorityCache::Priority::LAST:
    {
      auto max = get_bin_count();
      request = GetUsage();
      request -= GetHighPriPoolUsage();
      request -= sum_bins(0, max);
      break;
    }
  default:
    {
      ceph_assert(pri > 0 && pri < PriorityCache::Priority::LAST);
      auto prev_pri = static_cast<PriorityCache::Priority>(pri - 1);
      uint64_t start = get_bins(prev_pri);
      uint64_t end = get_bins(pri);
      request = sum_bins(start, end);
      break;
    }
  }
  request = (request > assigned) ? request - assigned : 0;
  ldout(cct, 10) << __func__ << " Priority: " << static_cast<uint32_t>(pri)
                 << " Request: " << request << dendl;
  return request;
}

int64_t BinnedLRUCache::commit_cache_size(uint64_t total_bytes)
{
  size_t old_bytes = GetCapacity();
  int64_t new_bytes = PriorityCache::get_chunk(
      get_cache_bytes(), total_bytes);
  ldout(cct, 10) << __func__ << " old: " << old_bytes
                 << " new: " << new_bytes << dendl;
  SetCapacity((size_t) new_bytes);

  double ratio = 0;
  if (new_bytes > 0) {
    int64_t pri0_bytes = get_cache_bytes(PriorityCache::Priority::PRI0);
    ratio = (double) pri0_bytes / new_bytes;
  }
  ldout(cct, 5) << __func__ << " High Pri Pool Ratio set to " << ratio << dendl;
  SetHighPriPoolRatio(ratio);
  return new_bytes;
}

void BinnedLRUCache::shift_bins() {
  for (int s = 0; s < num_shards_; s++) {
    shards_[s].shift_bins();
  }
}

uint64_t BinnedLRUCache::sum_bins(uint32_t start, uint32_t end) const {
  uint64_t bytes = 0;
  for (int s = 0; s < num_shards_; s++) {
    bytes += shards_[s].sum_bins(start, end);
  }
  return bytes;
}

uint32_t BinnedLRUCache::get_bin_count() const {
  uint32_t result = 0;
  if (num_shards_ > 0) {
    result = shards_[0].get_bin_count();
  }
  return result;
}

void BinnedLRUCache::set_bin_count(uint32_t count) {
  for (int s = 0; s < num_shards_; s++) {
    shards_[s].set_bin_count(count);
  }
}

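// Example (illustrative only; "cct" stands in for a valid CephContext*):
//
//   auto cache = NewBinnedLRUCache(cct,
//                                  1 << 30,  // capacity: 1 GiB
//                                  -1,       // num_shard_bits: use the default
//                                  false,    // no strict capacity limit
//                                  0.5);     // reserve half for high-pri items
//
// A negative num_shard_bits falls back to GetDefaultCacheShardBits(), and
// out-of-range arguments (num_shard_bits >= 20, a ratio outside [0, 1])
// yield nullptr rather than a cache.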
std::shared_ptr<rocksdb::Cache> NewBinnedLRUCache(
    CephContext *c,
    size_t capacity,
    int num_shard_bits,
    bool strict_capacity_limit,
    double high_pri_pool_ratio) {
  if (num_shard_bits >= 20) {
    return nullptr;  // The cache cannot be sharded into too many fine pieces.
  }
  if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
    // Invalid high_pri_pool_ratio
    return nullptr;
  }
  if (num_shard_bits < 0) {
    num_shard_bits = GetDefaultCacheShardBits(capacity);
  }
  return std::make_shared<BinnedLRUCache>(
      c, capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio);
}

}  // namespace rocksdb_cache