// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include "cache/lru_cache.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>

#include "util/mutexlock.h"

namespace rocksdb {

LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
  Resize();
}

LRUHandleTable::~LRUHandleTable() {
  ApplyToAllCacheEntries([](LRUHandle* h) {
    if (h->refs == 1) {
      h->Free();
    }
  });
  delete[] list_;
}

LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
  return *FindPointer(key, hash);
}

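// Inserts h into the table. If an entry with the same key and hash already
// exists, it is unlinked and returned so the caller can release it; otherwise
// nullptr is returned and the table grows when the load factor exceeds 1.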
LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
  LRUHandle** ptr = FindPointer(h->key(), h->hash);
  LRUHandle* old = *ptr;
  h->next_hash = (old == nullptr ? nullptr : old->next_hash);
  *ptr = h;
  if (old == nullptr) {
    ++elems_;
    if (elems_ > length_) {
      // Since each cache entry is fairly large, we aim for a small
      // average linked list length (<= 1).
      Resize();
    }
  }
  return old;
}

LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = FindPointer(key, hash);
  LRUHandle* result = *ptr;
  if (result != nullptr) {
    *ptr = result->next_hash;
    --elems_;
  }
  return result;
}

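// Returns a pointer to the slot that points to the entry matching key/hash.
// If there is no such entry, returns a pointer to the trailing null slot in
// the corresponding bucket's chain.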
LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
  LRUHandle** ptr = &list_[hash & (length_ - 1)];
  while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
    ptr = &(*ptr)->next_hash;
  }
  return ptr;
}

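// Grows the bucket array to the next power of two that keeps the average
// chain length around one, then rehashes every existing entry into the new
// buckets.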
void LRUHandleTable::Resize() {
  uint32_t new_length = 16;
  while (new_length < elems_ * 1.5) {
    new_length *= 2;
  }
  LRUHandle** new_list = new LRUHandle*[new_length];
  memset(new_list, 0, sizeof(new_list[0]) * new_length);
  uint32_t count = 0;
  for (uint32_t i = 0; i < length_; i++) {
    LRUHandle* h = list_[i];
    while (h != nullptr) {
      LRUHandle* next = h->next_hash;
      uint32_t hash = h->hash;
      LRUHandle** ptr = &new_list[hash & (new_length - 1)];
      h->next_hash = *ptr;
      *ptr = h;
      h = next;
      count++;
    }
  }
  assert(elems_ == count);
  delete[] list_;
  list_ = new_list;
  length_ = new_length;
}

LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
                             double high_pri_pool_ratio)
    : capacity_(0),
      high_pri_pool_usage_(0),
      strict_capacity_limit_(strict_capacity_limit),
      high_pri_pool_ratio_(high_pri_pool_ratio),
      high_pri_pool_capacity_(0),
      usage_(0),
      lru_usage_(0) {
  // Make empty circular linked list
  lru_.next = &lru_;
  lru_.prev = &lru_;
  lru_low_pri_ = &lru_;
  SetCapacity(capacity);
}

LRUCacheShard::~LRUCacheShard() {}

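// Drops one reference from e. Returns true if that was the last reference,
// in which case the caller is responsible for freeing the handle.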
bool LRUCacheShard::Unref(LRUHandle* e) {
  assert(e->refs > 0);
  e->refs--;
  return e->refs == 0;
}

// Remove all entries that are only referenced by the cache itself, calling
// each entry's deleter and freeing it.
void LRUCacheShard::EraseUnRefEntries() {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    while (lru_.next != &lru_) {
      LRUHandle* old = lru_.next;
      assert(old->InCache());
      assert(old->refs == 1);  // LRU list contains elements which may be evicted
      LRU_Remove(old);
      table_.Remove(old->key(), old->hash);
      old->SetInCache(false);
      Unref(old);
      usage_ -= old->charge;
      last_reference_list.push_back(old);
    }
  }

  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                           bool thread_safe) {
  if (thread_safe) {
    mutex_.Lock();
  }
  table_.ApplyToAllCacheEntries(
      [callback](LRUHandle* h) { callback(h->value, h->charge); });
  if (thread_safe) {
    mutex_.Unlock();
  }
}

void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
  *lru = &lru_;
  *lru_low_pri = lru_low_pri_;
}

size_t LRUCacheShard::TEST_GetLRUSize() {
  LRUHandle* lru_handle = lru_.next;
  size_t lru_size = 0;
  while (lru_handle != &lru_) {
    lru_size++;
    lru_handle = lru_handle->next;
  }
  return lru_size;
}

double LRUCacheShard::GetHighPriPoolRatio() {
  MutexLock l(&mutex_);
  return high_pri_pool_ratio_;
}

void LRUCacheShard::LRU_Remove(LRUHandle* e) {
  assert(e->next != nullptr);
  assert(e->prev != nullptr);
  if (lru_low_pri_ == e) {
    lru_low_pri_ = e->prev;
  }
  e->next->prev = e->prev;
  e->prev->next = e->next;
  e->prev = e->next = nullptr;
  lru_usage_ -= e->charge;
  if (e->InHighPriPool()) {
    assert(high_pri_pool_usage_ >= e->charge);
    high_pri_pool_usage_ -= e->charge;
  }
}

void LRUCacheShard::LRU_Insert(LRUHandle* e) {
  assert(e->next == nullptr);
  assert(e->prev == nullptr);
  if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
    // Insert "e" to head of LRU list.
    e->next = &lru_;
    e->prev = lru_.prev;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(true);
    high_pri_pool_usage_ += e->charge;
    MaintainPoolSize();
  } else {
    // Insert "e" to the head of low-pri pool. Note that when
    // high_pri_pool_ratio is 0, head of low-pri pool is also head of LRU list.
    e->next = lru_low_pri_->next;
    e->prev = lru_low_pri_;
    e->prev->next = e;
    e->next->prev = e;
    e->SetInHighPriPool(false);
    lru_low_pri_ = e;
  }
  lru_usage_ += e->charge;
}

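// Moves the oldest entries of the high-pri pool into the low-pri pool until
// the high-pri pool's usage is within high_pri_pool_capacity_.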
void LRUCacheShard::MaintainPoolSize() {
  while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
    // Overflow last entry in high-pri pool to low-pri pool.
    lru_low_pri_ = lru_low_pri_->next;
    assert(lru_low_pri_ != &lru_);
    lru_low_pri_->SetInHighPriPool(false);
    high_pri_pool_usage_ -= lru_low_pri_->charge;
  }
}

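// Evicts entries from the cold end of the LRU list until an additional
// `charge` bytes fit within capacity_, or the list is empty. Evicted handles
// are appended to *deleted so the caller can free them outside the mutex.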
void LRUCacheShard::EvictFromLRU(size_t charge,
                                 autovector<LRUHandle*>* deleted) {
  while (usage_ + charge > capacity_ && lru_.next != &lru_) {
    LRUHandle* old = lru_.next;
    assert(old->InCache());
    assert(old->refs == 1);  // LRU list contains elements which may be evicted
    LRU_Remove(old);
    table_.Remove(old->key(), old->hash);
    old->SetInCache(false);
    Unref(old);
    usage_ -= old->charge;
    deleted->push_back(old);
  }
}

void LRUCacheShard::SetCapacity(size_t capacity) {
  autovector<LRUHandle*> last_reference_list;
  {
    MutexLock l(&mutex_);
    capacity_ = capacity;
    high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
    EvictFromLRU(0, &last_reference_list);
  }
  // Free the entries here outside of the mutex for performance reasons.
  for (auto entry : last_reference_list) {
    entry->Free();
  }
}

void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
  MutexLock l(&mutex_);
  strict_capacity_limit_ = strict_capacity_limit;
}

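// Looks up key in this shard. On a hit the entry is pinned: its reference
// count is bumped and, if it was on the LRU list, it is removed from the list
// so it cannot be evicted while the caller holds the returned handle.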
Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
  MutexLock l(&mutex_);
  LRUHandle* e = table_.Lookup(key, hash);
  if (e != nullptr) {
    assert(e->InCache());
    if (e->refs == 1) {
      LRU_Remove(e);
    }
    e->refs++;
    e->SetHit();
  }
  return reinterpret_cast<Cache::Handle*>(e);
}

bool LRUCacheShard::Ref(Cache::Handle* h) {
  LRUHandle* handle = reinterpret_cast<LRUHandle*>(h);
  MutexLock l(&mutex_);
  if (handle->InCache() && handle->refs == 1) {
    LRU_Remove(handle);
  }
  handle->refs++;
  return true;
}

void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
  MutexLock l(&mutex_);
  high_pri_pool_ratio_ = high_pri_pool_ratio;
  high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
  MaintainPoolSize();
}

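// Releases a handle obtained from Lookup() or Insert(). Returns true if the
// release caused the entry to be freed, which happens when this was the last
// reference to an entry that is no longer in the cache, or when force_erase
// is set / the cache is over capacity and no one else references the entry.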
bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
  if (handle == nullptr) {
    return false;
  }
  LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    last_reference = Unref(e);
    if (last_reference) {
      usage_ -= e->charge;
    }
    if (e->refs == 1 && e->InCache()) {
      // The item is still in cache, and nobody else holds a reference to it
      if (usage_ > capacity_ || force_erase) {
        // the cache is full
        // The LRU list must be empty since the cache is full
        assert(!(usage_ > capacity_) || lru_.next == &lru_);
        // take this opportunity and remove the item
        table_.Remove(e->key(), e->hash);
        e->SetInCache(false);
        Unref(e);
        usage_ -= e->charge;
        last_reference = true;
      } else {
        // put the item on the list to be potentially freed
        LRU_Insert(e);
      }
    }
  }

  // free outside of mutex
  if (last_reference) {
    e->Free();
  }
  return last_reference;
}

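// Inserts a key/value pair with the given charge; the handle and the key
// bytes are allocated in a single block. If there is not enough room even
// after evicting from the LRU list, the outcome depends on
// strict_capacity_limit_ and whether the caller asked for a handle: the new
// entry may be dropped immediately (still returning OK) or the insert may
// fail with Status::Incomplete.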
Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
                             size_t charge,
                             void (*deleter)(const Slice& key, void* value),
                             Cache::Handle** handle, Cache::Priority priority) {
  // Allocate the memory here outside of the mutex
  // If the cache is full, we'll have to release it
  // It shouldn't happen very often though.
  LRUHandle* e = reinterpret_cast<LRUHandle*>(
      new char[sizeof(LRUHandle) - 1 + key.size()]);
  Status s;
  autovector<LRUHandle*> last_reference_list;

  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->flags = 0;
  e->hash = hash;
  e->refs = (handle == nullptr
                 ? 1
                 : 2);  // One from LRUCache, one for the returned handle
  e->next = e->prev = nullptr;
  e->SetInCache(true);
  e->SetPriority(priority);
  memcpy(e->key_data, key.data(), key.size());

  {
    MutexLock l(&mutex_);

    // Free the space following strict LRU policy until enough space
    // is freed or the lru list is empty
    EvictFromLRU(charge, &last_reference_list);

    if (usage_ - lru_usage_ + charge > capacity_ &&
        (strict_capacity_limit_ || handle == nullptr)) {
      if (handle == nullptr) {
        // Don't insert the entry but still return ok, as if the entry were
        // inserted into the cache and then evicted immediately.
        last_reference_list.push_back(e);
      } else {
        delete[] reinterpret_cast<char*>(e);
        *handle = nullptr;
        s = Status::Incomplete("Insert failed due to LRU cache being full.");
      }
    } else {
      // Insert into the cache. Note that the cache might get larger than its
      // capacity if not enough space was freed up.
      LRUHandle* old = table_.Insert(e);
      usage_ += e->charge;
      if (old != nullptr) {
        old->SetInCache(false);
        if (Unref(old)) {
          usage_ -= old->charge;
          // old is on the LRU list because it was in the cache and the cache
          // held its only remaining reference (Unref just dropped refs to 0)
          LRU_Remove(old);
          last_reference_list.push_back(old);
        }
      }
      if (handle == nullptr) {
        LRU_Insert(e);
      } else {
        *handle = reinterpret_cast<Cache::Handle*>(e);
      }
      s = Status::OK();
    }
  }

  // Free the entries here outside of the mutex for performance reasons.
  for (auto entry : last_reference_list) {
    entry->Free();
  }

  return s;
}

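// Erases the entry for key from the hash table. If no client holds a
// reference, the entry is freed immediately; otherwise it is only marked as
// no longer in the cache and is freed when the last handle is released.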
void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
  LRUHandle* e;
  bool last_reference = false;
  {
    MutexLock l(&mutex_);
    e = table_.Remove(key, hash);
    if (e != nullptr) {
      last_reference = Unref(e);
      if (last_reference) {
        usage_ -= e->charge;
      }
      if (last_reference && e->InCache()) {
        LRU_Remove(e);
      }
      e->SetInCache(false);
    }
  }

  // mutex not held here
  // last_reference will only be true if e != nullptr
  if (last_reference) {
    e->Free();
  }
}

size_t LRUCacheShard::GetUsage() const {
  MutexLock l(&mutex_);
  return usage_;
}

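// Pinned usage is the memory charged to entries that clients currently hold
// handles to, i.e. total usage minus the usage of entries sitting on the
// (evictable) LRU list.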
size_t LRUCacheShard::GetPinnedUsage() const {
  MutexLock l(&mutex_);
  assert(usage_ >= lru_usage_);
  return usage_ - lru_usage_;
}

std::string LRUCacheShard::GetPrintableOptions() const {
  const int kBufferSize = 200;
  char buffer[kBufferSize];
  {
    MutexLock l(&mutex_);
    snprintf(buffer, kBufferSize, "    high_pri_pool_ratio: %.3lf\n",
             high_pri_pool_ratio_);
  }
  return std::string(buffer);
}

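// The top-level LRUCache splits its capacity evenly across 2^num_shard_bits
// shards; each shard has its own mutex, hash table and LRU list, which keeps
// lock contention low under concurrent access.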
LRUCache::LRUCache(size_t capacity, int num_shard_bits,
                   bool strict_capacity_limit, double high_pri_pool_ratio)
    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
  num_shards_ = 1 << num_shard_bits;
  shards_ = reinterpret_cast<LRUCacheShard*>(
      port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
  size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
  for (int i = 0; i < num_shards_; i++) {
    new (&shards_[i])
        LRUCacheShard(per_shard, strict_capacity_limit, high_pri_pool_ratio);
  }
}

LRUCache::~LRUCache() {
  if (shards_ != nullptr) {
    assert(num_shards_ > 0);
    for (int i = 0; i < num_shards_; i++) {
      shards_[i].~LRUCacheShard();
    }
    port::cacheline_aligned_free(shards_);
  }
}

CacheShard* LRUCache::GetShard(int shard) {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

const CacheShard* LRUCache::GetShard(int shard) const {
  return reinterpret_cast<CacheShard*>(&shards_[shard]);
}

void* LRUCache::Value(Handle* handle) {
  return reinterpret_cast<const LRUHandle*>(handle)->value;
}

size_t LRUCache::GetCharge(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->charge;
}

uint32_t LRUCache::GetHash(Handle* handle) const {
  return reinterpret_cast<const LRUHandle*>(handle)->hash;
}

void LRUCache::DisownData() {
  // Do not drop the data if compiled with ASAN, to suppress the leak warning.
#if defined(__clang__)
#if !defined(__has_feature) || !__has_feature(address_sanitizer)
  shards_ = nullptr;
  num_shards_ = 0;
#endif
#else  // __clang__
#ifndef __SANITIZE_ADDRESS__
  shards_ = nullptr;
  num_shards_ = 0;
#endif  // !__SANITIZE_ADDRESS__
#endif  // __clang__
}

size_t LRUCache::TEST_GetLRUSize() {
  size_t lru_size_of_all_shards = 0;
  for (int i = 0; i < num_shards_; i++) {
    lru_size_of_all_shards += shards_[i].TEST_GetLRUSize();
  }
  return lru_size_of_all_shards;
}

double LRUCache::GetHighPriPoolRatio() {
  double result = 0.0;
  if (num_shards_ > 0) {
    result = shards_[0].GetHighPriPoolRatio();
  }
  return result;
}

std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
  return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
                     cache_opts.strict_capacity_limit,
                     cache_opts.high_pri_pool_ratio);
}

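// Validates the parameters and constructs a sharded LRUCache. Returns nullptr
// if num_shard_bits is unreasonably large (>= 20) or high_pri_pool_ratio is
// outside [0.0, 1.0]. A negative num_shard_bits selects a default based on
// the capacity.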
std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
                                   bool strict_capacity_limit,
                                   double high_pri_pool_ratio) {
  if (num_shard_bits >= 20) {
    return nullptr;  // the cache cannot be sharded into too many fine pieces
  }
  if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
    // invalid high_pri_pool_ratio
    return nullptr;
  }
  if (num_shard_bits < 0) {
    num_shard_bits = GetDefaultCacheShardBits(capacity);
  }
  return std::make_shared<LRUCache>(capacity, num_shard_bits,
                                    strict_capacity_limit, high_pri_pool_ratio);
}

}  // namespace rocksdb