]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/cache/lru_cache.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / rocksdb / cache / lru_cache.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9#pragma once
10
11#include <string>
12
13#include "cache/sharded_cache.h"
14
15#include "port/port.h"
16#include "util/autovector.h"
17
18namespace rocksdb {
19
20// LRU cache implementation
21
22// An entry is a variable length heap-allocated structure.
23// Entries are referenced by cache and/or by any external entity.
24// The cache keeps all its entries in table. Some elements
25// are also stored on LRU list.
26//
27// LRUHandle can be in these states:
28// 1. Referenced externally AND in hash table.
29// In that case the entry is *not* in the LRU. (refs > 1 && in_cache == true)
30// 2. Not referenced externally and in hash table. In that case the entry is
31// in the LRU and can be freed. (refs == 1 && in_cache == true)
32// 3. Referenced externally and not in hash table. In that case the entry is
33// in not on LRU and not in table. (refs >= 1 && in_cache == false)
34//
35// All newly created LRUHandles are in state 1. If you call
36// LRUCacheShard::Release
37// on entry in state 1, it will go into state 2. To move from state 1 to
38// state 3, either call LRUCacheShard::Erase or LRUCacheShard::Insert with the
39// same key.
40// To move from state 2 to state 1, use LRUCacheShard::Lookup.
41// Before destruction, make sure that no handles are in state 1. This means
42// that any successful LRUCacheShard::Lookup/LRUCacheShard::Insert have a
43// matching
44// RUCache::Release (to move into state 2) or LRUCacheShard::Erase (for state 3)
45
46struct LRUHandle {
47 void* value;
48 void (*deleter)(const Slice&, void* value);
49 LRUHandle* next_hash;
50 LRUHandle* next;
51 LRUHandle* prev;
52 size_t charge; // TODO(opt): Only allow uint32_t?
53 size_t key_length;
54 uint32_t refs; // a number of refs to this entry
55 // cache itself is counted as 1
56
57 // Include the following flags:
58 // in_cache: whether this entry is referenced by the hash table.
59 // is_high_pri: whether this entry is high priority entry.
11fdf7f2 60 // in_high_pri_pool: whether this entry is in high-pri pool.
7c673cae
FG
61 char flags;
62
63 uint32_t hash; // Hash of key(); used for fast sharding and comparisons
64
65 char key_data[1]; // Beginning of key
66
67 Slice key() const {
68 // For cheaper lookups, we allow a temporary Handle object
69 // to store a pointer to a key in "value".
70 if (next == this) {
71 return *(reinterpret_cast<Slice*>(value));
72 } else {
73 return Slice(key_data, key_length);
74 }
75 }
76
77 bool InCache() { return flags & 1; }
78 bool IsHighPri() { return flags & 2; }
79 bool InHighPriPool() { return flags & 4; }
11fdf7f2 80 bool HasHit() { return flags & 8; }
7c673cae
FG
81
82 void SetInCache(bool in_cache) {
83 if (in_cache) {
84 flags |= 1;
85 } else {
86 flags &= ~1;
87 }
88 }
89
90 void SetPriority(Cache::Priority priority) {
91 if (priority == Cache::Priority::HIGH) {
92 flags |= 2;
93 } else {
94 flags &= ~2;
95 }
96 }
97
98 void SetInHighPriPool(bool in_high_pri_pool) {
99 if (in_high_pri_pool) {
100 flags |= 4;
101 } else {
102 flags &= ~4;
103 }
104 }
105
11fdf7f2
TL
106 void SetHit() { flags |= 8; }
107
7c673cae
FG
108 void Free() {
109 assert((refs == 1 && InCache()) || (refs == 0 && !InCache()));
110 if (deleter) {
111 (*deleter)(key(), value);
112 }
113 delete[] reinterpret_cast<char*>(this);
114 }
115};
116
117// We provide our own simple hash table since it removes a whole bunch
118// of porting hacks and is also faster than some of the built-in hash
119// table implementations in some of the compiler/runtime combinations
120// we have tested. E.g., readrandom speeds up by ~5% over the g++
121// 4.4.3's builtin hashtable.
122class LRUHandleTable {
123 public:
124 LRUHandleTable();
125 ~LRUHandleTable();
126
127 LRUHandle* Lookup(const Slice& key, uint32_t hash);
128 LRUHandle* Insert(LRUHandle* h);
129 LRUHandle* Remove(const Slice& key, uint32_t hash);
130
131 template <typename T>
132 void ApplyToAllCacheEntries(T func) {
133 for (uint32_t i = 0; i < length_; i++) {
134 LRUHandle* h = list_[i];
135 while (h != nullptr) {
136 auto n = h->next_hash;
137 assert(h->InCache());
138 func(h);
139 h = n;
140 }
141 }
142 }
143
144 private:
145 // Return a pointer to slot that points to a cache entry that
146 // matches key/hash. If there is no such cache entry, return a
147 // pointer to the trailing slot in the corresponding linked list.
148 LRUHandle** FindPointer(const Slice& key, uint32_t hash);
149
150 void Resize();
151
152 // The table consists of an array of buckets where each bucket is
153 // a linked list of cache entries that hash into the bucket.
11fdf7f2 154 LRUHandle** list_;
7c673cae
FG
155 uint32_t length_;
156 uint32_t elems_;
7c673cae
FG
157};
158
159// A single shard of sharded cache.
11fdf7f2 160class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard {
7c673cae 161 public:
11fdf7f2
TL
162 LRUCacheShard(size_t capacity, bool strict_capacity_limit,
163 double high_pri_pool_ratio);
7c673cae
FG
164 virtual ~LRUCacheShard();
165
166 // Separate from constructor so caller can easily make an array of LRUCache
167 // if current usage is more than new capacity, the function will attempt to
168 // free the needed space
169 virtual void SetCapacity(size_t capacity) override;
170
171 // Set the flag to reject insertion if cache if full.
172 virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;
173
174 // Set percentage of capacity reserved for high-pri cache entries.
175 void SetHighPriorityPoolRatio(double high_pri_pool_ratio);
176
177 // Like Cache methods, but with an extra "hash" parameter.
178 virtual Status Insert(const Slice& key, uint32_t hash, void* value,
179 size_t charge,
180 void (*deleter)(const Slice& key, void* value),
181 Cache::Handle** handle,
182 Cache::Priority priority) override;
183 virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
184 virtual bool Ref(Cache::Handle* handle) override;
185 virtual bool Release(Cache::Handle* handle,
186 bool force_erase = false) override;
187 virtual void Erase(const Slice& key, uint32_t hash) override;
188
189 // Although in some platforms the update of size_t is atomic, to make sure
190 // GetUsage() and GetPinnedUsage() work correctly under any platform, we'll
191 // protect them with mutex_.
192
193 virtual size_t GetUsage() const override;
194 virtual size_t GetPinnedUsage() const override;
195
196 virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
197 bool thread_safe) override;
198
199 virtual void EraseUnRefEntries() override;
200
201 virtual std::string GetPrintableOptions() const override;
202
203 void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri);
204
11fdf7f2
TL
205 // Retrieves number of elements in LRU, for unit test purpose only
206 // not threadsafe
207 size_t TEST_GetLRUSize();
208
209 // Retrives high pri pool ratio
210 double GetHighPriPoolRatio();
211
7c673cae
FG
212 private:
213 void LRU_Remove(LRUHandle* e);
214 void LRU_Insert(LRUHandle* e);
215
216 // Overflow the last entry in high-pri pool to low-pri pool until size of
217 // high-pri pool is no larger than the size specify by high_pri_pool_pct.
218 void MaintainPoolSize();
219
220 // Just reduce the reference count by 1.
221 // Return true if last reference
222 bool Unref(LRUHandle* e);
223
224 // Free some space following strict LRU policy until enough space
225 // to hold (usage_ + charge) is freed or the lru list is empty
226 // This function is not thread safe - it needs to be executed while
227 // holding the mutex_
228 void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
229
230 // Initialized before use.
231 size_t capacity_;
232
7c673cae
FG
233 // Memory size for entries in high-pri pool.
234 size_t high_pri_pool_usage_;
235
236 // Whether to reject insertion if cache reaches its full capacity.
237 bool strict_capacity_limit_;
238
239 // Ratio of capacity reserved for high priority cache entries.
240 double high_pri_pool_ratio_;
241
242 // High-pri pool size, equals to capacity * high_pri_pool_ratio.
243 // Remember the value to avoid recomputing each time.
244 double high_pri_pool_capacity_;
245
7c673cae
FG
246 // Dummy head of LRU list.
247 // lru.prev is newest entry, lru.next is oldest entry.
248 // LRU contains items which can be evicted, ie reference only by cache
249 LRUHandle lru_;
250
251 // Pointer to head of low-pri pool in LRU list.
252 LRUHandle* lru_low_pri_;
253
11fdf7f2
TL
254 // ------------^^^^^^^^^^^^^-----------
255 // Not frequently modified data members
256 // ------------------------------------
257 //
258 // We separate data members that are updated frequently from the ones that
259 // are not frequently updated so that they don't share the same cache line
260 // which will lead into false cache sharing
261 //
262 // ------------------------------------
263 // Frequently modified data members
264 // ------------vvvvvvvvvvvvv-----------
7c673cae 265 LRUHandleTable table_;
11fdf7f2
TL
266
267 // Memory size for entries residing in the cache
268 size_t usage_;
269
270 // Memory size for entries residing only in the LRU list
271 size_t lru_usage_;
272
273 // mutex_ protects the following state.
274 // We don't count mutex_ as the cache's internal state so semantically we
275 // don't mind mutex_ invoking the non-const actions.
276 mutable port::Mutex mutex_;
7c673cae
FG
277};
278
279class LRUCache : public ShardedCache {
280 public:
281 LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
282 double high_pri_pool_ratio);
283 virtual ~LRUCache();
284 virtual const char* Name() const override { return "LRUCache"; }
285 virtual CacheShard* GetShard(int shard) override;
286 virtual const CacheShard* GetShard(int shard) const override;
287 virtual void* Value(Handle* handle) override;
288 virtual size_t GetCharge(Handle* handle) const override;
289 virtual uint32_t GetHash(Handle* handle) const override;
290 virtual void DisownData() override;
291
11fdf7f2
TL
292 // Retrieves number of elements in LRU, for unit test purpose only
293 size_t TEST_GetLRUSize();
294 // Retrives high pri pool ratio
295 double GetHighPriPoolRatio();
296
7c673cae 297 private:
11fdf7f2
TL
298 LRUCacheShard* shards_ = nullptr;
299 int num_shards_ = 0;
7c673cae
FG
300};
301
302} // namespace rocksdb