// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Copyright (C) 2015 CohortFS, LLC.
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */

#ifndef COHORT_LRU_H
#define COHORT_LRU_H

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <mutex>
#include <utility>
#include <vector>

#include <boost/intrusive/list.hpp>
#include <boost/intrusive/slist.hpp>

#include "common/likely.h"

#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE 64 /* XXX arch-specific define */
#endif
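/* CACHE_PAD(n) declares a cache-line-sized pad member; it is used below to
 * keep each Lane/Partition (and its lock) on its own cache line and avoid
 * false sharing between CPUs. */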
#define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE]

namespace cohort {

  namespace lru {

    namespace bi = boost::intrusive;

    /* public flag values */
    constexpr uint32_t FLAG_NONE = 0x0000;
    constexpr uint32_t FLAG_INITIAL = 0x0001;
    constexpr uint32_t FLAG_RECYCLE = 0x0002;

    enum class Edge : std::uint8_t
    {
      MRU = 0,
      LRU
    };

    typedef bi::link_mode<bi::safe_link> link_mode;

    class Object
    {
    private:
      uint32_t lru_flags;
      std::atomic<uint32_t> lru_refcnt;
      std::atomic<uint32_t> lru_adj;
      bi::list_member_hook< link_mode > lru_hook;

      typedef bi::list<Object,
                       bi::member_hook<
                         Object, bi::list_member_hook< link_mode >,
                         &Object::lru_hook >,
                       bi::constant_time_size<true>> Queue;

      bi::slist_member_hook< link_mode > q2_hook;

      typedef bi::slist<Object,
                        bi::member_hook<
                          Object, bi::slist_member_hook< link_mode >,
                          &Object::q2_hook >,
                        bi::constant_time_size<true>> Queue2;

    public:

      Object() : lru_flags(FLAG_NONE), lru_refcnt(0), lru_adj(0) {}

      uint32_t get_refcnt() const { return lru_refcnt; }

      virtual bool reclaim() = 0;

      virtual ~Object() {}

    private:
      template <typename LK>
      friend class LRU;

      template <typename T, typename TTree, typename CLT, typename CEQ,
                typename K, typename LK>
      friend class TreeX;
    };

    /* allocator & recycler interface (create or re-use LRU objects) */
    class ObjectFactory
    {
    public:
      virtual Object* alloc(void) = 0;
      virtual void recycle(Object*) = 0;
      virtual ~ObjectFactory() {}
    };

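    /* Usage sketch (illustrative; not part of the original header): a cache
     * entry derives from Object and implements reclaim(); a matching factory
     * hands LRU<> new or recycled entries. The names MyEntry and
     * MyEntryFactory are hypothetical.
     *
     *   class MyEntry : public cohort::lru::Object {
     *   public:
     *     uint64_t key;
     *     explicit MyEntry(uint64_t k) : key(k) {}
     *     bool reclaim() override {
     *       return true; // safe to evict; release external resources here
     *     }
     *   };
     *
     *   class MyEntryFactory : public cohort::lru::ObjectFactory {
     *   public:
     *     uint64_t key; // construction argument for the next alloc/recycle
     *     cohort::lru::Object* alloc() override { return new MyEntry(key); }
     *     void recycle(cohort::lru::Object* o) override {
     *       static_cast<MyEntry*>(o)->key = key; // re-initialize in place
     *     }
     *   };
     */
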
    template <typename LK>
    class LRU
    {
    private:

      struct Lane {
        LK lock;
        Object::Queue q;
        // Object::Queue pinned; /* placeholder for possible expansion */
        CACHE_PAD(0);
        Lane() {}
      };

      Lane *qlane;
      int n_lanes;
      std::atomic<uint32_t> evict_lane;
      const uint32_t lane_hiwat;

      static constexpr uint32_t lru_adj_modulus = 5;

      static constexpr uint32_t SENTINEL_REFCNT = 1;

      /* internal flag values */
      static constexpr uint32_t FLAG_INLRU = 0x0001;
      static constexpr uint32_t FLAG_PINNED = 0x0002; // possible future use
      static constexpr uint32_t FLAG_EVICTING = 0x0004;

      Lane& lane_of(void* addr) {
        return qlane[(uint64_t)(addr) % n_lanes];
      }

      uint32_t next_evict_lane() {
        return (evict_lane++ % n_lanes);
      }

      bool can_reclaim(Object* o) {
        return ((o->lru_refcnt == SENTINEL_REFCNT) &&
                (!(o->lru_flags & FLAG_EVICTING)));
      }

      Object* evict_block() {
        uint32_t lane_ix = next_evict_lane();
        for (int ix = 0; ix < n_lanes; ++ix,
               lane_ix = next_evict_lane()) {
          Lane& lane = qlane[lane_ix];
          lane.lock.lock();
          if (lane.q.empty()) {
            /* nothing to evict in this lane (back() on an empty
             * queue would be undefined) */
            lane.lock.unlock();
            continue;
          }
          /* if object at LRU has refcnt==1, it may be reclaimable */
          Object* o = &(lane.q.back());
          if (can_reclaim(o)) {
            ++(o->lru_refcnt);
            o->lru_flags |= FLAG_EVICTING;
            lane.lock.unlock();
            if (o->reclaim()) {
              lane.lock.lock();
              --(o->lru_refcnt);
              /* assertions that o state has not changed across
               * relock */
              assert(o->lru_refcnt == SENTINEL_REFCNT);
              assert(o->lru_flags & FLAG_INLRU);
              Object::Queue::iterator it =
                Object::Queue::s_iterator_to(*o);
              lane.q.erase(it);
              lane.lock.unlock();
              return o;
            } else {
              // XXX can't make unreachable (means what?)
              --(o->lru_refcnt);
              o->lru_flags &= ~FLAG_EVICTING;
              /* unlock in next block */
            }
          } /* can_reclaim(o) */
          lane.lock.unlock();
        } /* each lane */
        return nullptr;
      } /* evict_block */

    public:

      LRU(int lanes, uint32_t _hiwat)
        : n_lanes(lanes), evict_lane(0), lane_hiwat(_hiwat)
      {
        assert(n_lanes > 0);
        qlane = new Lane[n_lanes];
      }

      ~LRU() { delete[] qlane; }

      bool ref(Object* o, uint32_t flags) {
        ++(o->lru_refcnt);
        if (flags & FLAG_INITIAL) {
          if ((++(o->lru_adj) % lru_adj_modulus) == 0) {
            Lane& lane = lane_of(o);
            lane.lock.lock();
            /* move to MRU */
            Object::Queue::iterator it =
              Object::Queue::s_iterator_to(*o);
            lane.q.erase(it);
            lane.q.push_front(*o);
            lane.lock.unlock();
          } /* adj */
        } /* initial ref */
        return true;
      } /* ref */

      void unref(Object* o, uint32_t flags) {
        uint32_t refcnt = --(o->lru_refcnt);
        Object* tdo = nullptr;
        if (unlikely(refcnt == 0)) {
          Lane& lane = lane_of(o);
          lane.lock.lock();
          refcnt = o->lru_refcnt.load();
          if (unlikely(refcnt == 0)) {
            Object::Queue::iterator it =
              Object::Queue::s_iterator_to(*o);
            lane.q.erase(it);
            tdo = o;
          }
          lane.lock.unlock();
        } else if (unlikely(refcnt == SENTINEL_REFCNT)) {
          Lane& lane = lane_of(o);
          lane.lock.lock();
          refcnt = o->lru_refcnt.load();
          if (likely(refcnt == SENTINEL_REFCNT)) {
            /* move to LRU */
            Object::Queue::iterator it =
              Object::Queue::s_iterator_to(*o);
            lane.q.erase(it);
            /* hiwat check */
            if (lane.q.size() > lane_hiwat) {
              tdo = o;
            } else {
              lane.q.push_back(*o);
            }
          }
          lane.lock.unlock();
        }
        /* unref out-of-line && !LOCKED */
        if (tdo)
          delete tdo;
      } /* unref */

      Object* insert(ObjectFactory* fac, Edge edge, uint32_t& flags) {
        /* use supplied functor to re-use an evicted object, or
         * allocate a new one of the descendant type */
        Object* o = evict_block();
        if (o) {
          fac->recycle(o); /* recycle existing object */
          flags |= FLAG_RECYCLE;
        }
        else
          o = fac->alloc(); /* get a new one */

        o->lru_flags = FLAG_INLRU;

        Lane& lane = lane_of(o);
        lane.lock.lock();
        switch (edge) {
        case Edge::MRU:
          lane.q.push_front(*o);
          break;
        case Edge::LRU:
          lane.q.push_back(*o);
          break;
        default:
          abort();
          break;
        }
        if (flags & FLAG_INITIAL)
          o->lru_refcnt += 2; /* sentinel ref + initial */
        else
          ++(o->lru_refcnt); /* sentinel */
        lane.lock.unlock();
        return o;
      } /* insert */

    };

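    /* Usage sketch (illustrative; not part of the original header). LK can be
     * any lockable type such as std::mutex; MyEntry/MyEntryFactory refer to
     * the hypothetical types sketched after ObjectFactory above. Lane count
     * and high-water mark are arbitrary example values.
     *
     *   cohort::lru::LRU<std::mutex> lru(7, 32768);
     *
     *   MyEntryFactory fac;
     *   fac.key = 42;
     *   uint32_t flags = cohort::lru::FLAG_INITIAL;
     *   auto* e = static_cast<MyEntry*>(
     *     lru.insert(&fac, cohort::lru::Edge::MRU, flags));
     *   // ... later refs/unrefs must balance; drop the initial ref when done:
     *   lru.unref(e, cohort::lru::FLAG_NONE);
     */
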
    template <typename T, typename TTree, typename CLT, typename CEQ,
              typename K, typename LK>
    class TreeX
    {
    public:

      static constexpr uint32_t FLAG_NONE = 0x0000;
      static constexpr uint32_t FLAG_LOCK = 0x0001;
      static constexpr uint32_t FLAG_UNLOCK = 0x0002;
      static constexpr uint32_t FLAG_UNLOCK_ON_MISS = 0x0004;

      typedef T value_type;
      typedef TTree container_type;
      typedef typename TTree::iterator iterator;
      typedef std::pair<iterator, bool> check_result;
      typedef typename TTree::insert_commit_data insert_commit_data;
      int n_part;
      int csz;

      typedef std::unique_lock<LK> unique_lock;

      struct Partition {
        LK lock;
        TTree tr;
        T** cache;
        int csz;
        CACHE_PAD(0);

        Partition() : tr(), cache(nullptr), csz(0) {}

        ~Partition() {
          if (csz)
            ::operator delete(cache);
        }
      };

      struct Latch {
        Partition* p;
        LK* lock;
        insert_commit_data commit_data;

        Latch() : p(nullptr), lock(nullptr) {}
      };

      Partition& partition_of_scalar(uint64_t x) {
        return part[x % n_part];
      }

      Partition& get(uint8_t x) {
        return part[x];
      }

      Partition*& get() {
        return part;
      }

      void lock() {
        std::for_each(locks.begin(), locks.end(),
                      [](LK* lk){ lk->lock(); });
      }

      void unlock() {
        std::for_each(locks.begin(), locks.end(),
                      [](LK* lk){ lk->unlock(); });
      }

      TreeX(int n_part=1, int csz=127) : n_part(n_part), csz(csz) {
        assert(n_part > 0);
        part = new Partition[n_part];
        for (int ix = 0; ix < n_part; ++ix) {
          Partition& p = part[ix];
          if (csz) {
            p.csz = csz;
            p.cache = (T**) ::operator new(csz * sizeof(T*));
            memset(p.cache, 0, csz * sizeof(T*));
          }
          locks.push_back(&p.lock);
        }
      }

      ~TreeX() {
        delete[] part;
      }

      T* find(uint64_t hk, const K& k, uint32_t flags) {
        T* v;
        Latch lat;
        uint32_t slot = 0;
        lat.p = &(partition_of_scalar(hk));
        if (flags & FLAG_LOCK) {
          lat.lock = &lat.p->lock;
          lat.lock->lock();
        }
        if (csz) { /* template specialize? */
          slot = hk % csz;
          v = lat.p->cache[slot];
          if (v) {
            if (CEQ()(*v, k)) {
              if (flags & FLAG_LOCK)
                lat.lock->unlock();
              return v;
            }
            v = nullptr;
          }
        } else {
          v = nullptr;
        }
        iterator it = lat.p->tr.find(k, CLT());
        if (it != lat.p->tr.end()) {
          v = &(*(it));
          if (csz) {
            /* fill cache slot at hk */
            lat.p->cache[slot] = v;
          }
        }
        if (flags & FLAG_LOCK)
          lat.lock->unlock();
        return v;
      } /* find */

      T* find_latch(uint64_t hk, const K& k, Latch& lat,
                    uint32_t flags) {
        uint32_t slot = 0;
        T* v;
        lat.p = &(partition_of_scalar(hk));
        lat.lock = &lat.p->lock;
        if (flags & FLAG_LOCK)
          lat.lock->lock();
        if (csz) { /* template specialize? */
          slot = hk % csz;
          v = lat.p->cache[slot];
          if (v) {
            if (CEQ()(*v, k)) {
              if ((flags & FLAG_LOCK) && (flags & FLAG_UNLOCK))
                lat.lock->unlock();
              return v;
            }
            v = nullptr;
          }
        } else {
          v = nullptr;
        }
        check_result r = lat.p->tr.insert_unique_check(
          k, CLT(), lat.commit_data);
        if (! r.second /* !insertable, i.e. an equivalent element was found */) {
          v = &(*(r.first));
          if (csz) {
            /* fill cache slot at hk */
            lat.p->cache[slot] = v;
          }
        }
        if ((flags & FLAG_LOCK) && (flags & FLAG_UNLOCK))
          lat.lock->unlock();
        return v;
      } /* find_latch */

      void insert_latched(T* v, Latch& lat, uint32_t flags) {
        (void) lat.p->tr.insert_unique_commit(*v, lat.commit_data);
        if (flags & FLAG_UNLOCK)
          lat.lock->unlock();
      } /* insert_latched */

      void insert(uint64_t hk, T* v, uint32_t flags) {
        Partition& p = partition_of_scalar(hk);
        if (flags & FLAG_LOCK)
          p.lock.lock();
        p.tr.insert_unique(*v);
        if (flags & FLAG_LOCK)
          p.lock.unlock();
      } /* insert */

      void remove(uint64_t hk, T* v, uint32_t flags) {
        Partition& p = partition_of_scalar(hk);
        iterator it = TTree::s_iterator_to(*v);
        if (flags & FLAG_LOCK)
          p.lock.lock();
        p.tr.erase(it);
        if (csz) { /* template specialize? */
          uint32_t slot = hk % csz;
          T* v2 = p.cache[slot];
          /* we are intrusive, just compare addresses */
          if (v == v2)
            p.cache[slot] = nullptr;
        }
        if (flags & FLAG_LOCK)
          p.lock.unlock();
      } /* remove */

      void drain(std::function<void(T*)> uref,
                 uint32_t flags = FLAG_NONE) {
        /* clear a table, call supplied function on
         * each element found (e.g., returns sentinel
         * references) */
        Object::Queue2 drain_q;
        for (int t_ix = 0; t_ix < n_part; ++t_ix) {
          Partition& p = part[t_ix];
          if (flags & FLAG_LOCK) /* LOCKED */
            p.lock.lock();
          while (p.tr.size() > 0) {
            iterator it = p.tr.begin();
            T* v = &(*it);
            p.tr.erase(it);
            drain_q.push_front(*v);
          }
          if (flags & FLAG_LOCK) /* we locked it, !LOCKED */
            p.lock.unlock();
        } /* each partition */
        /* unref out-of-line && !LOCKED */
        while (drain_q.size() > 0) {
          Object::Queue2::iterator it = drain_q.begin();
          T* v = static_cast<T*>(&(*it));
          drain_q.erase(it); /* must precede uref(v) in safe_link mode */
          uref(v);
        }
      } /* drain */

    private:
      Partition *part;
      std::vector<LK*> locks;
    };

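    /* Usage sketch (illustrative; not part of the original header): the
     * find_latch/insert_latched pair implements insert-if-absent under a held
     * partition lock. FHCache stands for a hypothetical instantiation of
     * TreeX over MyEntry, with a boost::intrusive set as TTree and CLT/CEQ
     * comparing entries against uint64_t keys; hk is the key's hash, key has
     * type K, and lru/fac come from the earlier sketches.
     *
     *   FHCache::Latch lat;
     *   MyEntry* e = cache.find_latch(hk, key, lat, FHCache::FLAG_LOCK);
     *   if (!e) {
     *     uint32_t iflags{cohort::lru::FLAG_INITIAL};
     *     e = static_cast<MyEntry*>(
     *       lru.insert(&fac, cohort::lru::Edge::MRU, iflags));
     *     cache.insert_latched(e, lat, FHCache::FLAG_UNLOCK); // commit+unlock
     *   } else {
     *     lat.lock->unlock(); // hit: release the latch taken by find_latch
     *   }
     */
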
  } /* namespace lru */
} /* namespace cohort */

#endif /* COHORT_LRU_H */