ceph/src/common/cohort_lru.h
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Copyright (C) 2015 CohortFS, LLC.
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */

#ifndef COHORT_LRU_H
#define COHORT_LRU_H

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <mutex>
#include <vector>

#include <boost/intrusive/list.hpp>
#include <boost/intrusive/slist.hpp>

#include "common/likely.h"

#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE 64 /* XXX arch-specific define */
#endif
#define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE]
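/* CACHE_PAD(n) declares a cache-line-sized pad member.  Lane and
 * Partition (below) place one after their lock so that adjacent
 * elements of the lane/partition arrays do not share a cache line
 * (avoids false sharing between per-lane and per-partition locks). */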

namespace cohort {

namespace lru {

namespace bi = boost::intrusive;

/* public flag values */
constexpr uint32_t FLAG_NONE = 0x0000;
constexpr uint32_t FLAG_INITIAL = 0x0001;

enum class Edge : std::uint8_t
{
  MRU = 0,
  LRU
};

typedef bi::link_mode<bi::safe_link> link_mode;

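/* Object is the intrusive base class for anything managed by LRU
 * and/or TreeX.  It carries the LRU bookkeeping (flags, reference
 * count, promotion counter) and the intrusive hooks for the per-lane
 * LRU queue (Queue) and for the drain list used by TreeX (Queue2).
 * Derived types implement reclaim(), which is called when the LRU
 * wants to evict and recycle the object. */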
class Object
{
private:
  uint32_t lru_flags;
  std::atomic<uint32_t> lru_refcnt;
  std::atomic<uint32_t> lru_adj;
  bi::list_member_hook< link_mode > lru_hook;

  typedef bi::list<Object,
                   bi::member_hook<
                     Object, bi::list_member_hook< link_mode >,
                     &Object::lru_hook >,
                   bi::constant_time_size<true>> Queue;

  bi::slist_member_hook< link_mode > q2_hook;

  typedef bi::slist<Object,
                    bi::member_hook<
                      Object, bi::slist_member_hook< link_mode >,
                      &Object::q2_hook >,
                    bi::constant_time_size<true>> Queue2;

public:

  Object() : lru_flags(FLAG_NONE), lru_refcnt(0), lru_adj(0) {}

  uint32_t get_refcnt() const { return lru_refcnt; }

  virtual bool reclaim() = 0;

  virtual ~Object() {}

private:
  template <typename LK>
  friend class LRU;

  template <typename T, typename TTree, typename CLT, typename CEQ,
            typename K, typename LK>
  friend class TreeX;
};

/* allocator & recycler interface (create or re-use LRU objects) */
class ObjectFactory
{
public:
  virtual Object* alloc(void) = 0;
  virtual void recycle(Object*) = 0;
  virtual ~ObjectFactory() {}
};

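/* LRU is a sharded (multi-lane) intrusive LRU.  Objects are hashed
 * onto lanes by address; each lane has its own lock and queue, which
 * reduces lock contention under concurrent ref/unref traffic.  Every
 * object in a lane queue holds one "sentinel" reference owned by the
 * LRU itself; an object whose refcount has fallen back to
 * SENTINEL_REFCNT sits at the tail of its lane and is a candidate
 * for reclaim.  lane_hiwat bounds the per-lane queue length, and
 * lru_adj_modulus rate-limits MRU promotion so that hot objects are
 * not re-queued on every reference.  LK is the lock type
 * (e.g. std::mutex). */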
template <typename LK>
class LRU
{
private:

  struct Lane {
    LK lock;
    Object::Queue q;
    // Object::Queue pinned; /* placeholder for possible expansion */
    CACHE_PAD(0);
    Lane() {}
  };

  Lane *qlane;
  int n_lanes;
  std::atomic<uint32_t> evict_lane;
  const uint32_t lane_hiwat;

  static constexpr uint32_t lru_adj_modulus = 5;

  static constexpr uint32_t SENTINEL_REFCNT = 1;

  /* internal flag values */
  static constexpr uint32_t FLAG_INLRU = 0x0001;
  static constexpr uint32_t FLAG_PINNED = 0x0002; // possible future use
  static constexpr uint32_t FLAG_EVICTING = 0x0004;

  Lane& lane_of(void* addr) {
    return qlane[(uint64_t)(addr) % n_lanes];
  }

  uint32_t next_evict_lane() {
    return (evict_lane++ % n_lanes);
  }

  bool can_reclaim(Object* o) {
    return ((o->lru_refcnt == SENTINEL_REFCNT) &&
            (!(o->lru_flags & FLAG_EVICTING)));
  }

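  /* Scan each lane once, starting from a rotating victim lane,
   * looking for a tail object whose only remaining reference is the
   * sentinel.  The lane lock is dropped around the call to
   * reclaim(), which may block, so the object is pinned with an
   * extra reference and FLAG_EVICTING while unlocked.  Returns an
   * unlinked object ready for recycling, or nullptr. */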
  Object* evict_block() {
    uint32_t lane_ix = next_evict_lane();
    for (int ix = 0; ix < n_lanes; ++ix,
           lane_ix = next_evict_lane()) {
      Lane& lane = qlane[lane_ix];
      lane.lock.lock();
      /* if object at LRU has refcnt==1, it may be reclaimable */
      Object* o = &(lane.q.back());
      if (can_reclaim(o)) {
        ++(o->lru_refcnt);
        o->lru_flags |= FLAG_EVICTING;
        lane.lock.unlock();
        if (o->reclaim()) {
          lane.lock.lock();
          --(o->lru_refcnt);
          /* assertions that o state has not changed across
           * relock */
          assert(o->lru_refcnt == SENTINEL_REFCNT);
          assert(o->lru_flags & FLAG_INLRU);
          Object::Queue::iterator it =
            Object::Queue::s_iterator_to(*o);
          lane.q.erase(it);
          lane.lock.unlock();
          return o;
        } else {
          // XXX can't make unreachable (means what?)
          lane.lock.lock();
          --(o->lru_refcnt);
          o->lru_flags &= ~FLAG_EVICTING;
          /* unlock in next block */
        }
      } /* can_reclaim(o) */
      lane.lock.unlock();
    } /* each lane */
    return nullptr;
  } /* evict_block */

public:

  LRU(int lanes, uint32_t _hiwat)
    : n_lanes(lanes), evict_lane(0), lane_hiwat(_hiwat)
  {
    assert(n_lanes > 0);
    qlane = new Lane[n_lanes];
  }

  ~LRU() { delete[] qlane; }

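  /* ref() takes an additional reference.  When called with
   * FLAG_INITIAL, the object is also promoted to the MRU end of its
   * lane, but only on every lru_adj_modulus-th such call, to limit
   * queue churn.  unref() drops a reference; when the count returns
   * to the sentinel value the object is demoted to the LRU end (or
   * deleted if the lane is over lane_hiwat), and when it reaches
   * zero the object is unlinked and deleted. */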
  bool ref(Object* o, uint32_t flags) {
    ++(o->lru_refcnt);
    if (flags & FLAG_INITIAL) {
      if ((++(o->lru_adj) % lru_adj_modulus) == 0) {
        Lane& lane = lane_of(o);
        lane.lock.lock();
        /* move to MRU */
        Object::Queue::iterator it =
          Object::Queue::s_iterator_to(*o);
        lane.q.erase(it);
        lane.q.push_front(*o);
        lane.lock.unlock();
      } /* adj */
    } /* initial ref */
    return true;
  } /* ref */

  void unref(Object* o, uint32_t flags) {
    uint32_t refcnt = --(o->lru_refcnt);
    Object* tdo = nullptr;
    if (unlikely(refcnt == 0)) {
      Lane& lane = lane_of(o);
      lane.lock.lock();
      refcnt = o->lru_refcnt.load();
      if (unlikely(refcnt == 0)) {
        Object::Queue::iterator it =
          Object::Queue::s_iterator_to(*o);
        lane.q.erase(it);
        tdo = o;
      }
      lane.lock.unlock();
    } else if (unlikely(refcnt == SENTINEL_REFCNT)) {
      Lane& lane = lane_of(o);
      lane.lock.lock();
      refcnt = o->lru_refcnt.load();
      if (likely(refcnt == SENTINEL_REFCNT)) {
        /* move to LRU */
        Object::Queue::iterator it =
          Object::Queue::s_iterator_to(*o);
        lane.q.erase(it);
        /* hiwat check */
        if (lane.q.size() > lane_hiwat) {
          tdo = o;
        } else {
          lane.q.push_back(*o);
        }
      }
      lane.lock.unlock();
    }
    /* unref out-of-line && !LOCKED */
    if (tdo)
      delete tdo;
  } /* unref */

  Object* insert(ObjectFactory* fac, Edge edge, uint32_t flags) {
    /* use supplied functor to re-use an evicted object, or
     * allocate a new one of the descendant type */
    Object* o = evict_block();
    if (o)
      fac->recycle(o); /* recycle existing object */
    else
      o = fac->alloc(); /* get a new one */

    o->lru_flags = FLAG_INLRU;

    Lane& lane = lane_of(o);
    lane.lock.lock();
    switch (edge) {
    case Edge::MRU:
      lane.q.push_front(*o);
      break;
    case Edge::LRU:
      lane.q.push_back(*o);
      break;
    default:
      abort();
      break;
    }
    if (flags & FLAG_INITIAL)
      o->lru_refcnt += 2; /* sentinel ref + initial */
    else
      ++(o->lru_refcnt); /* sentinel */
    lane.lock.unlock();
    return o;
  } /* insert */

};

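/* TreeX is a partitioned, lock-sharded index over intrusive objects.
 * Each partition owns a lock, an intrusive tree (TTree) keyed by K,
 * and an optional direct-mapped pointer cache of csz slots that is
 * consulted before the tree.  CLT is the key/value comparator used
 * for tree searches, CEQ the equality predicate used to validate
 * cache hits, and LK the lock type.  A Latch carries the partition,
 * its lock and the insert_commit_data needed for the two-phase
 * insert_unique_check / insert_unique_commit sequence. */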
template <typename T, typename TTree, typename CLT, typename CEQ,
          typename K, typename LK>
class TreeX
{
public:

  static constexpr uint32_t FLAG_NONE = 0x0000;
  static constexpr uint32_t FLAG_LOCK = 0x0001;
  static constexpr uint32_t FLAG_UNLOCK = 0x0002;
  static constexpr uint32_t FLAG_UNLOCK_ON_MISS = 0x0004;

  typedef T value_type;
  typedef TTree container_type;
  typedef typename TTree::iterator iterator;
  typedef std::pair<iterator, bool> check_result;
  typedef typename TTree::insert_commit_data insert_commit_data;
  int n_part;
  int csz;

  typedef std::unique_lock<LK> unique_lock;

  struct Partition {
    LK lock;
    TTree tr;
    T** cache;
    int csz;
    CACHE_PAD(0);

    Partition() : tr(), cache(nullptr), csz(0) {}

    ~Partition() {
      if (csz)
        ::operator delete(cache);
    }
  };

  struct Latch {
    Partition* p;
    LK* lock;
    insert_commit_data commit_data;

    Latch() : p(nullptr), lock(nullptr) {}
  };

  Partition& partition_of_scalar(uint64_t x) {
    return part[x % n_part];
  }

  Partition& get(uint8_t x) {
    return part[x];
  }

  Partition*& get() {
    return part;
  }

  void lock() {
    std::for_each(locks.begin(), locks.end(),
                  [](LK* lk){ lk->lock(); });
  }

  void unlock() {
    std::for_each(locks.begin(), locks.end(),
                  [](LK* lk){ lk->unlock(); });
  }

  TreeX(int n_part=1, int csz=127) : n_part(n_part), csz(csz) {
    assert(n_part > 0);
    part = new Partition[n_part];
    for (int ix = 0; ix < n_part; ++ix) {
      Partition& p = part[ix];
      if (csz) {
        p.csz = csz;
        p.cache = (T**) ::operator new(csz * sizeof(T*));
        memset(p.cache, 0, csz * sizeof(T*));
      }
      locks.push_back(&p.lock);
    }
  }

  ~TreeX() {
    delete[] part;
  }

  T* find(uint64_t hk, const K& k, uint32_t flags) {
    T* v;
    Latch lat;
    uint32_t slot = 0;
    lat.p = &(partition_of_scalar(hk));
    if (flags & FLAG_LOCK) {
      lat.lock = &lat.p->lock;
      lat.lock->lock();
    }
    if (csz) { /* template specialize? */
      slot = hk % csz;
      v = lat.p->cache[slot];
      if (v) {
        if (CEQ()(*v, k)) {
          if (flags & FLAG_LOCK)
            lat.lock->unlock();
          return v;
        }
        v = nullptr;
      }
    } else {
      v = nullptr;
    }
    iterator it = lat.p->tr.find(k, CLT());
    if (it != lat.p->tr.end()) {
      v = &(*(it));
      if (csz) {
        /* fill cache slot at hk */
        lat.p->cache[slot] = v;
      }
    }
    if (flags & FLAG_LOCK)
      lat.lock->unlock();
    return v;
  } /* find */

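  /* find_latch() combines a lookup with the first half of boost
   * intrusive's two-phase unique insert: it consults the cache, then
   * calls insert_unique_check(), storing the resulting commit data
   * in the caller-supplied Latch along with the partition and its
   * lock.  If the key is absent, the caller can construct the object
   * and finish with insert_latched(), which runs
   * insert_unique_commit() against the saved commit data. */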
  T* find_latch(uint64_t hk, const K& k, Latch& lat,
                uint32_t flags) {
    uint32_t slot = 0;
    T* v;
    lat.p = &(partition_of_scalar(hk));
    lat.lock = &lat.p->lock;
    if (flags & FLAG_LOCK)
      lat.lock->lock();
    if (csz) { /* template specialize? */
      slot = hk % csz;
      v = lat.p->cache[slot];
      if (v) {
        if (CEQ()(*v, k)) {
          if (flags & (FLAG_LOCK|FLAG_UNLOCK))
            lat.lock->unlock();
          return v;
        }
        v = nullptr;
      }
    } else {
      v = nullptr;
    }
    check_result r = lat.p->tr.insert_unique_check(
      k, CLT(), lat.commit_data);
    if (! r.second /* !insertable (i.e., found) */) {
      v = &(*(r.first));
      if (csz) {
        /* fill cache slot at hk */
        lat.p->cache[slot] = v;
      }
    }
    if (flags & (FLAG_LOCK|FLAG_UNLOCK))
      lat.lock->unlock();
    return v;
  } /* find_latch */

  void insert_latched(T* v, Latch& lat, uint32_t flags) {
    (void) lat.p->tr.insert_unique_commit(*v, lat.commit_data);
    if (flags & FLAG_UNLOCK)
      lat.lock->unlock();
  } /* insert_latched */

  void insert(uint64_t hk, T* v, uint32_t flags) {
    Partition& p = partition_of_scalar(hk);
    if (flags & FLAG_LOCK)
      p.lock.lock();
    p.tr.insert_unique(*v);
    if (flags & FLAG_LOCK)
      p.lock.unlock();
  } /* insert */

  void remove(uint64_t hk, T* v, uint32_t flags) {
    Partition& p = partition_of_scalar(hk);
    iterator it = TTree::s_iterator_to(*v);
    if (flags & FLAG_LOCK)
      p.lock.lock();
    p.tr.erase(it);
    if (csz) { /* template specialize? */
      uint32_t slot = hk % csz;
      T* v2 = p.cache[slot];
      /* we are intrusive, just compare addresses */
      if (v == v2)
        p.cache[slot] = nullptr;
    }
    if (flags & FLAG_LOCK)
      p.lock.unlock();
  } /* remove */

  void drain(std::function<void(T*)> uref,
             uint32_t flags = FLAG_NONE) {
    /* clear a table, call supplied function on
     * each element found (e.g., returns sentinel
     * references) */
    Object::Queue2 drain_q;
    for (int t_ix = 0; t_ix < n_part; ++t_ix) {
      Partition& p = part[t_ix];
      if (flags & FLAG_LOCK) /* LOCKED */
        p.lock.lock();
      while (p.tr.size() > 0) {
        iterator it = p.tr.begin();
        T* v = &(*it);
        p.tr.erase(it);
        drain_q.push_front(*v);
      }
      if (flags & FLAG_LOCK) /* we locked it, !LOCKED */
        p.lock.unlock();
    } /* each partition */
    /* unref out-of-line && !LOCKED */
    while (drain_q.size() > 0) {
      Object::Queue2::iterator it = drain_q.begin();
      T* v = static_cast<T*>(&(*it));
      drain_q.erase(it); /* must precede uref(v) in safe_link mode */
      uref(v);
    }
  } /* drain */

private:
  Partition *part;
  std::vector<LK*> locks;
};

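/* Usage sketch (illustrative only): LruObject, ObFactory and the
 * std::mutex lock choice below are hypothetical names, not part of
 * this header.  A TreeX over the same object type is typically used
 * alongside the LRU to index the objects by key (the friend
 * declaration in Object gives TreeX access to the Queue2 hook used
 * by drain()).
 *
 *   class LruObject : public cohort::lru::Object {
 *     uint64_t key;
 *   public:
 *     explicit LruObject(uint64_t k) : key(k) {}
 *     bool reclaim() override { return true; } // safe to evict/recycle
 *   };
 *
 *   class ObFactory : public cohort::lru::ObjectFactory {
 *     uint64_t k;
 *   public:
 *     explicit ObFactory(uint64_t k) : k(k) {}
 *     cohort::lru::Object* alloc() override { return new LruObject(k); }
 *     void recycle(cohort::lru::Object* o) override {
 *       // re-initialize the evicted object for key k (e.g. by
 *       // placement new or an application-specific reset)
 *     }
 *   };
 *
 *   cohort::lru::LRU<std::mutex> lru(7, 1024);  // 7 lanes, hiwat 1024
 *   ObFactory fac(42);
 *   cohort::lru::Object* o =
 *     lru.insert(&fac, cohort::lru::Edge::MRU, cohort::lru::FLAG_INITIAL);
 *   // ... use the object, then drop the initial reference:
 *   lru.unref(o, cohort::lru::FLAG_NONE);
 */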
} /* namespace lru */
} /* namespace cohort */

#endif /* COHORT_LRU_H */