// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Copyright (C) 2015 CohortFS, LLC.
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */

#ifndef COHORT_LRU_H
#define COHORT_LRU_H

#include <boost/intrusive/list.hpp>
#include <boost/intrusive/slist.hpp>

#include <algorithm>  // std::for_each
#include <atomic>     // std::atomic
#include <cstdint>    // std::uint8_t, uint32_t, uint64_t
#include <cstring>    // memset
#include <functional> // std::function
#include <mutex>      // std::unique_lock
#include <utility>    // std::pair
#include <vector>     // std::vector

#include "include/ceph_assert.h" // ceph_assert(), ceph_abort()
#include "common/likely.h"

#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE 64 /* XXX arch-specific define */
#endif
#define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE]

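/*
 * CACHE_PAD embeds a cache-line-sized filler member so that adjacent,
 * independently locked structures (LRU lanes, TreeX partitions) do not
 * share a cache line, limiting false sharing between CPUs.
 */
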
namespace cohort {

  namespace lru {

    namespace bi = boost::intrusive;

    /* public flag values */
    constexpr uint32_t FLAG_NONE = 0x0000;
    constexpr uint32_t FLAG_INITIAL = 0x0001;
    constexpr uint32_t FLAG_RECYCLE = 0x0002;

    enum class Edge : std::uint8_t
    {
      MRU = 0,
      LRU
    };

    typedef bi::link_mode<bi::safe_link> link_mode;

    class ObjectFactory; // Forward declaration

    class Object
    {
    private:
      uint32_t lru_flags;
      std::atomic<uint32_t> lru_refcnt;
      std::atomic<uint32_t> lru_adj;
      bi::list_member_hook<link_mode> lru_hook;

      typedef bi::list<Object,
                       bi::member_hook<
                         Object, bi::list_member_hook<link_mode>,
                         &Object::lru_hook>,
                       bi::constant_time_size<true>> Queue;

      bi::slist_member_hook<link_mode> q2_hook;

      typedef bi::slist<Object,
                        bi::member_hook<
                          Object, bi::slist_member_hook<link_mode>,
                          &Object::q2_hook>,
                        bi::constant_time_size<true>> Queue2;

    public:

      Object() : lru_flags(FLAG_NONE), lru_refcnt(0), lru_adj(0) {}

      uint32_t get_refcnt() const { return lru_refcnt; }

      virtual bool reclaim(const ObjectFactory* newobj_fac) = 0;

      virtual ~Object() {}

    private:
      template <typename LK>
      friend class LRU;

      template <typename T, typename TTree, typename CLT, typename CEQ,
                typename K, typename LK>
      friend class TreeX;
    };

    /* allocator & recycler interface (create or re-use LRU objects) */
    class ObjectFactory
    {
    public:
      virtual Object* alloc(void) = 0;
      virtual void recycle(Object*) = 0;
      virtual ~ObjectFactory() {}
    };
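
    /*
     * Illustrative sketch (not part of this header): a cached type derives
     * from Object and supplies an ObjectFactory the LRU can use to allocate
     * or recycle instances.  The names ExampleEntry and ExampleFactory are
     * hypothetical.
     *
     *   class ExampleEntry : public Object {
     *   public:
     *     bool reclaim(const ObjectFactory* newobj_fac) override {
     *       // return true only if this entry can be torn down now
     *       return true;
     *     }
     *   };
     *
     *   class ExampleFactory : public ObjectFactory {
     *   public:
     *     Object* alloc(void) override { return new ExampleEntry(); }
     *     void recycle(Object* o) override {
     *       // re-initialize a reclaimed ExampleEntry for its new identity
     *     }
     *   };
     */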

    template <typename LK>
    class LRU
    {
    private:

      struct Lane {
        LK lock;
        Object::Queue q;
        // Object::Queue pinned; /* placeholder for possible expansion */
        CACHE_PAD(0);
        Lane() {}
      };

      Lane *qlane;
      int n_lanes;
      std::atomic<uint32_t> evict_lane;
      const uint32_t lane_hiwat;

      static constexpr uint32_t lru_adj_modulus = 5;

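      /* Every object resident in a lane queue holds one implicit
       * "sentinel" reference; lru_refcnt == SENTINEL_REFCNT therefore
       * means the object is reachable only through the LRU and is a
       * candidate for reclaim. */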
      static constexpr uint32_t SENTINEL_REFCNT = 1;

      /* internal flag values */
      static constexpr uint32_t FLAG_INLRU = 0x0001;
      static constexpr uint32_t FLAG_PINNED = 0x0002; // possible future use
      static constexpr uint32_t FLAG_EVICTING = 0x0004;

      Lane& lane_of(void* addr) {
        return qlane[(uint64_t)(addr) % n_lanes];
      }

      uint32_t next_evict_lane() {
        return (evict_lane++ % n_lanes);
      }

      bool can_reclaim(Object* o) {
        return ((o->lru_refcnt == SENTINEL_REFCNT) &&
                (!(o->lru_flags & FLAG_EVICTING)));
      }

      Object* evict_block(const ObjectFactory* newobj_fac) {
        uint32_t lane_ix = next_evict_lane();
        for (int ix = 0; ix < n_lanes; ++ix,
               lane_ix = next_evict_lane()) {
          Lane& lane = qlane[lane_ix];
          std::unique_lock lane_lock{lane.lock};
          if (lane.q.empty())
            continue; /* nothing to evict in an empty lane */
          /* if object at LRU has refcnt==1, it may be reclaimable */
          Object* o = &(lane.q.back());
          if (can_reclaim(o)) {
            ++(o->lru_refcnt);
            o->lru_flags |= FLAG_EVICTING;
            lane_lock.unlock();
            if (o->reclaim(newobj_fac)) {
              lane_lock.lock();
              --(o->lru_refcnt);
              /* assertions that o state has not changed across
               * relock */
              ceph_assert(o->lru_refcnt == SENTINEL_REFCNT);
              ceph_assert(o->lru_flags & FLAG_INLRU);
              Object::Queue::iterator it =
                Object::Queue::s_iterator_to(*o);
              lane.q.erase(it);
              return o;
            } else {
              --(o->lru_refcnt);
              o->lru_flags &= ~FLAG_EVICTING;
              /* unlock in next block */
            }
          } /* can_reclaim(o) */
        } /* each lane */
        return nullptr;
      } /* evict_block */

    public:

      LRU(int lanes, uint32_t _hiwat)
        : n_lanes(lanes), evict_lane(0), lane_hiwat(_hiwat)
      {
        ceph_assert(n_lanes > 0);
        qlane = new Lane[n_lanes];
      }

      ~LRU() { delete[] qlane; }

      bool ref(Object* o, uint32_t flags) {
        ++(o->lru_refcnt);
        if (flags & FLAG_INITIAL) {
          if ((++(o->lru_adj) % lru_adj_modulus) == 0) {
            Lane& lane = lane_of(o);
            lane.lock.lock();
            /* move to MRU */
            Object::Queue::iterator it =
              Object::Queue::s_iterator_to(*o);
            lane.q.erase(it);
            lane.q.push_front(*o);
            lane.lock.unlock();
          } /* adj */
        } /* initial ref */
        return true;
      } /* ref */

      void unref(Object* o, uint32_t flags) {
        uint32_t refcnt = --(o->lru_refcnt);
        Object* tdo = nullptr;
        if (unlikely(refcnt == 0)) {
          Lane& lane = lane_of(o);
          lane.lock.lock();
          refcnt = o->lru_refcnt.load();
          if (unlikely(refcnt == 0)) {
            Object::Queue::iterator it =
              Object::Queue::s_iterator_to(*o);
            lane.q.erase(it);
            tdo = o;
          }
          lane.lock.unlock();
        } else if (unlikely(refcnt == SENTINEL_REFCNT)) {
          Lane& lane = lane_of(o);
          lane.lock.lock();
          refcnt = o->lru_refcnt.load();
          if (likely(refcnt == SENTINEL_REFCNT)) {
            /* move to LRU */
            Object::Queue::iterator it =
              Object::Queue::s_iterator_to(*o);
            lane.q.erase(it);
            /* hiwat check */
            if (lane.q.size() > lane_hiwat) {
              tdo = o;
            } else {
              lane.q.push_back(*o);
            }
          }
          lane.lock.unlock();
        }
        /* unref out-of-line && !LOCKED */
        if (tdo)
          delete tdo;
      } /* unref */

      Object* insert(ObjectFactory* fac, Edge edge, uint32_t& flags) {
        /* use supplied functor to re-use an evicted object, or
         * allocate a new one of the descendant type */
        Object* o = evict_block(fac);
        if (o) {
          fac->recycle(o); /* recycle existing object */
          flags |= FLAG_RECYCLE;
        } else {
          o = fac->alloc(); /* get a new one */
        }

        o->lru_flags = FLAG_INLRU;

        Lane& lane = lane_of(o);
        lane.lock.lock();
        switch (edge) {
        case Edge::MRU:
          lane.q.push_front(*o);
          break;
        case Edge::LRU:
          lane.q.push_back(*o);
          break;
        default:
          ceph_abort();
          break;
        }
        if (flags & FLAG_INITIAL)
          o->lru_refcnt += 2; /* sentinel ref + initial */
        else
          ++(o->lru_refcnt); /* sentinel */
        lane.lock.unlock();
        return o;
      } /* insert */

    };
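
    /*
     * Illustrative sketch (not part of this header): typical LRU use,
     * assuming the hypothetical ExampleEntry/ExampleFactory sketched above
     * and std::mutex as the lane lock type LK.
     *
     *   cohort::lru::LRU<std::mutex> lru(7, 100); // 7 lanes, hiwat 100 per lane
     *   ExampleFactory fac;
     *   uint32_t flags = cohort::lru::FLAG_INITIAL;
     *   // insert() recycles an evictable object if one exists, else allocates
     *   cohort::lru::Object* o = lru.insert(&fac, cohort::lru::Edge::MRU, flags);
     *   lru.ref(o, cohort::lru::FLAG_INITIAL);  // take an additional reference
     *   lru.unref(o, cohort::lru::FLAG_NONE);   // drop it; may re-queue or delete
     */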

    template <typename T, typename TTree, typename CLT, typename CEQ,
              typename K, typename LK>
    class TreeX
    {
    public:

      static constexpr uint32_t FLAG_NONE = 0x0000;
      static constexpr uint32_t FLAG_LOCK = 0x0001;
      static constexpr uint32_t FLAG_UNLOCK = 0x0002;
      static constexpr uint32_t FLAG_UNLOCK_ON_MISS = 0x0004;

      typedef T value_type;
      typedef TTree container_type;
      typedef typename TTree::iterator iterator;
      typedef std::pair<iterator, bool> check_result;
      typedef typename TTree::insert_commit_data insert_commit_data;
      int n_part;
      int csz;

      typedef std::unique_lock<LK> unique_lock;

      struct Partition {
        LK lock;
        TTree tr;
        T** cache;
        int csz;
        CACHE_PAD(0);

        Partition() : tr(), cache(nullptr), csz(0) {}

        ~Partition() {
          if (csz)
            ::operator delete(cache);
        }
      };

      struct Latch {
        Partition* p;
        LK* lock;
        insert_commit_data commit_data{};

        Latch() : p(nullptr), lock(nullptr) {}
      };

      Partition& partition_of_scalar(uint64_t x) {
        return part[x % n_part];
      }

      Partition& get(uint8_t x) {
        return part[x];
      }

      Partition*& get() {
        return part;
      }

      void lock() {
        std::for_each(locks.begin(), locks.end(),
                      [](LK* lk){ lk->lock(); });
      }

      void unlock() {
        std::for_each(locks.begin(), locks.end(),
                      [](LK* lk){ lk->unlock(); });
      }

      TreeX(int n_part=1, int csz=127) : n_part(n_part), csz(csz) {
        ceph_assert(n_part > 0);
        part = new Partition[n_part];
        for (int ix = 0; ix < n_part; ++ix) {
          Partition& p = part[ix];
          if (csz) {
            p.csz = csz;
            p.cache = (T**) ::operator new(csz * sizeof(T*));
            // FIPS zeroization audit 20191115: this memset is not security related.
            memset(p.cache, 0, csz * sizeof(T*));
          }
          locks.push_back(&p.lock);
        }
      }

      ~TreeX() {
        delete[] part;
      }

      T* find(uint64_t hk, const K& k, uint32_t flags) {
        T* v;
        Latch lat;
        uint32_t slot = 0;
        lat.p = &(partition_of_scalar(hk));
        if (flags & FLAG_LOCK) {
          lat.lock = &lat.p->lock;
          lat.lock->lock();
        }
        if (csz) { /* template specialize? */
          slot = hk % csz;
          v = lat.p->cache[slot];
          if (v) {
            if (CEQ()(*v, k)) {
              if (flags & FLAG_LOCK)
                lat.lock->unlock();
              return v;
            }
            v = nullptr;
          }
        } else {
          v = nullptr;
        }
        iterator it = lat.p->tr.find(k, CLT());
        if (it != lat.p->tr.end()) {
          v = &(*(it));
          if (csz) {
            /* fill cache slot at hk */
            lat.p->cache[slot] = v;
          }
        }
        if (flags & FLAG_LOCK)
          lat.lock->unlock();
        return v;
      } /* find */

      T* find_latch(uint64_t hk, const K& k, Latch& lat,
                    uint32_t flags) {
        uint32_t slot = 0;
        T* v;
        lat.p = &(partition_of_scalar(hk));
        lat.lock = &lat.p->lock;
        if (flags & FLAG_LOCK)
          lat.lock->lock();
        if (csz) { /* template specialize? */
          slot = hk % csz;
          v = lat.p->cache[slot];
          if (v) {
            if (CEQ()(*v, k)) {
              if ((flags & FLAG_LOCK) && (flags & FLAG_UNLOCK))
                lat.lock->unlock();
              return v;
            }
            v = nullptr;
          }
        } else {
          v = nullptr;
        }
        check_result r = lat.p->tr.insert_unique_check(
          k, CLT(), lat.commit_data);
        if (! r.second /* !insertable (i.e., already present) */) {
          v = &(*(r.first));
          if (csz) {
            /* fill cache slot at hk */
            lat.p->cache[slot] = v;
          }
        }
        if ((flags & FLAG_LOCK) && (flags & FLAG_UNLOCK))
          lat.lock->unlock();
        return v;
      } /* find_latch */

      bool is_same_partition(uint64_t lhs, uint64_t rhs) {
        return ((lhs % n_part) == (rhs % n_part));
      }

      void insert_latched(T* v, Latch& lat, uint32_t flags) {
        (void) lat.p->tr.insert_unique_commit(*v, lat.commit_data);
        if (flags & FLAG_UNLOCK)
          lat.lock->unlock();
      } /* insert_latched */
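
      /*
       * Illustrative sketch (not part of this header): the latched
       * lookup-or-insert pattern.  ExampleCache (see the instantiation
       * sketch after this class), ExampleEntry, key and make_entry() are
       * hypothetical.
       *
       *   ExampleCache::Latch lat;
       *   ExampleEntry* e = cache.find_latch(hk, key, lat, ExampleCache::FLAG_LOCK);
       *   if (! e) {
       *     e = make_entry(key); // construct the missing element
       *     cache.insert_latched(e, lat, ExampleCache::FLAG_UNLOCK);
       *   } else {
       *     lat.lock->unlock();  // hit: find_latch left the partition locked
       *   }
       */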

      void insert(uint64_t hk, T* v, uint32_t flags) {
        Partition& p = partition_of_scalar(hk);
        if (flags & FLAG_LOCK)
          p.lock.lock();
        p.tr.insert_unique(*v);
        if (flags & FLAG_LOCK)
          p.lock.unlock();
      } /* insert */

      void remove(uint64_t hk, T* v, uint32_t flags) {
        Partition& p = partition_of_scalar(hk);
        iterator it = TTree::s_iterator_to(*v);
        if (flags & FLAG_LOCK)
          p.lock.lock();
        p.tr.erase(it);
        if (csz) { /* template specialize? */
          uint32_t slot = hk % csz;
          T* v2 = p.cache[slot];
          /* we are intrusive, just compare addresses */
          if (v == v2)
            p.cache[slot] = nullptr;
        }
        if (flags & FLAG_LOCK)
          p.lock.unlock();
      } /* remove */

      void drain(std::function<void(T*)> uref,
                 uint32_t flags = FLAG_NONE) {
        /* clear a table, call supplied function on
         * each element found (e.g., returns sentinel
         * references) */
        Object::Queue2 drain_q;
        for (int t_ix = 0; t_ix < n_part; ++t_ix) {
          Partition& p = part[t_ix];
          if (flags & FLAG_LOCK) /* LOCKED */
            p.lock.lock();
          while (p.tr.size() > 0) {
            iterator it = p.tr.begin();
            T* v = &(*it);
            p.tr.erase(it);
            drain_q.push_front(*v);
          }
          if (flags & FLAG_LOCK) /* we locked it, !LOCKED */
            p.lock.unlock();
        } /* each partition */
        /* unref out-of-line && !LOCKED */
        while (drain_q.size() > 0) {
          Object::Queue2::iterator it = drain_q.begin();
          T* v = static_cast<T*>(&(*it));
          drain_q.erase(it); /* must precede uref(v) in safe_link mode */
          uref(v);
        }
      } /* drain */

    private:
      Partition *part;
      std::vector<LK*> locks;
    };
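
    /*
     * Illustrative sketch (not part of this header): TreeX is intended to
     * be instantiated with a boost::intrusive tree keyed by the cached
     * type.  All names below (ExampleEntry, ExampleKey, KeyLT, KeyEQ,
     * ExampleCache) are hypothetical.
     *
     *   typedef bi::avl_set_member_hook<link_mode> tree_hook_t;
     *   // ExampleEntry : public Object, with a tree_hook_t tree_hook member,
     *   // plus KeyLT/KeyEQ functors comparing an ExampleEntry with an ExampleKey
     *
     *   typedef bi::avltree<ExampleEntry,
     *                       bi::member_hook<ExampleEntry, tree_hook_t,
     *                                       &ExampleEntry::tree_hook>> ExampleTree;
     *
     *   typedef TreeX<ExampleEntry, ExampleTree,
     *                 ExampleEntry::KeyLT, ExampleEntry::KeyEQ,
     *                 ExampleKey, std::mutex> ExampleCache;
     *
     * The tree type must provide insert_unique_check()/insert_unique_commit()
     * (as bi::avltree and bi::rbtree do), and LK is the per-partition lock.
     */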

  } /* namespace lru */
} /* namespace cohort */

#endif /* COHORT_LRU_H */