// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Copyright (C) 2015 CohortFS, LLC.
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */

#ifndef COHORT_LRU_H
#define COHORT_LRU_H

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <mutex>
#include <utility>
#include <vector>

#include <boost/intrusive/list.hpp>

#include "common/likely.h"

#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE 64 /* XXX arch-specific define */
#endif
#define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE]

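/* Illustrative note: CACHE_PAD(_n) pads the enclosing structure so that
 * per-lane / per-partition state occupies its own cache line, which is
 * intended to avoid false sharing between CPUs. With the default
 * CACHE_LINE_SIZE of 64 above, CACHE_PAD(0) expands to:
 *
 *   char __pad0 [64];
 */
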
namespace cohort {

namespace lru {

namespace bi = boost::intrusive;

/* public flag values */
constexpr uint32_t FLAG_NONE = 0x0000;
constexpr uint32_t FLAG_INITIAL = 0x0001;

enum class Edge : std::uint8_t
{
  MRU = 0,
  LRU
};

typedef bi::link_mode<bi::safe_link> link_mode; // for debugging

class Object
{
private:
  uint32_t lru_flags;
  std::atomic<uint32_t> lru_refcnt;
  std::atomic<uint32_t> lru_adj;
  bi::list_member_hook< link_mode > lru_hook;

  typedef bi::list<Object,
                   bi::member_hook<
                     Object, bi::list_member_hook< link_mode >,
                     &Object::lru_hook >,
                   bi::constant_time_size<true>> Queue;

public:

  Object() : lru_flags(FLAG_NONE), lru_refcnt(0), lru_adj(0) {}

  uint32_t get_refcnt() const { return lru_refcnt; }

  virtual bool reclaim() = 0;

  virtual ~Object() {}

private:
  template <typename LK>
  friend class LRU;
};

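/* Example (illustrative sketch; "MyEntry" and its members are
 * hypothetical, not part of this header): the only obligation on a
 * concrete cache entry is to implement reclaim(), which returns true
 * when the object may be evicted and re-used.
 *
 *   class MyEntry : public cohort::lru::Object {
 *     uint64_t key;
 *   public:
 *     explicit MyEntry(uint64_t k) : key(k) {}
 *     uint64_t get_key() const { return key; }
 *     void set_key(uint64_t k) { key = k; }
 *     bool reclaim() override {
 *       return true; // e.g., check external pins before agreeing
 *     }
 *   };
 */
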
/* allocator & recycler interface (create or re-use LRU objects) */
class ObjectFactory
{
public:
  virtual Object* alloc(void) = 0;
  virtual void recycle(Object*) = 0;
  virtual ~ObjectFactory() {}
};

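/* Example (illustrative sketch, continuing the hypothetical MyEntry
 * type above): LRU::insert() calls recycle() when eviction yielded a
 * re-usable object and alloc() otherwise, so a factory is typically
 * constructed with the state of the entry about to be inserted.
 *
 *   class MyEntryFactory : public cohort::lru::ObjectFactory {
 *     uint64_t key;
 *   public:
 *     explicit MyEntryFactory(uint64_t k) : key(k) {}
 *     Object* alloc(void) override { return new MyEntry(key); }
 *     void recycle(Object* o) override {
 *       static_cast<MyEntry*>(o)->set_key(key); // re-key in place
 *     }
 *   };
 */
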
template <typename LK>
class LRU
{
private:

  struct Lane {
    LK lock;
    Object::Queue q;
    // Object::Queue pinned; /* placeholder for possible expansion */
    CACHE_PAD(0);
    Lane() {}
  };

  Lane *qlane;
  int n_lanes;
  std::atomic<uint32_t> evict_lane;
  const uint32_t lane_hiwat;

  static constexpr uint32_t lru_adj_modulus = 5;

  static constexpr uint32_t SENTINEL_REFCNT = 1;

  /* internal flag values */
  static constexpr uint32_t FLAG_INLRU = 0x0001;
  static constexpr uint32_t FLAG_PINNED = 0x0002; // possible future use
  static constexpr uint32_t FLAG_EVICTING = 0x0004;

  Lane& lane_of(void* addr) {
    return qlane[(uint64_t)(addr) % n_lanes];
  }

  uint32_t next_evict_lane() {
    return (evict_lane++ % n_lanes);
  }

  bool can_reclaim(Object* o) {
    return ((o->lru_refcnt == SENTINEL_REFCNT) &&
            (!(o->lru_flags & FLAG_EVICTING)));
  }

  Object* evict_block() {
    uint32_t lane_ix = next_evict_lane();
    for (int ix = 0; ix < n_lanes; ++ix,
           lane_ix = next_evict_lane()) {
      Lane& lane = qlane[lane_ix];
      lane.lock.lock();
      if (lane.q.empty()) {
        /* nothing to evict in this lane */
        lane.lock.unlock();
        continue;
      }
      /* if object at LRU has refcnt==1, it may be reclaimable */
      Object* o = &(lane.q.back());
#if 0 /* XXX save for refactor */
      std::cout << __func__
                << " " << o
                << " refcnt: " << o->lru_refcnt
                << std::endl;
#endif
      if (can_reclaim(o)) {
        ++(o->lru_refcnt);
        o->lru_flags |= FLAG_EVICTING;
        lane.lock.unlock();
        if (o->reclaim()) {
          lane.lock.lock();
          --(o->lru_refcnt);
          /* assertions that o state has not changed across
           * relock */
          assert(o->lru_refcnt == SENTINEL_REFCNT);
          assert(o->lru_flags & FLAG_INLRU);
          Object::Queue::iterator it =
            Object::Queue::s_iterator_to(*o);
          lane.q.erase(it);
          lane.lock.unlock();
          return o;
        } else {
          // XXX can't make unreachable (means what?)
          lane.lock.lock();
          --(o->lru_refcnt);
          o->lru_flags &= ~FLAG_EVICTING;
          /* unlock in next block */
        }
      } /* can_reclaim(o) */
      lane.lock.unlock();
    } /* each lane */
    return nullptr;
  } /* evict_block */

public:

  LRU(int lanes, uint32_t _hiwat)
    : n_lanes(lanes), evict_lane(0), lane_hiwat(_hiwat)
  {
    assert(n_lanes > 0);
    qlane = new Lane[n_lanes];
  }

  ~LRU() { delete[] qlane; }

  bool ref(Object* o, uint32_t flags) {
    ++(o->lru_refcnt);
    if (flags & FLAG_INITIAL) {
      if ((++(o->lru_adj) % lru_adj_modulus) == 0) {
        Lane& lane = lane_of(o);
        lane.lock.lock();
        /* move to MRU */
        Object::Queue::iterator it =
          Object::Queue::s_iterator_to(*o);
        lane.q.erase(it);
        lane.q.push_front(*o);
        lane.lock.unlock();
      } /* adj */
    } /* initial ref */
    return true;
  } /* ref */

  void unref(Object* o, uint32_t flags) {
    uint32_t refcnt = --(o->lru_refcnt);
    if (unlikely(refcnt == 0)) {
      Lane& lane = lane_of(o);
      lane.lock.lock();
      refcnt = o->lru_refcnt.load();
      if (unlikely(refcnt == 0)) {
        Object::Queue::iterator it =
          Object::Queue::s_iterator_to(*o);
        lane.q.erase(it);
        delete o;
      }
      lane.lock.unlock();
    } else if (unlikely(refcnt == SENTINEL_REFCNT)) {
      Lane& lane = lane_of(o);
      lane.lock.lock();
      refcnt = o->lru_refcnt.load();
      if (likely(refcnt == SENTINEL_REFCNT)) {
        /* move to LRU */
        Object::Queue::iterator it =
          Object::Queue::s_iterator_to(*o);
        lane.q.erase(it);
        /* hiwat check */
        if (lane.q.size() > lane_hiwat) {
          delete o;
        } else {
          lane.q.push_back(*o);
        }
      }
      lane.lock.unlock();
    }
  } /* unref */

  Object* insert(ObjectFactory* fac, Edge edge, uint32_t flags) {
    /* use supplied functor to re-use an evicted object, or
     * allocate a new one of the descendant type */
    Object* o = evict_block();
    if (o)
      fac->recycle(o); /* recycle existing object */
    else
      o = fac->alloc(); /* get a new one */

    o->lru_flags = FLAG_INLRU;

    Lane& lane = lane_of(o);
    lane.lock.lock();
    switch (edge) {
    case Edge::MRU:
      lane.q.push_front(*o);
      break;
    case Edge::LRU:
      lane.q.push_back(*o);
      break;
    default:
      abort();
      break;
    }
    if (flags & FLAG_INITIAL)
      o->lru_refcnt += 2; /* sentinel ref + initial */
    else
      ++(o->lru_refcnt); /* sentinel */
    lane.lock.unlock();
    return o;
  } /* insert */

};

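/* Example (illustrative sketch, using the hypothetical MyEntry and
 * MyEntryFactory types above): LK can be any BasicLockable type such
 * as std::mutex; the lane count and high-water mark are arbitrary.
 *
 *   cohort::lru::LRU<std::mutex> cache(7, 32768);
 *
 *   // insert (or recycle) an entry at the MRU edge, taking an initial
 *   // reference in addition to the sentinel reference
 *   MyEntryFactory fac(42);
 *   cohort::lru::Object* o =
 *     cache.insert(&fac, cohort::lru::Edge::MRU, cohort::lru::FLAG_INITIAL);
 *   MyEntry* e = static_cast<MyEntry*>(o);
 *
 *   // subsequent lookups take and drop references; when the count
 *   // falls back to the sentinel, unref() moves the entry to the LRU
 *   // edge, or frees it if the lane is over its high-water mark
 *   cache.ref(e, cohort::lru::FLAG_NONE);
 *   cache.unref(e, cohort::lru::FLAG_NONE);
 *   cache.unref(e, cohort::lru::FLAG_NONE); // drops the initial reference
 */
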
template <typename T, typename TTree, typename CLT, typename CEQ,
          typename K, typename LK>
class TreeX
{
public:

  static constexpr uint32_t FLAG_NONE = 0x0000;
  static constexpr uint32_t FLAG_LOCK = 0x0001;
  static constexpr uint32_t FLAG_UNLOCK = 0x0002;
  static constexpr uint32_t FLAG_UNLOCK_ON_MISS = 0x0004;

  typedef T value_type;
  typedef TTree container_type;
  typedef typename TTree::iterator iterator;
  typedef std::pair<iterator, bool> check_result;
  typedef typename TTree::insert_commit_data insert_commit_data;
  int n_part;
  int csz;

  typedef std::unique_lock<LK> unique_lock;

  struct Partition {
    LK lock;
    TTree tr;
    T** cache;
    int csz;
    CACHE_PAD(0);

    Partition() : tr(), cache(nullptr), csz(0) {}

    ~Partition() {
      if (csz)
        ::operator delete(cache);
    }
  };

  struct Latch {
    Partition* p;
    LK* lock;
    insert_commit_data commit_data;

    Latch() : p(nullptr), lock(nullptr) {}
  };

  Partition& partition_of_scalar(uint64_t x) {
    return part[x % n_part];
  }

  Partition& get(uint8_t x) {
    return part[x];
  }

  Partition*& get() {
    return part;
  }

  void lock() {
    std::for_each(locks.begin(), locks.end(),
                  [](LK* lk){ lk->lock(); });
  }

  void unlock() {
    std::for_each(locks.begin(), locks.end(),
                  [](LK* lk){ lk->unlock(); });
  }

  TreeX(int n_part=1, int csz=127) : n_part(n_part), csz(csz) {
    assert(n_part > 0);
    part = new Partition[n_part];
    for (int ix = 0; ix < n_part; ++ix) {
      Partition& p = part[ix];
      if (csz) {
        p.csz = csz;
        p.cache = (T**) ::operator new(csz * sizeof(T*));
        memset(p.cache, 0, csz * sizeof(T*));
      }
      locks.push_back(&p.lock);
    }
  }

  ~TreeX() {
    delete[] part;
  }

  T* find(uint64_t hk, const K& k, uint32_t flags) {
    T* v;
    Latch lat;
    uint32_t slot = 0;
    lat.p = &(partition_of_scalar(hk));
    if (flags & FLAG_LOCK) {
      lat.lock = &lat.p->lock;
      lat.lock->lock();
    }
    if (csz) { /* template specialize? */
      slot = hk % csz;
      v = lat.p->cache[slot];
      if (v) {
        if (CEQ()(*v, k)) {
          if (flags & FLAG_LOCK)
            lat.lock->unlock();
          return v;
        }
        v = nullptr;
      }
    } else {
      v = nullptr;
    }
    iterator it = lat.p->tr.find(k, CLT());
    if (it != lat.p->tr.end()) {
      v = &(*(it));
      if (csz) {
        /* fill cache slot at hk */
        lat.p->cache[slot] = v;
      }
    }
    if (flags & FLAG_LOCK)
      lat.lock->unlock();
    return v;
  } /* find */

  T* find_latch(uint64_t hk, const K& k, Latch& lat,
                uint32_t flags) {
    uint32_t slot = 0;
    T* v;
    lat.p = &(partition_of_scalar(hk));
    lat.lock = &lat.p->lock;
    if (flags & FLAG_LOCK)
      lat.lock->lock();
    if (csz) { /* template specialize? */
      slot = hk % csz;
      v = lat.p->cache[slot];
      if (v) {
        if (CEQ()(*v, k)) {
          if (flags & (FLAG_LOCK|FLAG_UNLOCK))
            lat.lock->unlock();
          return v;
        }
        v = nullptr;
      }
    } else {
      v = nullptr;
    }
    check_result r = lat.p->tr.insert_unique_check(
      k, CLT(), lat.commit_data);
    if (! r.second /* !insertable (i.e., key already present) */) {
      v = &(*(r.first));
      if (csz) {
        /* fill cache slot at hk */
        lat.p->cache[slot] = v;
      }
    }
    if (flags & (FLAG_LOCK|FLAG_UNLOCK))
      lat.lock->unlock();
    return v;
  } /* find_latch */

  void insert_latched(T* v, Latch& lat, uint32_t flags) {
    (void) lat.p->tr.insert_unique_commit(*v, lat.commit_data);
    if (flags & FLAG_UNLOCK)
      lat.lock->unlock();
  } /* insert_latched */

  void insert(uint64_t hk, T* v, uint32_t flags) {
    Partition& p = partition_of_scalar(hk);
    if (flags & FLAG_LOCK)
      p.lock.lock();
    p.tr.insert_unique(*v);
    if (flags & FLAG_LOCK)
      p.lock.unlock();
  } /* insert */

  void remove(uint64_t hk, T* v, uint32_t flags) {
    Partition& p = partition_of_scalar(hk);
    iterator it = TTree::s_iterator_to(*v);
    if (flags & FLAG_LOCK)
      p.lock.lock();
    p.tr.erase(it);
    if (csz) { /* template specialize? */
      uint32_t slot = hk % csz;
      T* v2 = p.cache[slot];
      /* we are intrusive, just compare addresses */
      if (v == v2)
        p.cache[slot] = nullptr;
    }
    if (flags & FLAG_LOCK)
      p.lock.unlock();
  } /* remove */

  void drain(std::function<void(T*)> uref,
             uint32_t flags = FLAG_NONE) {
    /* clear a table, calling the supplied function on each element
     * found (e.g., to return sentinel references) */
    for (int t_ix = 0; t_ix < n_part; ++t_ix) {
      Partition& p = part[t_ix];
      if (flags & FLAG_LOCK) /* LOCKED */
        p.lock.lock();
      while (p.tr.size() > 0) {
        iterator it = p.tr.begin();
        T* v = &(*it);
        p.tr.erase(it); /* must precede uref(v), in
                         * safe_link mode */
        uref(v);
      }
      if (flags & FLAG_LOCK) /* we locked it, !LOCKED */
        p.lock.unlock();
    } /* each partition */
  } /* drain */

private:
  Partition *part;
  std::vector<LK*> locks;
};
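
/* Example (illustrative sketch): a check-then-insert sequence using
 * find_latch()/insert_latched(). EntrySet, EntryLT, and EntryEQ are
 * hypothetical, as is MyEntry, which for this use would additionally
 * carry a boost::intrusive set hook keyed by uint64_t. The partition
 * lock is managed explicitly around the latched pair so that the
 * commit data computed by find_latch() stays valid.
 *
 *   typedef cohort::lru::TreeX<MyEntry, EntrySet, EntryLT, EntryEQ,
 *                              uint64_t, std::mutex> EntryTable;
 *
 *   EntryTable table(4, 127);
 *   EntryTable::Latch lat;
 *   uint64_t key = 1001;
 *   uint64_t hk = key; // or a hash of the key
 *
 *   EntryTable::Partition& p = table.partition_of_scalar(hk);
 *   p.lock.lock();
 *   MyEntry* e = table.find_latch(hk, key, lat, EntryTable::FLAG_NONE);
 *   if (!e) {
 *     e = new MyEntry(key);
 *     table.insert_latched(e, lat, EntryTable::FLAG_NONE);
 *   }
 *   p.lock.unlock();
 */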

} /* namespace lru */
} /* namespace cohort */

#endif /* COHORT_LRU_H */