// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Copyright (C) 2015 CohortFS, LLC.
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */

#ifndef COHORT_LRU_H
#define COHORT_LRU_H

/* standard headers for symbols used below (atomic refcounts, locks,
 * drain callbacks, memset, assert, abort) */
#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <mutex>
#include <utility>
#include <vector>

#include <boost/intrusive/list.hpp>
#include <boost/intrusive/slist.hpp>

#include "common/likely.h"

#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE 64 /* XXX arch-specific define */
#endif
#define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE]

namespace cohort {

namespace lru {

namespace bi = boost::intrusive;

/* public flag values */
constexpr uint32_t FLAG_NONE = 0x0000;
constexpr uint32_t FLAG_INITIAL = 0x0001;

enum class Edge : std::uint8_t
{
  MRU = 0,
  LRU
};

typedef bi::link_mode<bi::safe_link> link_mode;

class Object
{
private:
  uint32_t lru_flags;
  std::atomic<uint32_t> lru_refcnt;
  std::atomic<uint32_t> lru_adj;
  bi::list_member_hook< link_mode > lru_hook;

  typedef bi::list<Object,
		   bi::member_hook<
		     Object, bi::list_member_hook< link_mode >,
		     &Object::lru_hook >,
		   bi::constant_time_size<true>> Queue;

  bi::slist_member_hook< link_mode > q2_hook;

  typedef bi::slist<Object,
		    bi::member_hook<
		      Object, bi::slist_member_hook< link_mode >,
		      &Object::q2_hook >,
		    bi::constant_time_size<true>> Queue2;

public:

  Object() : lru_flags(FLAG_NONE), lru_refcnt(0), lru_adj(0) {}

  uint32_t get_refcnt() const { return lru_refcnt; }

  virtual bool reclaim() = 0;

  virtual ~Object() {}

private:
  template <typename LK>
  friend class LRU;

  template <typename T, typename TTree, typename CLT, typename CEQ,
	    typename K, typename LK>
  friend class TreeX;
};

/* allocator & recycler interface (create or re-use LRU objects) */
class ObjectFactory
{
public:
  virtual Object* alloc(void) = 0;
  virtual void recycle(Object*) = 0;
  virtual ~ObjectFactory() {}
};
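
/* Illustrative sketch (an assumption, not part of the original
 * interface): a minimal Object subclass and matching ObjectFactory.
 * The names Entry and EntryFactory are hypothetical. reclaim()
 * returning true tells the LRU the object may be evicted and handed
 * back to the factory's recycle() for in-place re-use.
 *
 *   struct Entry : public cohort::lru::Object {
 *     uint64_t key = 0;
 *     bool reclaim() override { return true; } // nothing pins us
 *   };
 *
 *   struct EntryFactory : public cohort::lru::ObjectFactory {
 *     uint64_t next_key = 0; // hypothetical: key for the next entry
 *     cohort::lru::Object* alloc(void) override {
 *       return new Entry();
 *     }
 *     void recycle(cohort::lru::Object* o) override {
 *       static_cast<Entry*>(o)->key = next_key; // re-init in place
 *     }
 *   };
 */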

template <typename LK>
class LRU
{
private:

  struct Lane {
    LK lock;
    Object::Queue q;
    // Object::Queue pinned; /* placeholder for possible expansion */
    CACHE_PAD(0);
    Lane() {}
  };

  Lane *qlane;
  int n_lanes;
  std::atomic<uint32_t> evict_lane;
  const uint32_t lane_hiwat;

  static constexpr uint32_t lru_adj_modulus = 5;

  static constexpr uint32_t SENTINEL_REFCNT = 1;

  /* internal flag values */
  static constexpr uint32_t FLAG_INLRU = 0x0001;
  static constexpr uint32_t FLAG_PINNED = 0x0002; // possible future use
  static constexpr uint32_t FLAG_EVICTING = 0x0004;

  Lane& lane_of(void* addr) {
    return qlane[(uint64_t)(addr) % n_lanes];
  }

  uint32_t next_evict_lane() {
    return (evict_lane++ % n_lanes);
  }

  bool can_reclaim(Object* o) {
    return ((o->lru_refcnt == SENTINEL_REFCNT) &&
	    (!(o->lru_flags & FLAG_EVICTING)));
  }

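  /* Scan lanes round-robin from the evict_lane rotor. In each lane,
   * inspect only the object at the LRU end: if it holds just the
   * sentinel ref and is not already being evicted, take an extra ref,
   * set FLAG_EVICTING, and call reclaim() outside the lane lock. On
   * success the object is unlinked and returned for re-use; otherwise
   * its state is restored and the scan moves to the next lane. */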
  Object* evict_block() {
    uint32_t lane_ix = next_evict_lane();
    for (int ix = 0; ix < n_lanes; ++ix,
	   lane_ix = next_evict_lane()) {
      Lane& lane = qlane[lane_ix];
      lane.lock.lock();
      /* if object at LRU has refcnt==1, it may be reclaimable */
      Object* o = &(lane.q.back());
      if (can_reclaim(o)) {
	++(o->lru_refcnt);
	o->lru_flags |= FLAG_EVICTING;
	lane.lock.unlock();
	if (o->reclaim()) {
	  lane.lock.lock();
	  --(o->lru_refcnt);
	  /* assertions that o state has not changed across
	   * relock */
	  assert(o->lru_refcnt == SENTINEL_REFCNT);
	  assert(o->lru_flags & FLAG_INLRU);
	  Object::Queue::iterator it =
	    Object::Queue::s_iterator_to(*o);
	  lane.q.erase(it);
	  lane.lock.unlock();
	  return o;
	} else {
	  // XXX can't make unreachable (means what?)
	  lane.lock.lock();
	  --(o->lru_refcnt);
	  o->lru_flags &= ~FLAG_EVICTING;
	  /* unlock in next block */
	}
      } /* can_reclaim(o) */
      lane.lock.unlock();
    } /* each lane */
    return nullptr;
  } /* evict_block */

public:

  LRU(int lanes, uint32_t _hiwat)
    : n_lanes(lanes), evict_lane(0), lane_hiwat(_hiwat)
  {
    assert(n_lanes > 0);
    qlane = new Lane[n_lanes];
  }

  ~LRU() { delete[] qlane; }

  bool ref(Object* o, uint32_t flags) {
    ++(o->lru_refcnt);
    if (flags & FLAG_INITIAL) {
      if ((++(o->lru_adj) % lru_adj_modulus) == 0) {
	Lane& lane = lane_of(o);
	lane.lock.lock();
	/* move to MRU */
	Object::Queue::iterator it =
	  Object::Queue::s_iterator_to(*o);
	lane.q.erase(it);
	lane.q.push_front(*o);
	lane.lock.unlock();
      } /* adj */
    } /* initial ref */
    return true;
  } /* ref */

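  /* Drop one reference. State transitions are re-checked under the
   * lane lock, since a racing ref() may intervene: at refcnt 0 the
   * object is unlinked and deleted; back at the sentinel count it is
   * demoted to the LRU end, or deleted outright when the lane is over
   * lane_hiwat. The delete itself happens after the lock is released. */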
  void unref(Object* o, uint32_t flags) {
    uint32_t refcnt = --(o->lru_refcnt);
    Object* tdo = nullptr;
    if (unlikely(refcnt == 0)) {
      Lane& lane = lane_of(o);
      lane.lock.lock();
      refcnt = o->lru_refcnt.load();
      if (unlikely(refcnt == 0)) {
	Object::Queue::iterator it =
	  Object::Queue::s_iterator_to(*o);
	lane.q.erase(it);
	tdo = o;
      }
      lane.lock.unlock();
    } else if (unlikely(refcnt == SENTINEL_REFCNT)) {
      Lane& lane = lane_of(o);
      lane.lock.lock();
      refcnt = o->lru_refcnt.load();
      if (likely(refcnt == SENTINEL_REFCNT)) {
	/* move to LRU */
	Object::Queue::iterator it =
	  Object::Queue::s_iterator_to(*o);
	lane.q.erase(it);
	/* hiwat check */
	if (lane.q.size() > lane_hiwat) {
	  tdo = o;
	} else {
	  lane.q.push_back(*o);
	}
      }
      lane.lock.unlock();
    }
    /* unref out-of-line && !LOCKED */
    if (tdo)
      delete tdo;
  } /* unref */

  Object* insert(ObjectFactory* fac, Edge edge, uint32_t flags) {
    /* use supplied functor to re-use an evicted object, or
     * allocate a new one of the descendant type */
    Object* o = evict_block();
    if (o)
      fac->recycle(o); /* recycle existing object */
    else
      o = fac->alloc(); /* get a new one */

    o->lru_flags = FLAG_INLRU;

    Lane& lane = lane_of(o);
    lane.lock.lock();
    switch (edge) {
    case Edge::MRU:
      lane.q.push_front(*o);
      break;
    case Edge::LRU:
      lane.q.push_back(*o);
      break;
    default:
      abort();
      break;
    }
    if (flags & FLAG_INITIAL)
      o->lru_refcnt += 2; /* sentinel ref + initial */
    else
      ++(o->lru_refcnt); /* sentinel */
    lane.lock.unlock();
    return o;
  } /* insert */

};
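
/* Illustrative usage sketch (assumptions: the hypothetical Entry and
 * EntryFactory above; std::mutex satisfies the LK concept, which only
 * needs lock()/unlock()):
 *
 *   cohort::lru::LRU<std::mutex> cache(7, 1024); // 7 lanes, hiwat 1024
 *   EntryFactory fac;
 *   cohort::lru::Object* o =
 *     cache.insert(&fac, cohort::lru::Edge::MRU,
 *                  cohort::lru::FLAG_INITIAL);  // refcnt: sentinel + 1
 *   cache.ref(o, cohort::lru::FLAG_INITIAL);    // hit; may promote to MRU
 *   cache.unref(o, cohort::lru::FLAG_NONE);     // drop the ref() above
 *   cache.unref(o, cohort::lru::FLAG_NONE);     // drop the initial ref
 */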

template <typename T, typename TTree, typename CLT, typename CEQ,
	  typename K, typename LK>
class TreeX
{
public:

  static constexpr uint32_t FLAG_NONE = 0x0000;
  static constexpr uint32_t FLAG_LOCK = 0x0001;
  static constexpr uint32_t FLAG_UNLOCK = 0x0002;
  static constexpr uint32_t FLAG_UNLOCK_ON_MISS = 0x0004;

  typedef T value_type;
  typedef TTree container_type;
  typedef typename TTree::iterator iterator;
  typedef std::pair<iterator, bool> check_result;
  typedef typename TTree::insert_commit_data insert_commit_data;
  int n_part;
  int csz;

  typedef std::unique_lock<LK> unique_lock;

  struct Partition {
    LK lock;
    TTree tr;
    T** cache;
    int csz;
    CACHE_PAD(0);

    Partition() : tr(), cache(nullptr), csz(0) {}

    ~Partition() {
      if (csz)
	::operator delete(cache);
    }
  };

  struct Latch {
    Partition* p;
    LK* lock;
    insert_commit_data commit_data;

    Latch() : p(nullptr), lock(nullptr) {}
  };

  Partition& partition_of_scalar(uint64_t x) {
    return part[x % n_part];
  }

  Partition& get(uint8_t x) {
    return part[x];
  }

  Partition*& get() {
    return part;
  }

  void lock() {
    std::for_each(locks.begin(), locks.end(),
		  [](LK* lk){ lk->lock(); });
  }

  void unlock() {
    std::for_each(locks.begin(), locks.end(),
		  [](LK* lk){ lk->unlock(); });
  }

  TreeX(int n_part=1, int csz=127) : n_part(n_part), csz(csz) {
    assert(n_part > 0);
    part = new Partition[n_part];
    for (int ix = 0; ix < n_part; ++ix) {
      Partition& p = part[ix];
      if (csz) {
	p.csz = csz;
	p.cache = (T**) ::operator new(csz * sizeof(T*));
	memset(p.cache, 0, csz * sizeof(T*));
      }
      locks.push_back(&p.lock);
    }
  }

  ~TreeX() {
    delete[] part;
  }

  T* find(uint64_t hk, const K& k, uint32_t flags) {
    T* v;
    Latch lat;
    uint32_t slot = 0;
    lat.p = &(partition_of_scalar(hk));
    if (flags & FLAG_LOCK) {
      lat.lock = &lat.p->lock;
      lat.lock->lock();
    }
    if (csz) { /* template specialize? */
      slot = hk % csz;
      v = lat.p->cache[slot];
      if (v) {
	if (CEQ()(*v, k)) {
	  if (flags & FLAG_LOCK)
	    lat.lock->unlock();
	  return v;
	}
	v = nullptr;
      }
    } else {
      v = nullptr;
    }
    iterator it = lat.p->tr.find(k, CLT());
    if (it != lat.p->tr.end()) {
      v = &(*(it));
      if (csz) {
	/* fill cache slot at hk */
	lat.p->cache[slot] = v;
      }
    }
    if (flags & FLAG_LOCK)
      lat.lock->unlock();
    return v;
  } /* find */

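  /* find_latch()/insert_latched() implement a miss-then-insert
   * protocol: on a miss, insert_unique_check() leaves a position hint
   * in lat.commit_data, so the caller can construct the missing
   * object and link it with insert_latched() without repeating the
   * search, holding the partition lock (the "latch") throughout. */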
  T* find_latch(uint64_t hk, const K& k, Latch& lat,
		uint32_t flags) {
    uint32_t slot = 0;
    T* v;
    lat.p = &(partition_of_scalar(hk));
    lat.lock = &lat.p->lock;
    if (flags & FLAG_LOCK)
      lat.lock->lock();
    if (csz) { /* template specialize? */
      slot = hk % csz;
      v = lat.p->cache[slot];
      if (v) {
	if (CEQ()(*v, k)) {
	  if (flags & (FLAG_LOCK|FLAG_UNLOCK))
	    lat.lock->unlock();
	  return v;
	}
	v = nullptr;
      }
    } else {
      v = nullptr;
    }
    check_result r = lat.p->tr.insert_unique_check(
      k, CLT(), lat.commit_data);
    if (! r.second /* !insertable (i.e., found) */) {
      v = &(*(r.first));
      if (csz) {
	/* fill cache slot at hk */
	lat.p->cache[slot] = v;
      }
    }
    if (flags & (FLAG_LOCK|FLAG_UNLOCK))
      lat.lock->unlock();
    return v;
  } /* find_latch */

  void insert_latched(T* v, Latch& lat, uint32_t flags) {
    (void) lat.p->tr.insert_unique_commit(*v, lat.commit_data);
    if (flags & FLAG_UNLOCK)
      lat.lock->unlock();
  } /* insert_latched */

  void insert(uint64_t hk, T* v, uint32_t flags) {
    Partition& p = partition_of_scalar(hk);
    if (flags & FLAG_LOCK)
      p.lock.lock();
    p.tr.insert_unique(*v);
    if (flags & FLAG_LOCK)
      p.lock.unlock();
  } /* insert */

  void remove(uint64_t hk, T* v, uint32_t flags) {
    Partition& p = partition_of_scalar(hk);
    iterator it = TTree::s_iterator_to(*v);
    if (flags & FLAG_LOCK)
      p.lock.lock();
    p.tr.erase(it);
    if (csz) { /* template specialize? */
      uint32_t slot = hk % csz;
      T* v2 = p.cache[slot];
      /* we are intrusive, just compare addresses */
      if (v == v2)
	p.cache[slot] = nullptr;
    }
    if (flags & FLAG_LOCK)
      p.lock.unlock();
  } /* remove */

  void drain(std::function<void(T*)> uref,
	     uint32_t flags = FLAG_NONE) {
    /* clear a table, call supplied function on
     * each element found (e.g., returns sentinel
     * references) */
    Object::Queue2 drain_q;
    for (int t_ix = 0; t_ix < n_part; ++t_ix) {
      Partition& p = part[t_ix];
      if (flags & FLAG_LOCK) /* LOCKED */
	p.lock.lock();
      while (p.tr.size() > 0) {
	iterator it = p.tr.begin();
	T* v = &(*it);
	p.tr.erase(it);
	drain_q.push_front(*v);
      }
      if (flags & FLAG_LOCK) /* we locked it, !LOCKED */
	p.lock.unlock();
    } /* each partition */
    /* unref out-of-line && !LOCKED */
    while (drain_q.size() > 0) {
      Object::Queue2::iterator it = drain_q.begin();
      T* v = static_cast<T*>(&(*it));
      drain_q.erase(it); /* must precede uref(v) in safe_link mode */
      uref(v);
    }
  } /* drain */

private:
  Partition *part;
  std::vector<LK*> locks;
};
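
/* Illustrative instantiation sketch (assumptions: a hypothetical Entry
 * type as above, extended with an intrusive set hook and comparators;
 * all names here are hypothetical):
 *
 *   #include <boost/intrusive/avl_set.hpp>
 *
 *   struct Entry : public Object {
 *     uint64_t key;
 *     bi::avl_set_member_hook<link_mode> tree_hook;
 *     bool reclaim() override { return true; }
 *   };
 *
 *   struct EntryLT {  // CLT: mixed key/value less-than
 *     bool operator()(const Entry& a, const Entry& b) const
 *       { return a.key < b.key; }
 *     bool operator()(const Entry& a, uint64_t k) const
 *       { return a.key < k; }
 *     bool operator()(uint64_t k, const Entry& b) const
 *       { return k < b.key; }
 *   };
 *
 *   struct EntryEQ {  // CEQ: cache-slot equality check
 *     bool operator()(const Entry& a, uint64_t k) const
 *       { return a.key == k; }
 *   };
 *
 *   typedef bi::avl_set<
 *     Entry,
 *     bi::member_hook<Entry, bi::avl_set_member_hook<link_mode>,
 *                     &Entry::tree_hook>,
 *     bi::compare<EntryLT>> EntryTree;
 *
 *   typedef TreeX<Entry, EntryTree, EntryLT, EntryEQ,
 *                 uint64_t, std::mutex> EntryCache;
 */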

} /* namespace lru */
} /* namespace cohort */

#endif /* COHORT_LRU_H */