]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Copyright (C) 2015 CohortFS, LLC. | |
5 | * | |
6 | * This is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License version 2.1, as published by the Free Software | |
9 | * Foundation. See file COPYING. | |
10 | * | |
11 | */ | |
12 | ||
#ifndef COHORT_LRU_H
#define COHORT_LRU_H

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <mutex>
#include <utility>
#include <vector>

#include <boost/intrusive/list.hpp>
#include <boost/intrusive/slist.hpp>

#include "common/likely.h"

#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE 64 /* XXX arch-specific define */
#endif
#define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE]
25 | ||
26 | namespace cohort { | |
27 | ||
28 | namespace lru { | |
29 | ||
30 | namespace bi = boost::intrusive; | |
31 | ||
32 | /* public flag values */ | |
33 | constexpr uint32_t FLAG_NONE = 0x0000; | |
34 | constexpr uint32_t FLAG_INITIAL = 0x0001; | |
b32b8144 | 35 | constexpr uint32_t FLAG_RECYCLE = 0x0002; |
7c673cae FG |
36 | |
37 | enum class Edge : std::uint8_t | |
38 | { | |
39 | MRU = 0, | |
40 | LRU | |
41 | }; | |
42 | ||
224ce89b | 43 | typedef bi::link_mode<bi::safe_link> link_mode; |
7c673cae FG |
44 | |
45 | class Object | |
46 | { | |
47 | private: | |
48 | uint32_t lru_flags; | |
49 | std::atomic<uint32_t> lru_refcnt; | |
50 | std::atomic<uint32_t> lru_adj; | |
51 | bi::list_member_hook< link_mode > lru_hook; | |
52 | ||
53 | typedef bi::list<Object, | |
54 | bi::member_hook< | |
55 | Object, bi::list_member_hook< link_mode >, | |
56 | &Object::lru_hook >, | |
57 | bi::constant_time_size<true>> Queue; | |
58 | ||
224ce89b WB |
59 | bi::slist_member_hook< link_mode > q2_hook; |
60 | ||
61 | typedef bi::slist<Object, | |
62 | bi::member_hook< | |
63 | Object, bi::slist_member_hook< link_mode >, | |
64 | &Object::q2_hook >, | |
65 | bi::constant_time_size<true>> Queue2; | |
66 | ||
7c673cae FG |
67 | public: |
68 | ||
69 | Object() : lru_flags(FLAG_NONE), lru_refcnt(0), lru_adj(0) {} | |
70 | ||
71 | uint32_t get_refcnt() const { return lru_refcnt; } | |
72 | ||
73 | virtual bool reclaim() = 0; | |
74 | ||
75 | virtual ~Object() {} | |
76 | ||
77 | private: | |
78 | template <typename LK> | |
79 | friend class LRU; | |
224ce89b WB |
80 | |
81 | template <typename T, typename TTree, typename CLT, typename CEQ, | |
82 | typename K, typename LK> | |
83 | friend class TreeX; | |
7c673cae FG |
84 | }; |
85 | ||
86 | /* allocator & recycler interface (create or re-use LRU objects) */ | |
87 | class ObjectFactory | |
88 | { | |
89 | public: | |
90 | virtual Object* alloc(void) = 0; | |
91 | virtual void recycle(Object*) = 0; | |
92 | virtual ~ObjectFactory() {}; | |
93 | }; | |
94 | ||
95 | template <typename LK> | |
96 | class LRU | |
97 | { | |
98 | private: | |
99 | ||
100 | struct Lane { | |
101 | LK lock; | |
102 | Object::Queue q; | |
103 | // Object::Queue pinned; /* placeholder for possible expansion */ | |
104 | CACHE_PAD(0); | |
105 | Lane() {} | |
106 | }; | |
107 | ||
108 | Lane *qlane; | |
109 | int n_lanes; | |
110 | std::atomic<uint32_t> evict_lane; | |
111 | const uint32_t lane_hiwat; | |
112 | ||
113 | static constexpr uint32_t lru_adj_modulus = 5; | |
114 | ||
115 | static constexpr uint32_t SENTINEL_REFCNT = 1; | |
116 | ||
117 | /* internal flag values */ | |
118 | static constexpr uint32_t FLAG_INLRU = 0x0001; | |
119 | static constexpr uint32_t FLAG_PINNED = 0x0002; // possible future use | |
120 | static constexpr uint32_t FLAG_EVICTING = 0x0004; | |
121 | ||
122 | Lane& lane_of(void* addr) { | |
123 | return qlane[(uint64_t)(addr) % n_lanes]; | |
124 | } | |
125 | ||
126 | uint32_t next_evict_lane() { | |
127 | return (evict_lane++ % n_lanes); | |
128 | } | |
129 | ||
130 | bool can_reclaim(Object* o) { | |
131 | return ((o->lru_refcnt == SENTINEL_REFCNT) && | |
132 | (!(o->lru_flags & FLAG_EVICTING))); | |
133 | } | |
134 | ||
135 | Object* evict_block() { | |
136 | uint32_t lane_ix = next_evict_lane(); | |
137 | for (int ix = 0; ix < n_lanes; ++ix, | |
138 | lane_ix = next_evict_lane()) { | |
139 | Lane& lane = qlane[lane_ix]; | |
181888fb | 140 | lane.lock.lock(); |
7c673cae FG |
141 | /* if object at LRU has refcnt==1, it may be reclaimable */ |
142 | Object* o = &(lane.q.back()); | |
7c673cae FG |
143 | if (can_reclaim(o)) { |
144 | ++(o->lru_refcnt); | |
145 | o->lru_flags |= FLAG_EVICTING; | |
146 | lane.lock.unlock(); | |
147 | if (o->reclaim()) { | |
148 | lane.lock.lock(); | |
149 | --(o->lru_refcnt); | |
150 | /* assertions that o state has not changed across | |
151 | * relock */ | |
152 | assert(o->lru_refcnt == SENTINEL_REFCNT); | |
153 | assert(o->lru_flags & FLAG_INLRU); | |
154 | Object::Queue::iterator it = | |
155 | Object::Queue::s_iterator_to(*o); | |
156 | lane.q.erase(it); | |
157 | lane.lock.unlock(); | |
158 | return o; | |
159 | } else { | |
160 | // XXX can't make unreachable (means what?) | |
7c673cae FG |
161 | --(o->lru_refcnt); |
162 | o->lru_flags &= ~FLAG_EVICTING; | |
163 | /* unlock in next block */ | |
164 | } | |
165 | } /* can_reclaim(o) */ | |
166 | lane.lock.unlock(); | |
167 | } /* each lane */ | |
168 | return nullptr; | |
169 | } /* evict_block */ | |
170 | ||
171 | public: | |
172 | ||
173 | LRU(int lanes, uint32_t _hiwat) | |
174 | : n_lanes(lanes), evict_lane(0), lane_hiwat(_hiwat) | |
175 | { | |
176 | assert(n_lanes > 0); | |
177 | qlane = new Lane[n_lanes]; | |
178 | } | |
179 | ||
180 | ~LRU() { delete[] qlane; } | |
181 | ||
182 | bool ref(Object* o, uint32_t flags) { | |
183 | ++(o->lru_refcnt); | |
184 | if (flags & FLAG_INITIAL) { | |
185 | if ((++(o->lru_adj) % lru_adj_modulus) == 0) { | |
186 | Lane& lane = lane_of(o); | |
187 | lane.lock.lock(); | |
188 | /* move to MRU */ | |
189 | Object::Queue::iterator it = | |
190 | Object::Queue::s_iterator_to(*o); | |
191 | lane.q.erase(it); | |
192 | lane.q.push_front(*o); | |
193 | lane.lock.unlock(); | |
194 | } /* adj */ | |
195 | } /* initial ref */ | |
196 | return true; | |
197 | } /* ref */ | |
198 | ||
199 | void unref(Object* o, uint32_t flags) { | |
200 | uint32_t refcnt = --(o->lru_refcnt); | |
224ce89b | 201 | Object* tdo = nullptr; |
7c673cae FG |
202 | if (unlikely(refcnt == 0)) { |
203 | Lane& lane = lane_of(o); | |
204 | lane.lock.lock(); | |
205 | refcnt = o->lru_refcnt.load(); | |
206 | if (unlikely(refcnt == 0)) { | |
207 | Object::Queue::iterator it = | |
208 | Object::Queue::s_iterator_to(*o); | |
209 | lane.q.erase(it); | |
224ce89b | 210 | tdo = o; |
7c673cae FG |
211 | } |
212 | lane.lock.unlock(); | |
213 | } else if (unlikely(refcnt == SENTINEL_REFCNT)) { | |
214 | Lane& lane = lane_of(o); | |
215 | lane.lock.lock(); | |
216 | refcnt = o->lru_refcnt.load(); | |
217 | if (likely(refcnt == SENTINEL_REFCNT)) { | |
218 | /* move to LRU */ | |
219 | Object::Queue::iterator it = | |
220 | Object::Queue::s_iterator_to(*o); | |
221 | lane.q.erase(it); | |
222 | /* hiwat check */ | |
223 | if (lane.q.size() > lane_hiwat) { | |
224ce89b | 224 | tdo = o; |
7c673cae FG |
225 | } else { |
226 | lane.q.push_back(*o); | |
227 | } | |
228 | } | |
229 | lane.lock.unlock(); | |
230 | } | |
224ce89b WB |
231 | /* unref out-of-line && !LOCKED */ |
232 | if (tdo) | |
233 | delete tdo; | |
7c673cae FG |
234 | } /* unref */ |
235 | ||
b32b8144 | 236 | Object* insert(ObjectFactory* fac, Edge edge, uint32_t& flags) { |
7c673cae FG |
237 | /* use supplied functor to re-use an evicted object, or |
238 | * allocate a new one of the descendant type */ | |
239 | Object* o = evict_block(); | |
b32b8144 | 240 | if (o) { |
7c673cae | 241 | fac->recycle(o); /* recycle existing object */ |
b32b8144 FG |
242 | flags |= FLAG_RECYCLE; |
243 | } | |
7c673cae FG |
244 | else |
245 | o = fac->alloc(); /* get a new one */ | |
246 | ||
247 | o->lru_flags = FLAG_INLRU; | |
248 | ||
249 | Lane& lane = lane_of(o); | |
250 | lane.lock.lock(); | |
251 | switch (edge) { | |
252 | case Edge::MRU: | |
253 | lane.q.push_front(*o); | |
254 | break; | |
255 | case Edge::LRU: | |
256 | lane.q.push_back(*o); | |
257 | break; | |
258 | default: | |
259 | abort(); | |
260 | break; | |
261 | } | |
262 | if (flags & FLAG_INITIAL) | |
263 | o->lru_refcnt += 2; /* sentinel ref + initial */ | |
264 | else | |
265 | ++(o->lru_refcnt); /* sentinel */ | |
266 | lane.lock.unlock(); | |
267 | return o; | |
268 | } /* insert */ | |
269 | ||
270 | }; | |
271 | ||
272 | template <typename T, typename TTree, typename CLT, typename CEQ, | |
273 | typename K, typename LK> | |
274 | class TreeX | |
275 | { | |
276 | public: | |
277 | ||
278 | static constexpr uint32_t FLAG_NONE = 0x0000; | |
279 | static constexpr uint32_t FLAG_LOCK = 0x0001; | |
280 | static constexpr uint32_t FLAG_UNLOCK = 0x0002; | |
281 | static constexpr uint32_t FLAG_UNLOCK_ON_MISS = 0x0004; | |
282 | ||
283 | typedef T value_type; | |
284 | typedef TTree container_type; | |
285 | typedef typename TTree::iterator iterator; | |
286 | typedef std::pair<iterator, bool> check_result; | |
287 | typedef typename TTree::insert_commit_data insert_commit_data; | |
288 | int n_part; | |
289 | int csz; | |
290 | ||
291 | typedef std::unique_lock<LK> unique_lock; | |
292 | ||
293 | struct Partition { | |
294 | LK lock; | |
295 | TTree tr; | |
296 | T** cache; | |
297 | int csz; | |
298 | CACHE_PAD(0); | |
299 | ||
300 | Partition() : tr(), cache(nullptr), csz(0) {} | |
301 | ||
302 | ~Partition() { | |
303 | if (csz) | |
304 | ::operator delete(cache); | |
305 | } | |
306 | }; | |
307 | ||
308 | struct Latch { | |
309 | Partition* p; | |
310 | LK* lock; | |
311 | insert_commit_data commit_data; | |
312 | ||
313 | Latch() : p(nullptr), lock(nullptr) {} | |
314 | }; | |
315 | ||
316 | Partition& partition_of_scalar(uint64_t x) { | |
317 | return part[x % n_part]; | |
318 | } | |
319 | ||
320 | Partition& get(uint8_t x) { | |
321 | return part[x]; | |
322 | } | |
323 | ||
324 | Partition*& get() { | |
325 | return part; | |
326 | } | |
327 | ||
328 | void lock() { | |
329 | std::for_each(locks.begin(), locks.end(), | |
330 | [](LK* lk){ lk->lock(); }); | |
331 | } | |
332 | ||
333 | void unlock() { | |
334 | std::for_each(locks.begin(), locks.end(), | |
335 | [](LK* lk){ lk->unlock(); }); | |
336 | } | |
337 | ||
338 | TreeX(int n_part=1, int csz=127) : n_part(n_part), csz(csz) { | |
339 | assert(n_part > 0); | |
340 | part = new Partition[n_part]; | |
341 | for (int ix = 0; ix < n_part; ++ix) { | |
342 | Partition& p = part[ix]; | |
343 | if (csz) { | |
344 | p.csz = csz; | |
345 | p.cache = (T**) ::operator new(csz * sizeof(T*)); | |
346 | memset(p.cache, 0, csz * sizeof(T*)); | |
347 | } | |
348 | locks.push_back(&p.lock); | |
349 | } | |
350 | } | |
351 | ||
352 | ~TreeX() { | |
353 | delete[] part; | |
354 | } | |
355 | ||
356 | T* find(uint64_t hk, const K& k, uint32_t flags) { | |
357 | T* v; | |
358 | Latch lat; | |
359 | uint32_t slot = 0; | |
360 | lat.p = &(partition_of_scalar(hk)); | |
361 | if (flags & FLAG_LOCK) { | |
362 | lat.lock = &lat.p->lock; | |
363 | lat.lock->lock(); | |
364 | } | |
365 | if (csz) { /* template specialize? */ | |
366 | slot = hk % csz; | |
367 | v = lat.p->cache[slot]; | |
368 | if (v) { | |
369 | if (CEQ()(*v, k)) { | |
370 | if (flags & FLAG_LOCK) | |
371 | lat.lock->unlock(); | |
372 | return v; | |
373 | } | |
374 | v = nullptr; | |
375 | } | |
376 | } else { | |
377 | v = nullptr; | |
378 | } | |
379 | iterator it = lat.p->tr.find(k, CLT()); | |
380 | if (it != lat.p->tr.end()){ | |
381 | v = &(*(it)); | |
382 | if (csz) { | |
383 | /* fill cache slot at hk */ | |
384 | lat.p->cache[slot] = v; | |
385 | } | |
386 | } | |
387 | if (flags & FLAG_LOCK) | |
388 | lat.lock->unlock(); | |
389 | return v; | |
390 | } /* find */ | |
391 | ||
392 | T* find_latch(uint64_t hk, const K& k, Latch& lat, | |
393 | uint32_t flags) { | |
394 | uint32_t slot = 0; | |
395 | T* v; | |
396 | lat.p = &(partition_of_scalar(hk)); | |
397 | lat.lock = &lat.p->lock; | |
398 | if (flags & FLAG_LOCK) | |
399 | lat.lock->lock(); | |
400 | if (csz) { /* template specialize? */ | |
401 | slot = hk % csz; | |
402 | v = lat.p->cache[slot]; | |
403 | if (v) { | |
404 | if (CEQ()(*v, k)) { | |
c07f9fc5 | 405 | if ((flags & FLAG_LOCK) && (flags & FLAG_UNLOCK)) |
7c673cae FG |
406 | lat.lock->unlock(); |
407 | return v; | |
408 | } | |
409 | v = nullptr; | |
410 | } | |
411 | } else { | |
412 | v = nullptr; | |
413 | } | |
414 | check_result r = lat.p->tr.insert_unique_check( | |
415 | k, CLT(), lat.commit_data); | |
416 | if (! r.second /* !insertable (i.e., !found) */) { | |
417 | v = &(*(r.first)); | |
418 | if (csz) { | |
419 | /* fill cache slot at hk */ | |
420 | lat.p->cache[slot] = v; | |
421 | } | |
422 | } | |
c07f9fc5 | 423 | if ((flags & FLAG_LOCK) && (flags & FLAG_UNLOCK)) |
7c673cae FG |
424 | lat.lock->unlock(); |
425 | return v; | |
426 | } /* find_latch */ | |
427 | ||
428 | void insert_latched(T* v, Latch& lat, uint32_t flags) { | |
429 | (void) lat.p->tr.insert_unique_commit(*v, lat.commit_data); | |
430 | if (flags & FLAG_UNLOCK) | |
431 | lat.lock->unlock(); | |
432 | } /* insert_latched */ | |
433 | ||
434 | void insert(uint64_t hk, T* v, uint32_t flags) { | |
435 | Partition& p = partition_of_scalar(hk); | |
436 | if (flags & FLAG_LOCK) | |
437 | p.lock.lock(); | |
438 | p.tr.insert_unique(*v); | |
439 | if (flags & FLAG_LOCK) | |
440 | p.lock.unlock(); | |
441 | } /* insert */ | |
442 | ||
443 | void remove(uint64_t hk, T* v, uint32_t flags) { | |
444 | Partition& p = partition_of_scalar(hk); | |
445 | iterator it = TTree::s_iterator_to(*v); | |
446 | if (flags & FLAG_LOCK) | |
447 | p.lock.lock(); | |
448 | p.tr.erase(it); | |
449 | if (csz) { /* template specialize? */ | |
450 | uint32_t slot = hk % csz; | |
451 | T* v2 = p.cache[slot]; | |
452 | /* we are intrusive, just compare addresses */ | |
453 | if (v == v2) | |
454 | p.cache[slot] = nullptr; | |
455 | } | |
456 | if (flags & FLAG_LOCK) | |
457 | p.lock.unlock(); | |
458 | } /* remove */ | |
459 | ||
460 | void drain(std::function<void(T*)> uref, | |
461 | uint32_t flags = FLAG_NONE) { | |
462 | /* clear a table, call supplied function on | |
463 | * each element found (e.g., retuns sentinel | |
464 | * references) */ | |
224ce89b | 465 | Object::Queue2 drain_q; |
7c673cae FG |
466 | for (int t_ix = 0; t_ix < n_part; ++t_ix) { |
467 | Partition& p = part[t_ix]; | |
468 | if (flags & FLAG_LOCK) /* LOCKED */ | |
469 | p.lock.lock(); | |
470 | while (p.tr.size() > 0) { | |
471 | iterator it = p.tr.begin(); | |
472 | T* v = &(*it); | |
224ce89b WB |
473 | p.tr.erase(it); |
474 | drain_q.push_front(*v); | |
7c673cae FG |
475 | } |
476 | if (flags & FLAG_LOCK) /* we locked it, !LOCKED */ | |
477 | p.lock.unlock(); | |
478 | } /* each partition */ | |
224ce89b WB |
479 | /* unref out-of-line && !LOCKED */ |
480 | while (drain_q.size() > 0) { | |
481 | Object::Queue2::iterator it = drain_q.begin(); | |
482 | T* v = static_cast<T*>(&(*it)); | |
483 | drain_q.erase(it); /* must precede uref(v) in safe_link mode */ | |
484 | uref(v); | |
485 | } | |
7c673cae FG |
486 | } /* drain */ |
487 | ||
488 | private: | |
489 | Partition *part; | |
490 | std::vector<LK*> locks; | |
491 | }; | |
492 | ||
493 | } /* namespace LRU */ | |
494 | } /* namespace cohort */ | |
495 | ||
496 | #endif /* COHORT_LRU_H */ |