// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Copyright (C) 2015 CohortFS, LLC.
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */

#ifndef COHORT_LRU_H
#define COHORT_LRU_H
#include <boost/intrusive/list.hpp>

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <mutex>
#include <vector>

#include "common/likely.h"

#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE 64 /* XXX arch-specific define */
#endif
#define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE]
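
/* CACHE_PAD gives each concurrently-locked structure below (Lane,
 * Partition) a filler member of one cache line, so that neighboring
 * instances in an array do not share a line and ping-pong it between
 * cores (false sharing). */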

namespace cohort {

namespace lru {

namespace bi = boost::intrusive;

/* public flag values */
constexpr uint32_t FLAG_NONE = 0x0000;
constexpr uint32_t FLAG_INITIAL = 0x0001;

enum class Edge : std::uint8_t
{
  MRU = 0,
  LRU
};

typedef bi::link_mode<bi::safe_link> link_mode; // for debugging

class Object
{
private:
  uint32_t lru_flags;
  std::atomic<uint32_t> lru_refcnt;
  std::atomic<uint32_t> lru_adj;
  bi::list_member_hook<link_mode> lru_hook;

  typedef bi::list<Object,
                   bi::member_hook<
                     Object, bi::list_member_hook<link_mode>,
                     &Object::lru_hook>,
                   bi::constant_time_size<true>> Queue;

public:

  Object() : lru_flags(FLAG_NONE), lru_refcnt(0), lru_adj(0) {}

  uint32_t get_refcnt() const { return lru_refcnt; }

  virtual bool reclaim() = 0;

  virtual ~Object() {}

private:
  template <typename LK>
  friend class LRU;
};

/* allocator & recycler interface (create or re-use LRU objects) */
class ObjectFactory
{
public:
  virtual Object* alloc(void) = 0;
  virtual void recycle(Object*) = 0;
  virtual ~ObjectFactory() {}
};
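
/* Usage sketch (illustrative only; MyObject and MyObjectFactory are
 * hypothetical client types, not part of this header).  A cached type
 * derives from Object and implements reclaim(); a matching factory
 * creates new instances and re-initializes evicted ones in place:
 *
 *   class MyObject : public cohort::lru::Object {
 *   public:
 *     uint64_t key;
 *     explicit MyObject(uint64_t k) : key(k) {}
 *     bool reclaim() override {
 *       // true iff the object may be recycled now (e.g., no external
 *       // index still holds it)
 *       return true;
 *     }
 *   };
 *
 *   class MyObjectFactory : public cohort::lru::ObjectFactory {
 *   public:
 *     uint64_t k;
 *     explicit MyObjectFactory(uint64_t k) : k(k) {}
 *     cohort::lru::Object* alloc() override { return new MyObject(k); }
 *     void recycle(cohort::lru::Object* o) override {
 *       static_cast<MyObject*>(o)->key = k; // re-key the evicted object
 *     }
 *   };
 */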

template <typename LK>
class LRU
{
private:

  struct Lane {
    LK lock;
    Object::Queue q;
    // Object::Queue pinned; /* placeholder for possible expansion */
    CACHE_PAD(0);
    Lane() {}
  };

  Lane* qlane;
  int n_lanes;
  std::atomic<uint32_t> evict_lane;
  const uint32_t lane_hiwat;

  static constexpr uint32_t lru_adj_modulus = 5;

  static constexpr uint32_t SENTINEL_REFCNT = 1;
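
  /* Notes: SENTINEL_REFCNT is the one reference the lane queue itself
   * holds on a linked object, so refcnt == 1 means "referenced only by
   * the LRU" and the object is a reclaim candidate.  lru_adj_modulus
   * throttles reordering: ref(..., FLAG_INITIAL) moves an object to
   * MRU only on every lru_adj_modulus-th such reference, trading exact
   * recency order for fewer lane-lock acquisitions. */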

  /* internal flag values */
  static constexpr uint32_t FLAG_INLRU = 0x0001;
  static constexpr uint32_t FLAG_PINNED = 0x0002; // possible future use
  static constexpr uint32_t FLAG_EVICTING = 0x0004;

  Lane& lane_of(void* addr) {
    return qlane[reinterpret_cast<std::uintptr_t>(addr) % n_lanes];
  }

  uint32_t next_evict_lane() {
    return (evict_lane++ % n_lanes);
  }

  bool can_reclaim(Object* o) {
    return ((o->lru_refcnt == SENTINEL_REFCNT) &&
            (!(o->lru_flags & FLAG_EVICTING)));
  }

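  /* Scan up to n_lanes lanes, starting at the eviction rotor, for an
   * object at the LRU end whose only reference is the sentinel.  A
   * candidate is marked FLAG_EVICTING and its lane lock is dropped
   * before reclaim() is called (so reclaim() may take other locks);
   * the lane is then re-locked and the object either unlinked (reclaim
   * succeeded) or unmarked (reclaim refused). */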
  Object* evict_block() {
    uint32_t lane_ix = next_evict_lane();
    for (int ix = 0; ix < n_lanes; ++ix,
           lane_ix = next_evict_lane()) {
      Lane& lane = qlane[lane_ix];
      lane.lock.lock();
      if (lane.q.empty()) {
        lane.lock.unlock();
        continue;
      }
      /* if object at LRU has refcnt==1, it may be reclaimable */
      Object* o = &(lane.q.back());
#if 0 /* XXX save for refactor */
      std::cout << __func__
                << " " << o
                << " refcnt: " << o->lru_refcnt
                << std::endl;
#endif
      if (can_reclaim(o)) {
        ++(o->lru_refcnt);
        o->lru_flags |= FLAG_EVICTING;
        lane.lock.unlock();
        if (o->reclaim()) {
          lane.lock.lock();
          --(o->lru_refcnt);
          /* assertions that o state has not changed across
           * relock */
          assert(o->lru_refcnt == SENTINEL_REFCNT);
          assert(o->lru_flags & FLAG_INLRU);
          Object::Queue::iterator it =
            Object::Queue::s_iterator_to(*o);
          lane.q.erase(it);
          lane.lock.unlock();
          return o;
        } else {
          // XXX can't make unreachable (means what?)
          lane.lock.lock();
          --(o->lru_refcnt);
          o->lru_flags &= ~FLAG_EVICTING;
          /* unlock in next block */
        }
      } /* can_reclaim(o) */
      lane.lock.unlock();
    } /* each lane */
    return nullptr;
  } /* evict_block */

public:

  LRU(int lanes, uint32_t _hiwat)
    : n_lanes(lanes), evict_lane(0), lane_hiwat(_hiwat)
  {
    assert(n_lanes > 0);
    qlane = new Lane[n_lanes];
  }

  ~LRU() { delete[] qlane; }

  bool ref(Object* o, uint32_t flags) {
    ++(o->lru_refcnt);
    if (flags & FLAG_INITIAL) {
      if ((++(o->lru_adj) % lru_adj_modulus) == 0) {
        Lane& lane = lane_of(o);
        lane.lock.lock();
        /* move to MRU */
        Object::Queue::iterator it =
          Object::Queue::s_iterator_to(*o);
        lane.q.erase(it);
        lane.q.push_front(*o);
        lane.lock.unlock();
      } /* adj */
    } /* initial ref */
    return true;
  } /* ref */

  void unref(Object* o, uint32_t flags) {
    uint32_t refcnt = --(o->lru_refcnt);
    if (unlikely(refcnt == 0)) {
      Lane& lane = lane_of(o);
      lane.lock.lock();
      refcnt = o->lru_refcnt.load();
      if (unlikely(refcnt == 0)) {
        Object::Queue::iterator it =
          Object::Queue::s_iterator_to(*o);
        lane.q.erase(it);
        delete o;
      }
      lane.lock.unlock();
    } else if (unlikely(refcnt == SENTINEL_REFCNT)) {
      Lane& lane = lane_of(o);
      lane.lock.lock();
      refcnt = o->lru_refcnt.load();
      if (likely(refcnt == SENTINEL_REFCNT)) {
        /* move to LRU */
        Object::Queue::iterator it =
          Object::Queue::s_iterator_to(*o);
        lane.q.erase(it);
        /* hiwat check */
        if (lane.q.size() > lane_hiwat) {
          delete o;
        } else {
          lane.q.push_back(*o);
        }
      }
      lane.lock.unlock();
    }
  } /* unref */

  Object* insert(ObjectFactory* fac, Edge edge, uint32_t flags) {
    /* use the supplied factory to re-use an evicted object, or
     * allocate a new one of the descendant type */
    Object* o = evict_block();
    if (o)
      fac->recycle(o); /* recycle existing object */
    else
      o = fac->alloc(); /* get a new one */

    o->lru_flags = FLAG_INLRU;

    Lane& lane = lane_of(o);
    lane.lock.lock();
    switch (edge) {
    case Edge::MRU:
      lane.q.push_front(*o);
      break;
    case Edge::LRU:
      lane.q.push_back(*o);
      break;
    default:
      abort();
      break;
    }
    if (flags & FLAG_INITIAL)
      o->lru_refcnt += 2; /* sentinel ref + initial */
    else
      ++(o->lru_refcnt); /* sentinel */
    lane.lock.unlock();
    return o;
  } /* insert */

};
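
/* Usage sketch (illustrative; MyObject/MyObjectFactory as sketched
 * above, all names hypothetical).  A typical object lifecycle:
 *
 *   cohort::lru::LRU<std::mutex> lru(5, 32768); // 5 lanes, hiwat 32768
 *
 *   MyObjectFactory fac(42);
 *   MyObject* o = static_cast<MyObject*>(
 *     lru.insert(&fac, cohort::lru::Edge::MRU,
 *                cohort::lru::FLAG_INITIAL));  // refcnt == 2
 *
 *   lru.ref(o, cohort::lru::FLAG_NONE);    // extra reference
 *   lru.unref(o, cohort::lru::FLAG_NONE);  // drop it
 *   lru.unref(o, cohort::lru::FLAG_NONE);  // drop the initial ref; o
 *                                          // stays queued at the
 *                                          // sentinel refcount until
 *                                          // recycled or deleted
 */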

template <typename T, typename TTree, typename CLT, typename CEQ,
          typename K, typename LK>
class TreeX
{
public:

  static constexpr uint32_t FLAG_NONE = 0x0000;
  static constexpr uint32_t FLAG_LOCK = 0x0001;
  static constexpr uint32_t FLAG_UNLOCK = 0x0002;
  static constexpr uint32_t FLAG_UNLOCK_ON_MISS = 0x0004;

  typedef T value_type;
  typedef TTree container_type;
  typedef typename TTree::iterator iterator;
  typedef std::pair<iterator, bool> check_result;
  typedef typename TTree::insert_commit_data insert_commit_data;
  int n_part;
  int csz;

  typedef std::unique_lock<LK> unique_lock;

  struct Partition {
    LK lock;
    TTree tr;
    T** cache;
    int csz;
    CACHE_PAD(0);

    Partition() : tr(), cache(nullptr), csz(0) {}

    ~Partition() {
      if (csz)
        ::operator delete(cache);
    }
  };

  struct Latch {
    Partition* p;
    LK* lock;
    insert_commit_data commit_data;

    Latch() : p(nullptr), lock(nullptr) {}
  };

  Partition& partition_of_scalar(uint64_t x) {
    return part[x % n_part];
  }

  Partition& get(uint8_t x) {
    return part[x];
  }

  Partition*& get() {
    return part;
  }

  void lock() {
    std::for_each(locks.begin(), locks.end(),
                  [](LK* lk) { lk->lock(); });
  }

  void unlock() {
    std::for_each(locks.begin(), locks.end(),
                  [](LK* lk) { lk->unlock(); });
  }

  TreeX(int n_part=1, int csz=127) : n_part(n_part), csz(csz) {
    assert(n_part > 0);
    part = new Partition[n_part];
    for (int ix = 0; ix < n_part; ++ix) {
      Partition& p = part[ix];
      if (csz) {
        p.csz = csz;
        p.cache = (T**) ::operator new(csz * sizeof(T*));
        memset(p.cache, 0, csz * sizeof(T*));
      }
      locks.push_back(&p.lock);
    }
  }

  ~TreeX() {
    delete[] part;
  }

  T* find(uint64_t hk, const K& k, uint32_t flags) {
    T* v;
    Latch lat;
    uint32_t slot = 0;
    lat.p = &(partition_of_scalar(hk));
    if (flags & FLAG_LOCK) {
      lat.lock = &lat.p->lock;
      lat.lock->lock();
    }
    if (csz) { /* template specialize? */
      slot = hk % csz;
      v = lat.p->cache[slot];
      if (v) {
        if (CEQ()(*v, k)) {
          if (flags & FLAG_LOCK)
            lat.lock->unlock();
          return v;
        }
        v = nullptr;
      }
    } else {
      v = nullptr;
    }
    iterator it = lat.p->tr.find(k, CLT());
    if (it != lat.p->tr.end()) {
      v = &(*(it));
      if (csz) {
        /* fill cache slot at hk */
        lat.p->cache[slot] = v;
      }
    }
    if (flags & FLAG_LOCK)
      lat.lock->unlock();
    return v;
  } /* find */

  T* find_latch(uint64_t hk, const K& k, Latch& lat,
                uint32_t flags) {
    uint32_t slot = 0;
    T* v;
    lat.p = &(partition_of_scalar(hk));
    lat.lock = &lat.p->lock;
    if (flags & FLAG_LOCK)
      lat.lock->lock();
    if (csz) { /* template specialize? */
      slot = hk % csz;
      v = lat.p->cache[slot];
      if (v) {
        if (CEQ()(*v, k)) {
          if (flags & (FLAG_LOCK|FLAG_UNLOCK))
            lat.lock->unlock();
          return v;
        }
        v = nullptr;
      }
    } else {
      v = nullptr;
    }
    check_result r = lat.p->tr.insert_unique_check(
      k, CLT(), lat.commit_data);
    if (! r.second /* !insertable (i.e., already present) */) {
      v = &(*(r.first));
      if (csz) {
        /* fill cache slot at hk */
        lat.p->cache[slot] = v;
      }
      if (flags & (FLAG_LOCK|FLAG_UNLOCK))
        lat.lock->unlock();
    } else {
      /* miss: leave the latch held (if taken) so the caller can
       * complete a race-free insert via insert_latched(), unless
       * explicitly asked not to */
      if (flags & FLAG_UNLOCK_ON_MISS)
        lat.lock->unlock();
    }
    return v;
  } /* find_latch */
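
  /* Two-phase insert sketch (illustrative; lookup_or_create, FHCache,
   * MyObject and its constructor are hypothetical).  With FLAG_LOCK, a
   * miss returns with the partition still latched, so the commit below
   * cannot race with a concurrent inserter of the same key:
   *
   *   MyObject* lookup_or_create(FHCache& cache, uint64_t hk,
   *                              uint64_t k) {
   *     FHCache::Latch lat;
   *     MyObject* v = cache.find_latch(hk, k, lat, FHCache::FLAG_LOCK);
   *     if (!v) {                // miss: still LATCHED
   *       v = new MyObject(k);
   *       cache.insert_latched(v, lat, FHCache::FLAG_UNLOCK); // !LATCHED
   *     }                        // hit: find_latch already unlatched
   *     return v;
   *   }
   */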

  void insert_latched(T* v, Latch& lat, uint32_t flags) {
    (void) lat.p->tr.insert_unique_commit(*v, lat.commit_data);
    if (flags & FLAG_UNLOCK)
      lat.lock->unlock();
  } /* insert_latched */

  void insert(uint64_t hk, T* v, uint32_t flags) {
    Partition& p = partition_of_scalar(hk);
    if (flags & FLAG_LOCK)
      p.lock.lock();
    p.tr.insert_unique(*v);
    if (flags & FLAG_LOCK)
      p.lock.unlock();
  } /* insert */

  void remove(uint64_t hk, T* v, uint32_t flags) {
    Partition& p = partition_of_scalar(hk);
    iterator it = TTree::s_iterator_to(*v);
    if (flags & FLAG_LOCK)
      p.lock.lock();
    p.tr.erase(it);
    if (csz) { /* template specialize? */
      uint32_t slot = hk % csz;
      T* v2 = p.cache[slot];
      /* we are intrusive, just compare addresses */
      if (v == v2)
        p.cache[slot] = nullptr;
    }
    if (flags & FLAG_LOCK)
      p.lock.unlock();
  } /* remove */

  void drain(std::function<void(T*)> uref,
             uint32_t flags = FLAG_NONE) {
    /* clear a table, call the supplied function on each element
     * found (e.g., to return sentinel references) */
    for (int t_ix = 0; t_ix < n_part; ++t_ix) {
      Partition& p = part[t_ix];
      if (flags & FLAG_LOCK) /* LOCKED */
        p.lock.lock();
      while (p.tr.size() > 0) {
        iterator it = p.tr.begin();
        T* v = &(*it);
        p.tr.erase(it); /* must precede uref(v), in
                         * safe_link mode */
        uref(v);
      }
      if (flags & FLAG_LOCK) /* we locked it, !LOCKED */
        p.lock.unlock();
    } /* each partition */
  } /* drain */

private:
  Partition* part;
  std::vector<LK*> locks;
};
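
/* Instantiation sketch (illustrative; MyObject, MyObjectLT, MyObjectEQ,
 * MyTree and FHCache are hypothetical).  TTree must be a boost::intrusive
 * tree exposing insert_unique_check/insert_unique_commit, e.g. an avltree
 * (requires <boost/intrusive/avltree.hpp>) keyed over a hook member:
 *
 *   class MyObject : public cohort::lru::Object {
 *   public:
 *     uint64_t key;
 *     boost::intrusive::avl_set_member_hook<> tree_hook;
 *     bool reclaim() override { return true; }
 *   };
 *
 *   struct MyObjectLT {  // CLT: value/key strict weak order
 *     bool operator()(const MyObject& a, const MyObject& b) const
 *       { return a.key < b.key; }
 *     bool operator()(const MyObject& a, uint64_t k) const
 *       { return a.key < k; }
 *     bool operator()(uint64_t k, const MyObject& a) const
 *       { return k < a.key; }
 *   };
 *
 *   struct MyObjectEQ {  // CEQ: key equality for the slot cache
 *     bool operator()(const MyObject& a, uint64_t k) const
 *       { return a.key == k; }
 *   };
 *
 *   typedef boost::intrusive::avltree<
 *     MyObject,
 *     boost::intrusive::compare<MyObjectLT>,
 *     boost::intrusive::member_hook<
 *       MyObject, boost::intrusive::avl_set_member_hook<>,
 *       &MyObject::tree_hook>> MyTree;
 *
 *   typedef cohort::lru::TreeX<MyObject, MyTree, MyObjectLT, MyObjectEQ,
 *                              uint64_t, std::mutex> FHCache;
 */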

} /* namespace lru */
} /* namespace cohort */

#endif /* COHORT_LRU_H */