]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Copyright (C) 2015 CohortFS, LLC. | |
5 | * | |
6 | * This is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License version 2.1, as published by the Free Software | |
9 | * Foundation. See file COPYING. | |
10 | * | |
11 | */ | |
12 | ||
#ifndef COHORT_LRU_H
#define COHORT_LRU_H

#include <algorithm>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <functional>
#include <mutex>
#include <new>
#include <utility>
#include <vector>

#include <boost/intrusive/list.hpp>
#include <boost/intrusive/slist.hpp>

#include "common/likely.h"

#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE 64 /* XXX arch-specific define */
#endif
#define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE]
25 | ||
26 | namespace cohort { | |
27 | ||
28 | namespace lru { | |
29 | ||
30 | namespace bi = boost::intrusive; | |
31 | ||
/* public flag values (passed by callers to LRU::ref()/unref()/insert()) */
constexpr uint32_t FLAG_NONE = 0x0000;
constexpr uint32_t FLAG_INITIAL = 0x0001; /* initial reference on a new object */
constexpr uint32_t FLAG_RECYCLE = 0x0002; /* set by insert() when an evicted
					   * object was recycled */

/* which end of an LRU lane queue an object is linked at */
enum class Edge : std::uint8_t
{
  MRU = 0, /* most-recently-used end (queue front) */
  LRU      /* least-recently-used end (queue back) */
};

/* intrusive hooks use safe_link mode (hooks track their linked state) */
typedef bi::link_mode<bi::safe_link> link_mode;
7c673cae | 44 | |
f91f0fd5 TL |
45 | class ObjectFactory; // Forward declaration |
46 | ||
7c673cae FG |
47 | class Object |
48 | { | |
49 | private: | |
50 | uint32_t lru_flags; | |
51 | std::atomic<uint32_t> lru_refcnt; | |
52 | std::atomic<uint32_t> lru_adj; | |
53 | bi::list_member_hook< link_mode > lru_hook; | |
54 | ||
55 | typedef bi::list<Object, | |
56 | bi::member_hook< | |
57 | Object, bi::list_member_hook< link_mode >, | |
58 | &Object::lru_hook >, | |
59 | bi::constant_time_size<true>> Queue; | |
60 | ||
224ce89b WB |
61 | bi::slist_member_hook< link_mode > q2_hook; |
62 | ||
63 | typedef bi::slist<Object, | |
64 | bi::member_hook< | |
65 | Object, bi::slist_member_hook< link_mode >, | |
66 | &Object::q2_hook >, | |
67 | bi::constant_time_size<true>> Queue2; | |
68 | ||
7c673cae FG |
69 | public: |
70 | ||
71 | Object() : lru_flags(FLAG_NONE), lru_refcnt(0), lru_adj(0) {} | |
72 | ||
73 | uint32_t get_refcnt() const { return lru_refcnt; } | |
74 | ||
f91f0fd5 | 75 | virtual bool reclaim(const ObjectFactory* newobj_fac) = 0; |
7c673cae FG |
76 | |
77 | virtual ~Object() {} | |
78 | ||
79 | private: | |
80 | template <typename LK> | |
81 | friend class LRU; | |
224ce89b WB |
82 | |
83 | template <typename T, typename TTree, typename CLT, typename CEQ, | |
84 | typename K, typename LK> | |
85 | friend class TreeX; | |
7c673cae FG |
86 | }; |
87 | ||
88 | /* allocator & recycler interface (create or re-use LRU objects) */ | |
89 | class ObjectFactory | |
90 | { | |
91 | public: | |
92 | virtual Object* alloc(void) = 0; | |
93 | virtual void recycle(Object*) = 0; | |
94 | virtual ~ObjectFactory() {}; | |
95 | }; | |
96 | ||
/* Sharded intrusive LRU.  Objects are distributed over n_lanes
 * independent lanes (lock + queue) to reduce lock contention.  LK is
 * the per-lane lock type; it is used through std::unique_lock and
 * bare lock()/unlock(), so it must satisfy BasicLockable. */
template <typename LK>
class LRU
{
private:

  /* one shard: its lock and queue, padded so adjacent lanes do not
   * share a cache line */
  struct Lane {
    LK lock;
    Object::Queue q; /* front = MRU end, back = LRU end */
    // Object::Queue pinned; /* placeholder for possible expansion */
    CACHE_PAD(0);
    Lane() {}
  };

  Lane *qlane;                      /* array of n_lanes lanes */
  int n_lanes;
  std::atomic<uint32_t> evict_lane; /* round-robin eviction cursor */
  const uint32_t lane_hiwat;        /* per-lane queue high watermark */

  /* only every lru_adj_modulus-th FLAG_INITIAL ref() takes the lane
   * lock to move the object to MRU, amortizing locking cost */
  static constexpr uint32_t lru_adj_modulus = 5;

  /* each object linked in a lane queue holds one implicit reference */
  static constexpr uint32_t SENTINEL_REFCNT = 1;

  /* internal flag values */
  static constexpr uint32_t FLAG_INLRU = 0x0001;
  static constexpr uint32_t FLAG_PINNED = 0x0002; // possible future use
  static constexpr uint32_t FLAG_EVICTING = 0x0004;

  /* map an object's address to its lane (address modulo n_lanes) */
  Lane& lane_of(void* addr) {
    return qlane[(uint64_t)(addr) % n_lanes];
  }

  /* advance the round-robin cursor and return the next lane index */
  uint32_t next_evict_lane() {
    return (evict_lane++ % n_lanes);
  }

  /* reclaimable iff only the sentinel reference remains and no other
   * thread has already marked the object as being evicted; caller
   * holds the lane lock */
  bool can_reclaim(Object* o) {
    return ((o->lru_refcnt == SENTINEL_REFCNT) &&
	    (!(o->lru_flags & FLAG_EVICTING)));
  }

  /* Scan lanes round-robin for a reclaimable object at the LRU end of
   * some lane; return it unlinked, or nullptr if none found.  The
   * object's reclaim() hook runs with the lane lock dropped
   * (FLAG_EVICTING guards it meanwhile); state is re-validated after
   * relocking.
   * NOTE(review): lane.q.back() is taken without checking q.empty();
   * this appears to assume lanes are non-empty whenever eviction
   * runs -- confirm against callers. */
  Object* evict_block(const ObjectFactory* newobj_fac) {
    uint32_t lane_ix = next_evict_lane();
    for (int ix = 0; ix < n_lanes; ++ix,
	   lane_ix = next_evict_lane()) {
      Lane& lane = qlane[lane_ix];
      std::unique_lock lane_lock{lane.lock};
      /* if object at LRU has refcnt==1, it may be reclaimable */
      Object* o = &(lane.q.back());
      if (can_reclaim(o)) {
	++(o->lru_refcnt); /* pin o across the unlocked reclaim() call */
	o->lru_flags |= FLAG_EVICTING;
	lane_lock.unlock();
	if (o->reclaim(newobj_fac)) {
	  lane_lock.lock();
	  --(o->lru_refcnt);
	  /* assertions that o state has not changed across
	   * relock */
	  ceph_assert(o->lru_refcnt == SENTINEL_REFCNT);
	  ceph_assert(o->lru_flags & FLAG_INLRU);
	  Object::Queue::iterator it =
	    Object::Queue::s_iterator_to(*o);
	  lane.q.erase(it);
	  return o;
	} else {
	  /* reclaim() refused: undo the pin and the EVICTING mark.
	   * NOTE(review): lru_flags is modified here after the lane
	   * lock was dropped above -- confirm this is intended */
	  --(o->lru_refcnt);
	  o->lru_flags &= ~FLAG_EVICTING;
	  /* unlock in next block */
	}
      } /* can_reclaim(o) */
    } /* each lane */
    return nullptr;
  } /* evict_block */

public:

  /* build an LRU with 'lanes' shards; _hiwat bounds each lane's
   * queue length in unref() */
  LRU(int lanes, uint32_t _hiwat)
    : n_lanes(lanes), evict_lane(0), lane_hiwat(_hiwat)
  {
    ceph_assert(n_lanes > 0);
    qlane = new Lane[n_lanes];
  }

  ~LRU() { delete[] qlane; }

  /* Take a reference on o.  When FLAG_INITIAL is passed, every
   * lru_adj_modulus-th such call moves o to the MRU end of its
   * lane.  Always returns true. */
  bool ref(Object* o, uint32_t flags) {
    ++(o->lru_refcnt);
    if (flags & FLAG_INITIAL) {
      if ((++(o->lru_adj) % lru_adj_modulus) == 0) {
	Lane& lane = lane_of(o);
	lane.lock.lock();
	/* move to MRU */
	Object::Queue::iterator it =
	  Object::Queue::s_iterator_to(*o);
	lane.q.erase(it);
	lane.q.push_front(*o);
	lane.lock.unlock();
      } /* adj */
    } /* initial ref */
    return true;
  } /* ref */

  /* Drop a reference on o.  If the count reaches 0, o is unlinked
   * from its lane and deleted; if only the sentinel reference
   * remains, o is moved to the LRU end of its lane, or deleted
   * outright when the lane exceeds lane_hiwat.  The delete itself
   * happens after the lane lock is released. */
  void unref(Object* o, uint32_t flags) {
    uint32_t refcnt = --(o->lru_refcnt);
    Object* tdo = nullptr;
    if (unlikely(refcnt == 0)) {
      Lane& lane = lane_of(o);
      lane.lock.lock();
      refcnt = o->lru_refcnt.load(); /* re-check under the lane lock */
      if (unlikely(refcnt == 0)) {
	Object::Queue::iterator it =
	  Object::Queue::s_iterator_to(*o);
	lane.q.erase(it);
	tdo = o;
      }
      lane.lock.unlock();
    } else if (unlikely(refcnt == SENTINEL_REFCNT)) {
      Lane& lane = lane_of(o);
      lane.lock.lock();
      refcnt = o->lru_refcnt.load(); /* re-check under the lane lock */
      if (likely(refcnt == SENTINEL_REFCNT)) {
	/* move to LRU */
	Object::Queue::iterator it =
	  Object::Queue::s_iterator_to(*o);
	lane.q.erase(it);
	/* hiwat check */
	if (lane.q.size() > lane_hiwat) {
	  tdo = o;
	} else {
	  lane.q.push_back(*o);
	}
      }
      lane.lock.unlock();
    }
    /* unref out-of-line && !LOCKED */
    if (tdo)
      delete tdo;
  } /* unref */

  /* Obtain an object from fac -- recycling one evicted from the
   * cache when possible, in which case FLAG_RECYCLE is set in flags
   * -- and link it at the given edge of its lane.  The object starts
   * with the sentinel reference, plus one more when FLAG_INITIAL is
   * passed. */
  Object* insert(ObjectFactory* fac, Edge edge, uint32_t& flags) {
    /* use supplied functor to re-use an evicted object, or
     * allocate a new one of the descendant type */
    Object* o = evict_block(fac);
    if (o) {
      fac->recycle(o); /* recycle existing object */
      flags |= FLAG_RECYCLE;
    }
    else
      o = fac->alloc(); /* get a new one */

    o->lru_flags = FLAG_INLRU;

    Lane& lane = lane_of(o);
    lane.lock.lock();
    switch (edge) {
    case Edge::MRU:
      lane.q.push_front(*o);
      break;
    case Edge::LRU:
      lane.q.push_back(*o);
      break;
    default:
      ceph_abort();
      break;
    }
    if (flags & FLAG_INITIAL)
      o->lru_refcnt += 2; /* sentinel ref + initial */
    else
      ++(o->lru_refcnt); /* sentinel */
    lane.lock.unlock();
    return o;
  } /* insert */

}; /* LRU */
270 | ||
/* Partitioned (sharded) lookup table over an intrusive tree type,
 * with a small per-partition direct-mapped cache of recent results.
 * T: value type; TTree: boost::intrusive tree of T; CLT: key/value
 * comparator; CEQ: key/value equality; K: key type; LK: partition
 * lock type. */
template <typename T, typename TTree, typename CLT, typename CEQ,
	  typename K, typename LK>
class TreeX
{
public:

  static constexpr uint32_t FLAG_NONE = 0x0000;
  static constexpr uint32_t FLAG_LOCK = 0x0001;   /* take the partition lock */
  static constexpr uint32_t FLAG_UNLOCK = 0x0002; /* release it before return */
  static constexpr uint32_t FLAG_UNLOCK_ON_MISS = 0x0004; /* declared but not
							   * referenced in this
							   * file */

  typedef T value_type;
  typedef TTree container_type;
  typedef typename TTree::iterator iterator;
  typedef std::pair<iterator, bool> check_result;
  typedef typename TTree::insert_commit_data insert_commit_data;
  int n_part; /* number of partitions */
  int csz;    /* per-partition cache slots (0 disables the cache) */

  typedef std::unique_lock<LK> unique_lock;

  /* one shard: lock, tree, and a raw array of csz cached T* slots,
   * padded to a cache line */
  struct Partition {
    LK lock;
    TTree tr;
    T** cache;
    int csz;
    CACHE_PAD(0);

    Partition() : tr(), cache(nullptr), csz(0) {}

    ~Partition() {
      if (csz)
	::operator delete(cache); /* matches ::operator new in TreeX() */
    }
  };

  /* carries the partition, its lock, and the boost::intrusive commit
   * data between find_latch() and insert_latched() (two-phase
   * insert) */
  struct Latch {
    Partition* p;
    LK* lock;
    insert_commit_data commit_data{};

    Latch() : p(nullptr), lock(nullptr) {}
  };

  /* partition owning hash key x */
  Partition& partition_of_scalar(uint64_t x) {
    return part[x % n_part];
  }

  /* direct access to partition x (no bounds check) */
  Partition& get(uint8_t x) {
    return part[x];
  }

  /* direct access to the partition array */
  Partition*& get() {
    return part;
  }

  /* lock every partition, in creation order */
  void lock() {
    std::for_each(locks.begin(), locks.end(),
		  [](LK* lk){ lk->lock(); });
  }

  /* unlock every partition */
  void unlock() {
    std::for_each(locks.begin(), locks.end(),
		  [](LK* lk){ lk->unlock(); });
  }

  /* build n_part partitions, each with a zeroed csz-slot cache */
  TreeX(int n_part=1, int csz=127) : n_part(n_part), csz(csz) {
    ceph_assert(n_part > 0);
    part = new Partition[n_part];
    for (int ix = 0; ix < n_part; ++ix) {
      Partition& p = part[ix];
      if (csz) {
	p.csz = csz;
	p.cache = (T**) ::operator new(csz * sizeof(T*));
	// FIPS zeroization audit 20191115: this memset is not security related.
	memset(p.cache, 0, csz * sizeof(T*));
      }
      locks.push_back(&p.lock);
    }
  }

  ~TreeX() {
    delete[] part;
  }

  /* Look up the value for key k (hash hk): consult cache slot
   * hk % csz first, fall back to the tree, and refresh the cache
   * slot on a tree hit.  FLAG_LOCK takes and releases the partition
   * lock around the lookup. */
  T* find(uint64_t hk, const K& k, uint32_t flags) {
    T* v;
    Latch lat;
    uint32_t slot = 0;
    lat.p = &(partition_of_scalar(hk));
    if (flags & FLAG_LOCK) {
      lat.lock = &lat.p->lock;
      lat.lock->lock();
    }
    if (csz) { /* template specialize? */
      slot = hk % csz;
      v = lat.p->cache[slot];
      if (v) {
	if (CEQ()(*v, k)) {
	  if (flags & FLAG_LOCK)
	    lat.lock->unlock();
	  return v;
	}
	v = nullptr; /* cache slot held a different key */
      }
    } else {
      v = nullptr;
    }
    iterator it = lat.p->tr.find(k, CLT());
    if (it != lat.p->tr.end()){
      v = &(*(it));
      if (csz) {
	/* fill cache slot at hk */
	lat.p->cache[slot] = v;
      }
    }
    if (flags & FLAG_LOCK)
      lat.lock->unlock();
    return v;
  } /* find */

  /* Like find(), but also primes lat with the partition, its lock,
   * and insert_commit_data so a miss can be completed with
   * insert_latched().  With FLAG_LOCK the partition lock is taken
   * and stays held on return unless FLAG_UNLOCK is also passed. */
  T* find_latch(uint64_t hk, const K& k, Latch& lat,
		uint32_t flags) {
    uint32_t slot = 0;
    T* v;
    lat.p = &(partition_of_scalar(hk));
    lat.lock = &lat.p->lock;
    if (flags & FLAG_LOCK)
      lat.lock->lock();
    if (csz) { /* template specialize? */
      slot = hk % csz;
      v = lat.p->cache[slot];
      if (v) {
	if (CEQ()(*v, k)) {
	  if ((flags & FLAG_LOCK) && (flags & FLAG_UNLOCK))
	    lat.lock->unlock();
	  return v;
	}
	v = nullptr; /* cache slot held a different key */
      }
    } else {
      v = nullptr;
    }
    check_result r = lat.p->tr.insert_unique_check(
      k, CLT(), lat.commit_data);
    if (! r.second /* !insertable (i.e., !found) */) {
      v = &(*(r.first));
      if (csz) {
	/* fill cache slot at hk */
	lat.p->cache[slot] = v;
      }
    }
    if ((flags & FLAG_LOCK) && (flags & FLAG_UNLOCK))
      lat.lock->unlock();
    return v;
  } /* find_latch */

  /* true iff both hash keys map to the same partition */
  bool is_same_partition(uint64_t lhs, uint64_t rhs) {
    return ((lhs % n_part) == (rhs % n_part));
  }

  /* second phase of a latched insert: commit v using the commit data
   * recorded by find_latch(); FLAG_UNLOCK releases the latch lock */
  void insert_latched(T* v, Latch& lat, uint32_t flags) {
    (void) lat.p->tr.insert_unique_commit(*v, lat.commit_data);
    if (flags & FLAG_UNLOCK)
      lat.lock->unlock();
  } /* insert_latched */

  /* single-phase unique insert of v into the partition for hk */
  void insert(uint64_t hk, T* v, uint32_t flags) {
    Partition& p = partition_of_scalar(hk);
    if (flags & FLAG_LOCK)
      p.lock.lock();
    p.tr.insert_unique(*v);
    if (flags & FLAG_LOCK)
      p.lock.unlock();
  } /* insert */

  /* remove v from the partition for hk, invalidating its cache slot
   * if v is the cached entry */
  void remove(uint64_t hk, T* v, uint32_t flags) {
    Partition& p = partition_of_scalar(hk);
    iterator it = TTree::s_iterator_to(*v);
    if (flags & FLAG_LOCK)
      p.lock.lock();
    p.tr.erase(it);
    if (csz) { /* template specialize? */
      uint32_t slot = hk % csz;
      T* v2 = p.cache[slot];
      /* we are intrusive, just compare addresses */
      if (v == v2)
	p.cache[slot] = nullptr;
    }
    if (flags & FLAG_LOCK)
      p.lock.unlock();
  } /* remove */

  /* Empty every partition, calling uref on each removed element.
   * Elements are first moved to a local drain queue under the
   * partition lock; uref then runs with no locks held. */
  void drain(std::function<void(T*)> uref,
	     uint32_t flags = FLAG_NONE) {
    /* clear a table, call supplied function on
     * each element found (e.g., returns sentinel
     * references) */
    Object::Queue2 drain_q;
    for (int t_ix = 0; t_ix < n_part; ++t_ix) {
      Partition& p = part[t_ix];
      if (flags & FLAG_LOCK) /* LOCKED */
	p.lock.lock();
      while (p.tr.size() > 0) {
	iterator it = p.tr.begin();
	T* v = &(*it);
	p.tr.erase(it);
	drain_q.push_front(*v);
      }
      if (flags & FLAG_LOCK) /* we locked it, !LOCKED */
	p.lock.unlock();
    } /* each partition */
    /* unref out-of-line && !LOCKED */
    while (drain_q.size() > 0) {
      Object::Queue2::iterator it = drain_q.begin();
      T* v = static_cast<T*>(&(*it));
      drain_q.erase(it); /* must precede uref(v) in safe_link mode */
      uref(v);
    }
  } /* drain */

private:
  Partition *part;        /* array of n_part partitions */
  std::vector<LK*> locks; /* one entry per partition, for lock()/unlock() */
};
494 | ||
} /* namespace lru */
496 | } /* namespace cohort */ | |
497 | ||
498 | #endif /* COHORT_LRU_H */ |