]>
Commit | Line | Data |
---|---|---|
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- | |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #pragma once | |
5 | ||
6 | #include <iostream> | |
7 | ||
8 | #include "include/byteorder.h" | |
9 | ||
10 | #include "crimson/common/layout.h" | |
11 | ||
12 | namespace crimson::common { | |
13 | ||
14 | template <typename T, bool is_const> | |
15 | struct maybe_const_t { | |
16 | }; | |
17 | template<typename T> | |
18 | struct maybe_const_t<T, true> { | |
19 | using type = const T*; | |
20 | }; | |
21 | template<typename T> | |
22 | struct maybe_const_t<T, false> { | |
23 | using type = T*; | |
24 | }; | |
25 | ||
26 | ||
27 | /** | |
28 | * FixedKVNodeLayout | |
29 | * | |
30 | * Reusable implementation of a fixed size block mapping | |
31 | * K -> V with internal representations KINT and VINT. | |
32 | * | |
33 | * Uses absl::container_internal::Layout for the actual memory layout. | |
34 | * | |
35 | * The primary interface exposed is centered on the iterator | |
36 | * and related methods. | |
37 | * | |
38 | * Also included are helpers for doing splits and merges as for a btree. | |
39 | */ | |
40 | template < | |
41 | size_t CAPACITY, | |
42 | typename Meta, | |
43 | typename MetaInt, | |
44 | typename K, | |
45 | typename KINT, | |
46 | typename V, | |
47 | typename VINT, | |
48 | bool VALIDATE_INVARIANTS=true> | |
49 | class FixedKVNodeLayout { | |
50 | char *buf = nullptr; | |
51 | ||
52 | using L = absl::container_internal::Layout<ceph_le32, MetaInt, KINT, VINT>; | |
53 | static constexpr L layout{1, 1, CAPACITY, CAPACITY}; | |
54 | ||
55 | public: | |
56 | template <bool is_const> | |
57 | struct iter_t { | |
58 | friend class FixedKVNodeLayout; | |
59 | using parent_t = typename maybe_const_t<FixedKVNodeLayout, is_const>::type; | |
60 | ||
61 | parent_t node; | |
62 | uint16_t offset; | |
63 | ||
64 | iter_t( | |
65 | parent_t parent, | |
66 | uint16_t offset) : node(parent), offset(offset) {} | |
67 | ||
68 | iter_t(const iter_t &) = default; | |
69 | iter_t(iter_t &&) = default; | |
70 | iter_t &operator=(const iter_t &) = default; | |
71 | iter_t &operator=(iter_t &&) = default; | |
72 | ||
73 | operator iter_t<!is_const>() const { | |
74 | static_assert(!is_const); | |
75 | return iter_t<!is_const>(node, offset); | |
76 | } | |
77 | ||
78 | // Work nicely with for loops without requiring a nested type. | |
79 | iter_t &operator*() { return *this; } | |
80 | iter_t *operator->() { return this; } | |
81 | ||
82 | iter_t operator++(int) { | |
83 | auto ret = *this; | |
84 | ++offset; | |
85 | return ret; | |
86 | } | |
87 | ||
88 | iter_t &operator++() { | |
89 | ++offset; | |
90 | return *this; | |
91 | } | |
92 | ||
93 | uint16_t operator-(const iter_t &rhs) const { | |
94 | assert(rhs.node == node); | |
95 | return offset - rhs.offset; | |
96 | } | |
97 | ||
98 | iter_t operator+(uint16_t off) const { | |
99 | return iter_t( | |
100 | node, | |
101 | offset + off); | |
102 | } | |
103 | iter_t operator-(uint16_t off) const { | |
104 | return iter_t( | |
105 | node, | |
106 | offset - off); | |
107 | } | |
108 | ||
109 | bool operator==(const iter_t &rhs) const { | |
110 | assert(node == rhs.node); | |
111 | return rhs.offset == offset; | |
112 | } | |
113 | ||
114 | bool operator!=(const iter_t &rhs) const { | |
115 | return !(*this == rhs); | |
116 | } | |
117 | ||
118 | K get_key() const { | |
119 | return K(node->get_key_ptr()[offset]); | |
120 | } | |
121 | ||
122 | K get_next_key_or_max() const { | |
123 | auto next = *this + 1; | |
124 | if (next == node->end()) | |
125 | return std::numeric_limits<K>::max(); | |
126 | else | |
127 | return next->get_key(); | |
128 | } | |
129 | ||
130 | void set_val(V val) const { | |
131 | static_assert(!is_const); | |
132 | node->get_val_ptr()[offset] = VINT(val); | |
133 | } | |
134 | ||
135 | V get_val() const { | |
136 | return V(node->get_val_ptr()[offset]); | |
137 | }; | |
138 | ||
139 | bool contains(K addr) const { | |
140 | return (get_key() <= addr) && (get_next_key_or_max() > addr); | |
141 | } | |
142 | ||
143 | uint16_t get_offset() const { | |
144 | return offset; | |
145 | } | |
146 | ||
147 | private: | |
148 | void set_key(K _lb) const { | |
149 | static_assert(!is_const); | |
150 | KINT lb; | |
151 | lb = _lb; | |
152 | node->get_key_ptr()[offset] = lb; | |
153 | } | |
154 | ||
155 | typename maybe_const_t<char, is_const>::type get_key_ptr() const { | |
156 | return reinterpret_cast< | |
157 | typename maybe_const_t<char, is_const>::type>( | |
158 | node->get_key_ptr() + offset); | |
159 | } | |
160 | ||
161 | typename maybe_const_t<char, is_const>::type get_val_ptr() const { | |
162 | return reinterpret_cast< | |
163 | typename maybe_const_t<char, is_const>::type>( | |
164 | node->get_val_ptr() + offset); | |
165 | } | |
166 | }; | |
167 | using const_iterator = iter_t<true>; | |
168 | using iterator = iter_t<false>; | |
169 | ||
170 | struct delta_t { | |
171 | enum class op_t : uint8_t { | |
172 | INSERT, | |
173 | REMOVE, | |
174 | UPDATE, | |
175 | } op; | |
176 | KINT key; | |
177 | VINT val; | |
178 | ||
179 | void replay(FixedKVNodeLayout &l) { | |
180 | switch (op) { | |
181 | case op_t::INSERT: { | |
182 | l.insert(l.lower_bound(key), key, val); | |
183 | break; | |
184 | } | |
185 | case op_t::REMOVE: { | |
186 | auto iter = l.find(key); | |
187 | assert(iter != l.end()); | |
188 | l.remove(iter); | |
189 | break; | |
190 | } | |
191 | case op_t::UPDATE: { | |
192 | auto iter = l.find(key); | |
193 | assert(iter != l.end()); | |
194 | l.update(iter, val); | |
195 | break; | |
196 | } | |
197 | default: | |
198 | assert(0 == "Impossible"); | |
199 | } | |
200 | } | |
201 | ||
202 | bool operator==(const delta_t &rhs) const { | |
203 | return op == rhs.op && | |
204 | key == rhs.key && | |
205 | val == rhs.val; | |
206 | } | |
207 | }; | |
208 | ||
209 | public: | |
210 | class delta_buffer_t { | |
211 | std::vector<delta_t> buffer; | |
212 | public: | |
213 | bool empty() const { | |
214 | return buffer.empty(); | |
215 | } | |
216 | void insert( | |
217 | const K &key, | |
218 | const V &val) { | |
219 | KINT k; | |
220 | k = key; | |
221 | buffer.push_back( | |
222 | delta_t{ | |
223 | delta_t::op_t::INSERT, | |
224 | k, | |
225 | VINT(val) | |
226 | }); | |
227 | } | |
228 | void update( | |
229 | const K &key, | |
230 | const V &val) { | |
231 | KINT k; | |
232 | k = key; | |
233 | buffer.push_back( | |
234 | delta_t{ | |
235 | delta_t::op_t::UPDATE, | |
236 | k, | |
237 | VINT(val) | |
238 | }); | |
239 | } | |
240 | void remove(const K &key) { | |
241 | KINT k; | |
242 | k = key; | |
243 | buffer.push_back( | |
244 | delta_t{ | |
245 | delta_t::op_t::REMOVE, | |
246 | k, | |
247 | VINT() | |
248 | }); | |
249 | } | |
250 | void replay(FixedKVNodeLayout &node) { | |
251 | for (auto &i: buffer) { | |
252 | i.replay(node); | |
253 | } | |
254 | } | |
255 | size_t get_bytes() const { | |
256 | return buffer.size() * sizeof(delta_t); | |
257 | } | |
258 | void copy_out(char *out, size_t len) { | |
259 | assert(len == get_bytes()); | |
260 | ::memcpy(out, reinterpret_cast<const void *>(buffer.data()), get_bytes()); | |
261 | buffer.clear(); | |
262 | } | |
263 | void copy_in(const char *out, size_t len) { | |
264 | assert(empty()); | |
265 | assert(len % sizeof(delta_t) == 0); | |
266 | buffer = std::vector( | |
267 | reinterpret_cast<const delta_t*>(out), | |
268 | reinterpret_cast<const delta_t*>(out + len)); | |
269 | } | |
270 | bool operator==(const delta_buffer_t &rhs) const { | |
271 | return buffer == rhs.buffer; | |
272 | } | |
273 | }; | |
274 | ||
275 | void journal_insert( | |
276 | const_iterator _iter, | |
277 | const K &key, | |
278 | const V &val, | |
279 | delta_buffer_t *recorder) { | |
280 | auto iter = iterator(this, _iter.offset); | |
281 | if (recorder) { | |
282 | recorder->insert( | |
283 | key, | |
284 | val); | |
285 | } | |
286 | insert(iter, key, val); | |
287 | } | |
288 | ||
289 | void journal_update( | |
290 | const_iterator _iter, | |
291 | const V &val, | |
292 | delta_buffer_t *recorder) { | |
293 | auto iter = iterator(this, _iter.offset); | |
294 | if (recorder) { | |
295 | recorder->update(iter->get_key(), val); | |
296 | } | |
297 | update(iter, val); | |
298 | } | |
299 | ||
300 | void journal_replace( | |
301 | const_iterator _iter, | |
302 | const K &key, | |
303 | const V &val, | |
304 | delta_buffer_t *recorder) { | |
305 | auto iter = iterator(this, _iter.offset); | |
306 | if (recorder) { | |
307 | recorder->remove(iter->get_key()); | |
308 | recorder->insert(key, val); | |
309 | } | |
310 | replace(iter, key, val); | |
311 | } | |
312 | ||
313 | ||
314 | void journal_remove( | |
315 | const_iterator _iter, | |
316 | delta_buffer_t *recorder) { | |
317 | auto iter = iterator(this, _iter.offset); | |
318 | if (recorder) { | |
319 | recorder->remove(iter->get_key()); | |
320 | } | |
321 | remove(iter); | |
322 | } | |
323 | ||
324 | ||
325 | FixedKVNodeLayout(char *buf) : | |
326 | buf(buf) {} | |
327 | ||
328 | virtual ~FixedKVNodeLayout() = default; | |
329 | ||
330 | const_iterator begin() const { | |
331 | return const_iterator( | |
332 | this, | |
333 | 0); | |
334 | } | |
335 | ||
336 | const_iterator end() const { | |
337 | return const_iterator( | |
338 | this, | |
339 | get_size()); | |
340 | } | |
341 | ||
342 | iterator begin() { | |
343 | return iterator( | |
344 | this, | |
345 | 0); | |
346 | } | |
347 | ||
348 | iterator end() { | |
349 | return iterator( | |
350 | this, | |
351 | get_size()); | |
352 | } | |
353 | ||
354 | const_iterator iter_idx(uint16_t off) const { | |
355 | return const_iterator( | |
356 | this, | |
357 | off); | |
358 | } | |
359 | ||
360 | const_iterator find(K l) const { | |
361 | auto ret = begin(); | |
362 | for (; ret != end(); ++ret) { | |
363 | if (ret->get_key() == l) | |
364 | break; | |
365 | } | |
366 | return ret; | |
367 | } | |
368 | iterator find(K l) { | |
369 | const auto &tref = *this; | |
370 | return iterator(this, tref.find(l).offset); | |
371 | } | |
372 | ||
373 | const_iterator lower_bound(K l) const { | |
374 | auto ret = begin(); | |
375 | for (; ret != end(); ++ret) { | |
376 | if (ret->get_key() >= l) | |
377 | break; | |
378 | } | |
379 | return ret; | |
380 | } | |
381 | iterator lower_bound(K l) { | |
382 | const auto &tref = *this; | |
383 | return iterator(this, tref.lower_bound(l).offset); | |
384 | } | |
385 | ||
386 | const_iterator upper_bound(K l) const { | |
387 | auto ret = begin(); | |
388 | for (; ret != end(); ++ret) { | |
389 | if (ret->get_key() > l) | |
390 | break; | |
391 | } | |
392 | return ret; | |
393 | } | |
394 | iterator upper_bound(K l) { | |
395 | const auto &tref = *this; | |
396 | return iterator(this, tref.upper_bound(l).offset); | |
397 | } | |
398 | ||
399 | const_iterator get_split_pivot() const { | |
400 | return iter_idx(get_size() / 2); | |
401 | } | |
402 | ||
403 | uint16_t get_size() const { | |
404 | return *layout.template Pointer<0>(buf); | |
405 | } | |
406 | ||
407 | /** | |
408 | * set_size | |
409 | * | |
410 | * Set size representation to match size | |
411 | */ | |
412 | void set_size(uint16_t size) { | |
413 | *layout.template Pointer<0>(buf) = size; | |
414 | } | |
415 | ||
416 | /** | |
417 | * get_meta/set_meta | |
418 | * | |
419 | * Enables stashing a templated type within the layout. | |
420 | * Cannot be modified after initial write as it is not represented | |
421 | * in delta_t | |
422 | */ | |
423 | Meta get_meta() const { | |
424 | MetaInt &metaint = *layout.template Pointer<1>(buf); | |
425 | return Meta(metaint); | |
426 | } | |
427 | void set_meta(const Meta &meta) { | |
428 | *layout.template Pointer<1>(buf) = MetaInt(meta); | |
429 | } | |
430 | ||
431 | constexpr static size_t get_capacity() { | |
432 | return CAPACITY; | |
433 | } | |
434 | ||
435 | bool operator==(const FixedKVNodeLayout &rhs) const { | |
436 | if (get_size() != rhs.get_size()) { | |
437 | return false; | |
438 | } | |
439 | ||
440 | auto iter = begin(); | |
441 | auto iter2 = rhs.begin(); | |
442 | while (iter != end()) { | |
443 | if (iter->get_key() != iter2->get_key() || | |
444 | iter->get_val() != iter2->get_val()) { | |
445 | return false; | |
446 | } | |
447 | iter++; | |
448 | iter2++; | |
449 | } | |
450 | return true; | |
451 | } | |
452 | ||
453 | /** | |
454 | * split_into | |
455 | * | |
456 | * Takes *this and splits its contents into left and right. | |
457 | */ | |
458 | K split_into( | |
459 | FixedKVNodeLayout &left, | |
460 | FixedKVNodeLayout &right) const { | |
461 | auto piviter = get_split_pivot(); | |
462 | ||
463 | left.copy_from_foreign(left.begin(), begin(), piviter); | |
464 | left.set_size(piviter - begin()); | |
465 | ||
466 | right.copy_from_foreign(right.begin(), piviter, end()); | |
467 | right.set_size(end() - piviter); | |
468 | ||
469 | auto [lmeta, rmeta] = get_meta().split_into(piviter->get_key()); | |
470 | left.set_meta(lmeta); | |
471 | right.set_meta(rmeta); | |
472 | ||
473 | return piviter->get_key(); | |
474 | } | |
475 | ||
476 | /** | |
477 | * merge_from | |
478 | * | |
479 | * Takes two nodes and copies their contents into *this. | |
480 | * | |
481 | * precondition: left.size() + right.size() < CAPACITY | |
482 | */ | |
483 | void merge_from( | |
484 | const FixedKVNodeLayout &left, | |
485 | const FixedKVNodeLayout &right) | |
486 | { | |
487 | copy_from_foreign( | |
488 | end(), | |
489 | left.begin(), | |
490 | left.end()); | |
491 | set_size(left.get_size()); | |
492 | copy_from_foreign( | |
493 | end(), | |
494 | right.begin(), | |
495 | right.end()); | |
496 | set_size(left.get_size() + right.get_size()); | |
497 | set_meta(Meta::merge_from(left.get_meta(), right.get_meta())); | |
498 | } | |
499 | ||
500 | /** | |
501 | * balance_into_new_nodes | |
502 | * | |
503 | * Takes the contents of left and right and copies them into | |
504 | * replacement_left and replacement_right such that in the | |
505 | * event that the number of elements is odd the extra goes to | |
506 | * the left side iff prefer_left. | |
507 | */ | |
508 | static K balance_into_new_nodes( | |
509 | const FixedKVNodeLayout &left, | |
510 | const FixedKVNodeLayout &right, | |
511 | bool prefer_left, | |
512 | FixedKVNodeLayout &replacement_left, | |
513 | FixedKVNodeLayout &replacement_right) | |
514 | { | |
515 | auto total = left.get_size() + right.get_size(); | |
516 | auto pivot_idx = (left.get_size() + right.get_size()) / 2; | |
517 | if (total % 2 && prefer_left) { | |
518 | pivot_idx++; | |
519 | } | |
520 | auto replacement_pivot = pivot_idx >= left.get_size() ? | |
521 | right.iter_idx(pivot_idx - left.get_size())->get_key() : | |
522 | left.iter_idx(pivot_idx)->get_key(); | |
523 | ||
524 | if (pivot_idx < left.get_size()) { | |
525 | replacement_left.copy_from_foreign( | |
526 | replacement_left.end(), | |
527 | left.begin(), | |
528 | left.iter_idx(pivot_idx)); | |
529 | replacement_left.set_size(pivot_idx); | |
530 | ||
531 | replacement_right.copy_from_foreign( | |
532 | replacement_right.end(), | |
533 | left.iter_idx(pivot_idx), | |
534 | left.end()); | |
535 | ||
536 | replacement_right.set_size(left.get_size() - pivot_idx); | |
537 | replacement_right.copy_from_foreign( | |
538 | replacement_right.end(), | |
539 | right.begin(), | |
540 | right.end()); | |
541 | replacement_right.set_size(total - pivot_idx); | |
542 | } else { | |
543 | replacement_left.copy_from_foreign( | |
544 | replacement_left.end(), | |
545 | left.begin(), | |
546 | left.end()); | |
547 | replacement_left.set_size(left.get_size()); | |
548 | ||
549 | replacement_left.copy_from_foreign( | |
550 | replacement_left.end(), | |
551 | right.begin(), | |
552 | right.iter_idx(pivot_idx - left.get_size())); | |
553 | replacement_left.set_size(pivot_idx); | |
554 | ||
555 | replacement_right.copy_from_foreign( | |
556 | replacement_right.end(), | |
557 | right.iter_idx(pivot_idx - left.get_size()), | |
558 | right.end()); | |
559 | replacement_right.set_size(total - pivot_idx); | |
560 | } | |
561 | ||
562 | auto [lmeta, rmeta] = Meta::rebalance( | |
563 | left.get_meta(), right.get_meta(), replacement_pivot); | |
564 | replacement_left.set_meta(lmeta); | |
565 | replacement_right.set_meta(rmeta); | |
566 | return replacement_pivot; | |
567 | } | |
568 | ||
569 | private: | |
570 | void insert( | |
571 | iterator iter, | |
572 | const K &key, | |
573 | const V &val) { | |
574 | if (VALIDATE_INVARIANTS) { | |
575 | if (iter != begin()) { | |
576 | assert((iter - 1)->get_key() < key); | |
577 | } | |
578 | if (iter != end()) { | |
579 | assert(iter->get_key() > key); | |
580 | } | |
581 | assert(get_size() < CAPACITY); | |
582 | } | |
583 | copy_from_local(iter + 1, iter, end()); | |
584 | iter->set_key(key); | |
585 | iter->set_val(val); | |
586 | set_size(get_size() + 1); | |
587 | } | |
588 | ||
589 | void update( | |
590 | iterator iter, | |
591 | V val) { | |
592 | assert(iter != end()); | |
593 | iter->set_val(val); | |
594 | } | |
595 | ||
596 | void replace( | |
597 | iterator iter, | |
598 | const K &key, | |
599 | const V &val) { | |
600 | assert(iter != end()); | |
601 | if (VALIDATE_INVARIANTS) { | |
602 | if (iter != begin()) { | |
603 | assert((iter - 1)->get_key() < key); | |
604 | } | |
605 | if ((iter + 1) != end()) { | |
606 | assert((iter + 1)->get_key() > key); | |
607 | } | |
608 | } | |
609 | iter->set_key(key); | |
610 | iter->set_val(val); | |
611 | } | |
612 | ||
613 | void remove(iterator iter) { | |
614 | assert(iter != end()); | |
615 | copy_from_local(iter, iter + 1, end()); | |
616 | set_size(get_size() - 1); | |
617 | } | |
618 | ||
619 | /** | |
620 | * get_key_ptr | |
621 | * | |
622 | * Get pointer to start of key array | |
623 | */ | |
624 | KINT *get_key_ptr() { | |
625 | return layout.template Pointer<2>(buf); | |
626 | } | |
627 | const KINT *get_key_ptr() const { | |
628 | return layout.template Pointer<2>(buf); | |
629 | } | |
630 | ||
631 | /** | |
632 | * get_val_ptr | |
633 | * | |
634 | * Get pointer to start of val array | |
635 | */ | |
636 | VINT *get_val_ptr() { | |
637 | return layout.template Pointer<3>(buf); | |
638 | } | |
639 | const VINT *get_val_ptr() const { | |
640 | return layout.template Pointer<3>(buf); | |
641 | } | |
642 | ||
643 | /** | |
644 | * node_resolve/unresolve_vals | |
645 | * | |
646 | * If the representation for values depends in some way on the | |
647 | * node in which they are located, users may implement | |
648 | * resolve/unresolve to enable copy_from_foreign to handle that | |
649 | * transition. | |
650 | */ | |
651 | virtual void node_resolve_vals(iterator from, iterator to) const {} | |
652 | virtual void node_unresolve_vals(iterator from, iterator to) const {} | |
653 | ||
654 | /** | |
655 | * copy_from_foreign | |
656 | * | |
657 | * Copies entries from [from_src, to_src) to tgt. | |
658 | * | |
659 | * tgt and from_src must be from different nodes. | |
660 | * from_src and to_src must be from the same node. | |
661 | */ | |
662 | static void copy_from_foreign( | |
663 | iterator tgt, | |
664 | const_iterator from_src, | |
665 | const_iterator to_src) { | |
666 | assert(tgt->node != from_src->node); | |
667 | assert(to_src->node == from_src->node); | |
668 | memcpy( | |
669 | tgt->get_val_ptr(), from_src->get_val_ptr(), | |
670 | to_src->get_val_ptr() - from_src->get_val_ptr()); | |
671 | memcpy( | |
672 | tgt->get_key_ptr(), from_src->get_key_ptr(), | |
673 | to_src->get_key_ptr() - from_src->get_key_ptr()); | |
674 | from_src->node->node_resolve_vals(tgt, tgt + (to_src - from_src)); | |
675 | tgt->node->node_unresolve_vals(tgt, tgt + (to_src - from_src)); | |
676 | } | |
677 | ||
678 | /** | |
679 | * copy_from_local | |
680 | * | |
681 | * Copies entries from [from_src, to_src) to tgt. | |
682 | * | |
683 | * tgt, from_src, and to_src must be from the same node. | |
684 | */ | |
685 | static void copy_from_local( | |
686 | iterator tgt, | |
687 | iterator from_src, | |
688 | iterator to_src) { | |
689 | assert(tgt->node == from_src->node); | |
690 | assert(to_src->node == from_src->node); | |
691 | memmove( | |
692 | tgt->get_val_ptr(), from_src->get_val_ptr(), | |
693 | to_src->get_val_ptr() - from_src->get_val_ptr()); | |
694 | memmove( | |
695 | tgt->get_key_ptr(), from_src->get_key_ptr(), | |
696 | to_src->get_key_ptr() - from_src->get_key_ptr()); | |
697 | } | |
698 | }; | |
699 | ||
700 | } |