]> git.proxmox.com Git - ceph.git/blob - ceph/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / crimson / os / seastore / omap_manager / btree / string_kv_node_layout.h
1 // -*- mode:C++; tab-width:8; c-basic-index:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #pragma once
5
6 #include <iostream>
7 #include <string>
8
9 #include "include/byteorder.h"
10 #include "include/denc.h"
11 #include "include/encoding.h"
12
13 #include "crimson/common/layout.h"
14 #include "crimson/common/fixed_kv_node_layout.h"
15 #include "crimson/os/seastore/omap_manager.h"
16 #include "crimson/os/seastore/omap_manager/btree/omap_types.h"
17
18 namespace crimson::os::seastore::omap_manager {
19 class StringKVInnerNodeLayout;
20 class StringKVLeafNodeLayout;
21
22 /**
23 * copy_from_foreign
24 *
25 * Copy from another node entries to this node.
26 * [from_src, to_src) is another node entry range.
27 * tgt is this node entry to copy to.
28 * tgt and from_src must be from different nodes.
29 * from_src and to_src must be in the same node.
30 */
31 template <typename iterator, typename const_iterator>
32 static void copy_from_foreign(
33 iterator tgt,
34 const_iterator from_src,
35 const_iterator to_src) {
36 assert(tgt->node != from_src->node);
37 assert(to_src->node == from_src->node);
38 if (from_src == to_src)
39 return;
40
41 auto to_copy = from_src->get_right_ptr_end() - to_src->get_right_ptr_end();
42 assert(to_copy > 0);
43 memcpy(
44 tgt->get_right_ptr_end() - to_copy,
45 to_src->get_right_ptr_end(),
46 to_copy);
47 memcpy(
48 tgt->get_node_key_ptr(),
49 from_src->get_node_key_ptr(),
50 to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
51
52 auto offset_diff = tgt->get_right_offset_end() - from_src->get_right_offset_end();
53 for (auto i = tgt; i != tgt + (to_src - from_src); ++i) {
54 i->update_offset(offset_diff);
55 }
56 }
57
58 /**
59 * copy_from_local
60 *
61 * Copies entries from [from_src, to_src) to tgt.
62 * tgt, from_src, and to_src must be from the same node.
63 */
64 template <typename iterator>
65 static void copy_from_local(
66 unsigned len,
67 iterator tgt,
68 iterator from_src,
69 iterator to_src) {
70 assert(tgt->node == from_src->node);
71 assert(to_src->node == from_src->node);
72
73 auto to_copy = from_src->get_right_ptr_end() - to_src->get_right_ptr_end();
74 assert(to_copy > 0);
75 int adjust_offset = tgt > from_src? -len : len;
76 memmove(to_src->get_right_ptr_end() + adjust_offset,
77 to_src->get_right_ptr_end(),
78 to_copy);
79
80 for ( auto ite = from_src; ite < to_src; ite++) {
81 ite->update_offset(-adjust_offset);
82 }
83 memmove(tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
84 to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
85 }
86
87 struct delta_inner_t {
88 enum class op_t : uint_fast8_t {
89 INSERT,
90 UPDATE,
91 REMOVE,
92 } op;
93 std::string key;
94 laddr_t addr;
95
96 DENC(delta_inner_t, v, p) {
97 DENC_START(1, 1, p);
98 denc(v.op, p);
99 denc(v.key, p);
100 denc(v.addr, p);
101 DENC_FINISH(p);
102 }
103
104 void replay(StringKVInnerNodeLayout &l);
105 bool operator==(const delta_inner_t &rhs) const {
106 return op == rhs.op &&
107 key == rhs.key &&
108 addr == rhs.addr;
109 }
110 };
111 }
112 WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_inner_t)
113
114 namespace crimson::os::seastore::omap_manager {
115 struct delta_leaf_t {
116 enum class op_t : uint_fast8_t {
117 INSERT,
118 UPDATE,
119 REMOVE,
120 } op;
121 std::string key;
122 ceph::bufferlist val;
123
124 DENC(delta_leaf_t, v, p) {
125 DENC_START(1, 1, p);
126 denc(v.op, p);
127 denc(v.key, p);
128 denc(v.val, p);
129 DENC_FINISH(p);
130 }
131
132 void replay(StringKVLeafNodeLayout &l);
133 bool operator==(const delta_leaf_t &rhs) const {
134 return op == rhs.op &&
135 key == rhs.key &&
136 val == rhs.val;
137 }
138 };
139 }
140 WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_leaf_t)
141
142 namespace crimson::os::seastore::omap_manager {
143 class delta_inner_buffer_t {
144 std::vector<delta_inner_t> buffer;
145 public:
146 bool empty() const {
147 return buffer.empty();
148 }
149 void insert(
150 const std::string &key,
151 laddr_t addr) {
152 buffer.push_back(
153 delta_inner_t{
154 delta_inner_t::op_t::INSERT,
155 key,
156 addr
157 });
158 }
159 void update(
160 const std::string &key,
161 laddr_t addr) {
162 buffer.push_back(
163 delta_inner_t{
164 delta_inner_t::op_t::UPDATE,
165 key,
166 addr
167 });
168 }
169 void remove(const std::string &key) {
170 buffer.push_back(
171 delta_inner_t{
172 delta_inner_t::op_t::REMOVE,
173 key,
174 L_ADDR_NULL
175 });
176 }
177
178 void replay(StringKVInnerNodeLayout &node) {
179 for (auto &i: buffer) {
180 i.replay(node);
181 }
182 }
183
184 void clear() {
185 buffer.clear();
186 }
187
188 DENC(delta_inner_buffer_t, v, p) {
189 DENC_START(1, 1, p);
190 denc(v.buffer, p);
191 DENC_FINISH(p);
192 }
193
194 bool operator==(const delta_inner_buffer_t &rhs) const {
195 return buffer == rhs.buffer;
196 }
197 };
198 }
199 WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_inner_buffer_t)
200
201 namespace crimson::os::seastore::omap_manager {
202 class delta_leaf_buffer_t {
203 std::vector<delta_leaf_t> buffer;
204 public:
205 bool empty() const {
206 return buffer.empty();
207 }
208 void insert(
209 const std::string &key,
210 const ceph::bufferlist &val) {
211 buffer.push_back(
212 delta_leaf_t{
213 delta_leaf_t::op_t::INSERT,
214 key,
215 val
216 });
217 }
218 void update(
219 const std::string &key,
220 const ceph::bufferlist &val) {
221 buffer.push_back(
222 delta_leaf_t{
223 delta_leaf_t::op_t::UPDATE,
224 key,
225 val
226 });
227 }
228 void remove(const std::string &key) {
229 buffer.push_back(
230 delta_leaf_t{
231 delta_leaf_t::op_t::REMOVE,
232 key,
233 bufferlist()
234 });
235 }
236
237 void replay(StringKVLeafNodeLayout &node) {
238 for (auto &i: buffer) {
239 i.replay(node);
240 }
241 }
242
243 void clear() {
244 buffer.clear();
245 }
246
247 DENC(delta_leaf_buffer_t, v, p) {
248 DENC_START(1, 1, p);
249 denc(v.buffer, p);
250 DENC_FINISH(p);
251 }
252
253 bool operator==(const delta_leaf_buffer_t &rhs) const {
254 return buffer == rhs.buffer;
255 }
256 };
257 }
258 WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_leaf_buffer_t)
259
260 namespace crimson::os::seastore::omap_manager {
261 /**
262 * StringKVInnerNodeLayout
263 *
264 * Uses absl::container_internal::Layout for the actual key memory layout.
265 *
266 * The primary interface exposed is centered on the iterator
267 * and related methods.
268 *
269 * Also included are helpers for doing splits and merges as for a btree.
270 *
271 * layout diagram:
272 *
273 * # <----------------------------- node range --------------------------------------------> #
274 * # #<~># free space #
275 * # <------------- left part -----------------------------> # <~# <----- right keys -----> #
276 * # # <------------ left keys --------------> #~> # #
277 * # # keys [2, n) |<~># #<~>| right keys [2, n) #
278 * # # <--- key 0 ----> | <--- key 1 ----> | # # | <- k1 -> | <-- k0 --> #
279 * # # | | # # | | #
280 * # num_ | meta # key | key | val | key | key | val | # # | key | key #
281 * # keys | depth # off | len | laddr| off | len | laddr| # # | buff | buff #
282 * # | # 0 | 0 | 0 | 1 | 1 | 1 |...#...#...| key 1 | key 0 #
283 * # | | | <- off --+----------> #
284 * # | | ^ | <- off --> #
285 * | | | ^
286 * | +----------------------------------+ |
287 * +----------------------------------------------------------------+
288 */
289 class StringKVInnerNodeLayout {
290 char *buf = nullptr;
291
292 using L = absl::container_internal::Layout<ceph_le32, omap_node_meta_le_t, omap_inner_key_le_t>;
293 static constexpr L layout{1, 1, 1}; // = L::Partial(1, 1, 1);
294 friend class delta_inner_t;
295 public:
296 template <bool is_const>
297 class iter_t : public std::iterator<std::input_iterator_tag, StringKVInnerNodeLayout> {
298 friend class StringKVInnerNodeLayout;
299
300 template <typename iterator, typename const_iterator>
301 friend void copy_from_foreign(iterator, const_iterator, const_iterator);
302 template <typename iterator>
303 friend void copy_from_local(unsigned, iterator, iterator, iterator);
304
305 using parent_t = typename crimson::common::maybe_const_t<StringKVInnerNodeLayout, is_const>::type;
306
307 mutable parent_t node;
308 uint16_t index;
309
310 iter_t(
311 parent_t parent,
312 uint16_t index) : node(parent), index(index) {}
313
314 public:
315 iter_t(const iter_t &) = default;
316 iter_t(iter_t &&) = default;
317 iter_t &operator=(const iter_t &) = default;
318 iter_t &operator=(iter_t &&) = default;
319
320 operator iter_t<!is_const>() const {
321 static_assert(!is_const);
322 return iter_t<!is_const>(node, index);
323 }
324
325 using reference = iter_t&;
326 iter_t &operator*() { return *this; }
327 iter_t *operator->() { return this; }
328
329 iter_t operator++(int) {
330 auto ret = *this;
331 ++index;
332 return ret;
333 }
334
335 iter_t &operator++() {
336 ++index;
337 return *this;
338 }
339
340 iter_t operator--(int) {
341 auto ret = *this;
342 assert(index > 0);
343 --index;
344 return ret;
345 }
346
347 iter_t &operator--() {
348 assert(index > 0);
349 --index;
350 return *this;
351 }
352
353 uint16_t operator-(const iter_t &rhs) const {
354 assert(rhs.node == node);
355 return index - rhs.index;
356 }
357
358 iter_t operator+(uint16_t off) const {
359 return iter_t(node, index + off);
360 }
361 iter_t operator-(uint16_t off) const {
362 return iter_t(node, index - off);
363 }
364
365 uint16_t operator<(const iter_t &rhs) const {
366 assert(rhs.node == node);
367 return index < rhs.index;
368 }
369
370 uint16_t operator>(const iter_t &rhs) const {
371 assert(rhs.node == node);
372 return index > rhs.index;
373 }
374
375 bool operator==(const iter_t &rhs) const {
376 assert(node == rhs.node);
377 return rhs.index == index;
378 }
379
380 bool operator!=(const iter_t &rhs) const {
381 return !(*this == rhs);
382 }
383
384 private:
385 omap_inner_key_t get_node_key() const {
386 omap_inner_key_le_t kint = node->get_node_key_ptr()[index];
387 return omap_inner_key_t(kint);
388 }
389 auto get_node_key_ptr() const {
390 return reinterpret_cast<
391 typename crimson::common::maybe_const_t<char, is_const>::type>(
392 node->get_node_key_ptr() + index);
393 }
394
395 uint32_t get_node_val_offset() const {
396 return get_node_key().key_off;
397 }
398 auto get_node_val_ptr() const {
399 auto tail = node->buf + OMAP_BLOCK_SIZE;
400 if (*this == node->iter_end())
401 return tail;
402 else {
403 return tail - get_node_val_offset();
404 }
405 }
406
407 int get_right_offset_end() const {
408 if (index == 0)
409 return 0;
410 else
411 return (*this - 1)->get_node_val_offset();
412 }
413 auto get_right_ptr_end() const {
414 return node->buf + OMAP_BLOCK_SIZE - get_right_offset_end();
415 }
416
417 void update_offset(int offset) {
418 static_assert(!is_const);
419 auto key = get_node_key();
420 assert(offset + key.key_off >= 0);
421 key.key_off += offset;
422 set_node_key(key);
423 }
424
425 void set_node_key(omap_inner_key_t _lb) {
426 static_assert(!is_const);
427 omap_inner_key_le_t lb;
428 lb = _lb;
429 node->get_node_key_ptr()[index] = lb;
430 }
431
432 void set_node_val(const std::string &str) {
433 static_assert(!is_const);
434 assert(str.size() == get_node_key().key_len);
435 assert(get_node_key().key_off >= str.size());
436 assert(get_node_key().key_off < OMAP_BLOCK_SIZE);
437 assert(str.size() < OMAP_BLOCK_SIZE);
438 ::memcpy(get_node_val_ptr(), str.data(), str.size());
439 }
440
441 public:
442 uint16_t get_index() const {
443 return index;
444 }
445
446 std::string get_key() const {
447 return std::string(
448 get_node_val_ptr(),
449 get_node_key().key_len);
450 }
451
452 laddr_t get_val() const {
453 return get_node_key().laddr;
454 }
455
456 bool contains(std::string_view key) const {
457 assert(*this != node->iter_end());
458 auto next = *this + 1;
459 if (next == node->iter_end()) {
460 return get_key() <= key;
461 } else {
462 return (get_key() <= key) && (next->get_key() > key);
463 }
464 }
465 };
466 using const_iterator = iter_t<true>;
467 using iterator = iter_t<false>;
468
469 public:
470 void journal_inner_insert(
471 const_iterator _iter,
472 const laddr_t laddr,
473 const std::string &key,
474 delta_inner_buffer_t *recorder) {
475 auto iter = iterator(this, _iter.index);
476 if (recorder) {
477 recorder->insert(
478 key,
479 laddr);
480 }
481 inner_insert(iter, key, laddr);
482 }
483
484 void journal_inner_update(
485 const_iterator _iter,
486 const laddr_t laddr,
487 delta_inner_buffer_t *recorder) {
488 auto iter = iterator(this, _iter.index);
489 auto key = iter->get_key();
490 if (recorder) {
491 recorder->update(key, laddr);
492 }
493 inner_update(iter, laddr);
494 }
495
496 void journal_inner_remove(
497 const_iterator _iter,
498 delta_inner_buffer_t *recorder) {
499 auto iter = iterator(this, _iter.index);
500 if (recorder) {
501 recorder->remove(iter->get_key());
502 }
503 inner_remove(iter);
504 }
505
506 StringKVInnerNodeLayout(char *buf) :
507 buf(buf) {}
508
509 uint32_t get_size() const {
510 ceph_le32 &size = *layout.template Pointer<0>(buf);
511 return uint32_t(size);
512 }
513
514 /**
515 * set_size
516 *
517 * Set size representation to match size
518 */
519 void set_size(uint32_t size) {
520 ceph_le32 s;
521 s = size;
522 *layout.template Pointer<0>(buf) = s;
523 }
524
525 const_iterator iter_cbegin() const {
526 return const_iterator(
527 this,
528 0);
529 }
530 const_iterator iter_begin() const {
531 return iter_cbegin();
532 }
533
534 const_iterator iter_cend() const {
535 return const_iterator(
536 this,
537 get_size());
538 }
539 const_iterator iter_end() const {
540 return iter_cend();
541 }
542
543 iterator iter_begin() {
544 return iterator(
545 this,
546 0);
547 }
548
549 iterator iter_end() {
550 return iterator(
551 this,
552 get_size());
553 }
554
555 const_iterator iter_idx(uint16_t off) const {
556 return const_iterator(
557 this,
558 off);
559 }
560
561 const_iterator string_lower_bound(std::string_view str) const {
562 auto it = std::lower_bound(boost::make_counting_iterator<uint16_t>(0),
563 boost::make_counting_iterator<uint16_t>(get_size()),
564 str,
565 [this](uint16_t i, std::string_view str) {
566 const_iterator iter(this, i);
567 return iter->get_key() < str;
568 });
569 return const_iterator(this, *it);
570 }
571
572 iterator string_lower_bound(std::string_view str) {
573 const auto &tref = *this;
574 return iterator(this, tref.string_lower_bound(str).index);
575 }
576
577 const_iterator string_upper_bound(std::string_view str) const {
578 auto it = std::upper_bound(boost::make_counting_iterator<uint16_t>(0),
579 boost::make_counting_iterator<uint16_t>(get_size()),
580 str,
581 [this](std::string_view str, uint16_t i) {
582 const_iterator iter(this, i);
583 return str < iter->get_key();
584 });
585 return const_iterator(this, *it);
586 }
587
588 iterator string_upper_bound(std::string_view str) {
589 const auto &tref = *this;
590 return iterator(this, tref.string_upper_bound(str).index);
591 }
592
593 const_iterator find_string_key(std::string_view str) const {
594 auto ret = iter_begin();
595 for (; ret != iter_end(); ++ret) {
596 std::string s = ret->get_key();
597 if (s == str)
598 break;
599 }
600 return ret;
601 }
602
603 iterator find_string_key(std::string_view str) {
604 const auto &tref = *this;
605 return iterator(this, tref.find_string_key(str).index);
606 }
607
608 const_iterator get_split_pivot() const {
609 uint32_t total_size = omap_inner_key_t(
610 get_node_key_ptr()[get_size()-1]).key_off;
611 uint32_t pivot_size = total_size / 2;
612 uint32_t size = 0;
613 for (auto ite = iter_begin(); ite < iter_end(); ite++) {
614 auto node_key = ite->get_node_key();
615 size += node_key.key_len;
616 if (size >= pivot_size){
617 return ite;
618 }
619 }
620 return iter_end();
621 }
622
623
624 /**
625 * get_meta/set_meta
626 *
627 * Enables stashing a templated type within the layout.
628 * Cannot be modified after initial write as it is not represented
629 * in delta_t
630 */
631 omap_node_meta_t get_meta() const {
632 omap_node_meta_le_t &metaint = *layout.template Pointer<1>(buf);
633 return omap_node_meta_t(metaint);
634 }
635 void set_meta(const omap_node_meta_t &meta) {
636 *layout.template Pointer<1>(buf) = omap_node_meta_le_t(meta);
637 }
638
639 uint32_t used_space() const {
640 uint32_t count = get_size();
641 if (count) {
642 omap_inner_key_t last_key = omap_inner_key_t(get_node_key_ptr()[count-1]);
643 return last_key.key_off + count * sizeof(omap_inner_key_le_t);
644 } else {
645 return 0;
646 }
647 }
648
649 uint32_t free_space() const {
650 return capacity() - used_space();
651 }
652
653 uint16_t capacity() const {
654 return OMAP_BLOCK_SIZE - (reinterpret_cast<char*>(layout.template Pointer<2>(buf))-
655 reinterpret_cast<char*>(layout.template Pointer<0>(buf)));
656 }
657
658 bool is_overflow(size_t ksize) const {
659 return free_space() < (sizeof(omap_inner_key_le_t) + ksize);
660 }
661 bool below_min() const {
662 return free_space() > (capacity() / 2);
663 }
664
665 bool operator==(const StringKVInnerNodeLayout &rhs) const {
666 if (get_size() != rhs.get_size()) {
667 return false;
668 }
669
670 auto iter = iter_begin();
671 auto iter2 = rhs.iter_begin();
672 while (iter != iter_end()) {
673 if (iter->get_key() != iter2->get_key() ||
674 iter->get_val() != iter2->get_val()) {
675 return false;
676 }
677 iter++;
678 iter2++;
679 }
680 return true;
681 }
682
683 /**
684 * split_into
685 *
686 * Takes *this and splits its contents into left and right.
687 */
688 std::string split_into(
689 StringKVInnerNodeLayout &left,
690 StringKVInnerNodeLayout &right) const {
691 auto piviter = get_split_pivot();
692 assert(piviter != iter_end());
693
694 copy_from_foreign(left.iter_begin(), iter_begin(), piviter);
695 left.set_size(piviter - iter_begin());
696
697 copy_from_foreign(right.iter_begin(), piviter, iter_end());
698 right.set_size(iter_end() - piviter);
699
700 auto [lmeta, rmeta] = get_meta().split_into();
701 left.set_meta(lmeta);
702 right.set_meta(rmeta);
703
704 return piviter->get_key();
705 }
706
707 /**
708 * merge_from
709 *
710 * Takes two nodes and copies their contents into *this.
711 *
712 * precondition: left.size() + right.size() < CAPACITY
713 */
714 void merge_from(
715 const StringKVInnerNodeLayout &left,
716 const StringKVInnerNodeLayout &right) {
717 copy_from_foreign(
718 iter_end(),
719 left.iter_begin(),
720 left.iter_end());
721 set_size(left.get_size());
722
723 copy_from_foreign(
724 iter_end(),
725 right.iter_begin(),
726 right.iter_end());
727 set_size(left.get_size() + right.get_size());
728 set_meta(omap_node_meta_t::merge_from(left.get_meta(), right.get_meta()));
729 }
730
731 /**
732 * balance_into_new_nodes
733 *
734 * Takes the contents of left and right and copies them into
735 * replacement_left and replacement_right such that
736 * the size of replacement_left just >= 1/2 of (left + right)
737 */
738 static std::string balance_into_new_nodes(
739 const StringKVInnerNodeLayout &left,
740 const StringKVInnerNodeLayout &right,
741 StringKVInnerNodeLayout &replacement_left,
742 StringKVInnerNodeLayout &replacement_right)
743 {
744 uint32_t left_size = omap_inner_key_t(left.get_node_key_ptr()[left.get_size()-1]).key_off;
745 uint32_t right_size = omap_inner_key_t(right.get_node_key_ptr()[right.get_size()-1]).key_off;
746 uint32_t total = left_size + right_size;
747 uint32_t pivot_size = total / 2;
748 uint32_t pivot_idx = 0;
749 if (pivot_size < left_size) {
750 uint32_t size = 0;
751 for (auto ite = left.iter_begin(); ite < left.iter_end(); ite++) {
752 auto node_key = ite->get_node_key();
753 size += node_key.key_len;
754 if (size >= pivot_size){
755 pivot_idx = ite.get_index();
756 break;
757 }
758 }
759 } else {
760 uint32_t more_size = pivot_size - left_size;
761 uint32_t size = 0;
762 for (auto ite = right.iter_begin(); ite < right.iter_end(); ite++) {
763 auto node_key = ite->get_node_key();
764 size += node_key.key_len;
765 if (size >= more_size){
766 pivot_idx = ite.get_index() + left.get_size();
767 break;
768 }
769 }
770 }
771
772 auto replacement_pivot = pivot_idx >= left.get_size() ?
773 right.iter_idx(pivot_idx - left.get_size())->get_key() :
774 left.iter_idx(pivot_idx)->get_key();
775
776 if (pivot_size < left_size) {
777 copy_from_foreign(
778 replacement_left.iter_end(),
779 left.iter_begin(),
780 left.iter_idx(pivot_idx));
781 replacement_left.set_size(pivot_idx);
782
783 copy_from_foreign(
784 replacement_right.iter_end(),
785 left.iter_idx(pivot_idx),
786 left.iter_end());
787 replacement_right.set_size(left.get_size() - pivot_idx);
788
789 copy_from_foreign(
790 replacement_right.iter_end(),
791 right.iter_begin(),
792 right.iter_end());
793 replacement_right.set_size(right.get_size() + left.get_size()- pivot_idx);
794 } else {
795 copy_from_foreign(
796 replacement_left.iter_end(),
797 left.iter_begin(),
798 left.iter_end());
799 replacement_left.set_size(left.get_size());
800
801 copy_from_foreign(
802 replacement_left.iter_end(),
803 right.iter_begin(),
804 right.iter_idx(pivot_idx - left.get_size()));
805 replacement_left.set_size(pivot_idx);
806
807 copy_from_foreign(
808 replacement_right.iter_end(),
809 right.iter_idx(pivot_idx - left.get_size()),
810 right.iter_end());
811 replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx);
812 }
813
814 auto [lmeta, rmeta] = omap_node_meta_t::rebalance(
815 left.get_meta(), right.get_meta());
816 replacement_left.set_meta(lmeta);
817 replacement_right.set_meta(rmeta);
818 return replacement_pivot;
819 }
820
821 private:
822 void inner_insert(
823 iterator iter,
824 const std::string &key,
825 laddr_t val) {
826 if (iter != iter_begin()) {
827 assert((iter - 1)->get_key() < key);
828 }
829 if (iter != iter_end()) {
830 assert(iter->get_key() > key);
831 }
832 assert(!is_overflow(key.size()));
833
834 if (iter != iter_end()) {
835 copy_from_local(key.size(), iter + 1, iter, iter_end());
836 }
837
838 omap_inner_key_t nkey;
839 nkey.key_len = key.size();
840 nkey.laddr = val;
841 if (iter != iter_begin()) {
842 auto pkey = (iter - 1).get_node_key();
843 nkey.key_off = nkey.key_len + pkey.key_off;
844 } else {
845 nkey.key_off = nkey.key_len;
846 }
847
848 iter->set_node_key(nkey);
849 set_size(get_size() + 1);
850 iter->set_node_val(key);
851 }
852
853 void inner_update(
854 iterator iter,
855 laddr_t addr) {
856 assert(iter != iter_end());
857 auto node_key = iter->get_node_key();
858 node_key.laddr = addr;
859 iter->set_node_key(node_key);
860 }
861
862 void inner_remove(iterator iter) {
863 assert(iter != iter_end());
864 if ((iter + 1) != iter_end())
865 copy_from_local(iter->get_node_key().key_len, iter, iter + 1, iter_end());
866 set_size(get_size() - 1);
867 }
868
869 /**
870 * get_key_ptr
871 *
872 * Get pointer to start of key array
873 */
874 omap_inner_key_le_t *get_node_key_ptr() {
875 return L::Partial(1, 1, get_size()).template Pointer<2>(buf);
876 }
877 const omap_inner_key_le_t *get_node_key_ptr() const {
878 return L::Partial(1, 1, get_size()).template Pointer<2>(buf);
879 }
880
881 };
882
883 /**
884 * StringKVLeafNodeLayout
885 *
886 * layout diagram:
887 *
888 * # <----------------------------- node range -------------------------------------------------> #
889 * # #<~># free space #
890 * # <------------- left part ---------------------------> # <~# <----- right key-value pairs --> #
891 * # # <------------ left keys ------------> #~> # #
892 * # # keys [2, n) |<~># #<~>| right kvs [2, n) #
893 * # # <--- key 0 ---> | <--- key 1 ---> | # # | <-- kv 1 --> | <-- kv 0 --> #
894 * # # | | # # | | #
895 * # num_ | meta # key | key | val | key | key | val | # # | key | val | key | val #
896 * # keys | depth # off | len | len | off | len | len | # # | buff | buff | buff | buff #
897 * # # 0 | 0 | 0 | 1 | 1 | 1 |...#...#...| key 1 | val 1| key 0 | val 0 #
898 * # | | | <--- off ----+-------------> #
899 * # | | ^ | <--- off ---> #
900 * | | | ^
901 * | +-----------------------------------+ |
902 * +-------------------------------------------------------------------+
903 */
904 class StringKVLeafNodeLayout {
905 char *buf = nullptr;
906
907 using L = absl::container_internal::Layout<ceph_le32, omap_node_meta_le_t, omap_leaf_key_le_t>;
908 static constexpr L layout{1, 1, 1}; // = L::Partial(1, 1, 1);
909 friend class delta_leaf_t;
910
911 public:
912 template <bool is_const>
913 class iter_t {
914 friend class StringKVLeafNodeLayout;
915 using parent_t = typename crimson::common::maybe_const_t<StringKVLeafNodeLayout, is_const>::type;
916
917 template <typename iterator, typename const_iterator>
918 friend void copy_from_foreign(iterator, const_iterator, const_iterator);
919 template <typename iterator>
920 friend void copy_from_local(unsigned, iterator, iterator, iterator);
921
922 parent_t node;
923 uint16_t index;
924
925 iter_t(
926 parent_t parent,
927 uint16_t index) : node(parent), index(index) {}
928
929 public:
930 iter_t(const iter_t &) = default;
931 iter_t(iter_t &&) = default;
932 iter_t &operator=(const iter_t &) = default;
933 iter_t &operator=(iter_t &&) = default;
934
935 operator iter_t<!is_const>() const {
936 static_assert(!is_const);
937 return iter_t<!is_const>(node, index);
938 }
939
940 iter_t &operator*() { return *this; }
941 iter_t *operator->() { return this; }
942
943 iter_t operator++(int) {
944 auto ret = *this;
945 ++index;
946 return ret;
947 }
948
949 iter_t &operator++() {
950 ++index;
951 return *this;
952 }
953
954 uint16_t operator-(const iter_t &rhs) const {
955 assert(rhs.node == node);
956 return index - rhs.index;
957 }
958
959 iter_t operator+(uint16_t off) const {
960 return iter_t(
961 node,
962 index + off);
963 }
964 iter_t operator-(uint16_t off) const {
965 return iter_t(
966 node,
967 index - off);
968 }
969
970 uint16_t operator<(const iter_t &rhs) const {
971 assert(rhs.node == node);
972 return index < rhs.index;
973 }
974
975 uint16_t operator>(const iter_t &rhs) const {
976 assert(rhs.node == node);
977 return index > rhs.index;
978 }
979
980 bool operator==(const iter_t &rhs) const {
981 assert(node == rhs.node);
982 return rhs.index == index;
983 }
984
985 bool operator!=(const iter_t &rhs) const {
986 assert(node == rhs.node);
987 return index != rhs.index;
988 }
989
990 private:
991 omap_leaf_key_t get_node_key() const {
992 omap_leaf_key_le_t kint = node->get_node_key_ptr()[index];
993 return omap_leaf_key_t(kint);
994 }
995 auto get_node_key_ptr() const {
996 return reinterpret_cast<
997 typename crimson::common::maybe_const_t<char, is_const>::type>(
998 node->get_node_key_ptr() + index);
999 }
1000
1001 uint32_t get_node_val_offset() const {
1002 return get_node_key().key_off;
1003 }
1004 auto get_node_val_ptr() const {
1005 auto tail = node->buf + OMAP_BLOCK_SIZE;
1006 if (*this == node->iter_end())
1007 return tail;
1008 else {
1009 return tail - get_node_val_offset();
1010 }
1011 }
1012
1013 int get_right_offset_end() const {
1014 if (index == 0)
1015 return 0;
1016 else
1017 return (*this - 1)->get_node_val_offset();
1018 }
1019 auto get_right_ptr_end() const {
1020 return node->buf + OMAP_BLOCK_SIZE - get_right_offset_end();
1021 }
1022
1023 void update_offset(int offset) {
1024 auto key = get_node_key();
1025 assert(offset + key.key_off >= 0);
1026 key.key_off += offset;
1027 set_node_key(key);
1028 }
1029
1030 void set_node_key(omap_leaf_key_t _lb) const {
1031 static_assert(!is_const);
1032 omap_leaf_key_le_t lb;
1033 lb = _lb;
1034 node->get_node_key_ptr()[index] = lb;
1035 }
1036
1037 void set_node_val(const std::string &key, const ceph::bufferlist &val) {
1038 static_assert(!is_const);
1039 auto node_key = get_node_key();
1040 assert(key.size() == node_key.key_len);
1041 assert(val.length() == node_key.val_len);
1042 ::memcpy(get_node_val_ptr(), key.data(), key.size());
1043 auto bliter = val.begin();
1044 bliter.copy(node_key.val_len, get_node_val_ptr() + node_key.key_len);
1045 }
1046
1047 public:
1048 uint16_t get_index() const {
1049 return index;
1050 }
1051
1052 std::string get_key() const {
1053 return std::string(
1054 get_node_val_ptr(),
1055 get_node_key().key_len);
1056 }
1057
1058 std::string get_str_val() const {
1059 auto node_key = get_node_key();
1060 return std::string(
1061 get_node_val_ptr() + node_key.key_len,
1062 get_node_key().val_len);
1063 }
1064
1065 ceph::bufferlist get_val() const {
1066 auto node_key = get_node_key();
1067 ceph::bufferlist bl;
1068 ceph::bufferptr bptr(
1069 get_node_val_ptr() + node_key.key_len,
1070 get_node_key().val_len);
1071 bl.append(bptr);
1072 return bl;
1073 }
1074 };
1075 using const_iterator = iter_t<true>;
1076 using iterator = iter_t<false>;
1077
1078 public:
1079 void journal_leaf_insert(
1080 const_iterator _iter,
1081 const std::string &key,
1082 const ceph::bufferlist &val,
1083 delta_leaf_buffer_t *recorder) {
1084 auto iter = iterator(this, _iter.index);
1085 if (recorder) {
1086 recorder->insert(
1087 key,
1088 val);
1089 }
1090 leaf_insert(iter, key, val);
1091 }
1092
1093 void journal_leaf_update(
1094 const_iterator _iter,
1095 const std::string &key,
1096 const ceph::bufferlist &val,
1097 delta_leaf_buffer_t *recorder) {
1098 auto iter = iterator(this, _iter.index);
1099 if (recorder) {
1100 recorder->remove(iter->get_key());
1101 recorder->insert(key, val);
1102 }
1103 leaf_update(iter, key, val);
1104 }
1105
1106 void journal_leaf_remove(
1107 const_iterator _iter,
1108 delta_leaf_buffer_t *recorder) {
1109 auto iter = iterator(this, _iter.index);
1110 if (recorder) {
1111 recorder->remove(iter->get_key());
1112 }
1113 leaf_remove(iter);
1114 }
1115
1116 StringKVLeafNodeLayout(char *buf) :
1117 buf(buf) {}
1118
1119 const_iterator iter_begin() const {
1120 return const_iterator(
1121 this,
1122 0);
1123 }
1124
1125 const_iterator iter_end() const {
1126 return const_iterator(
1127 this,
1128 get_size());
1129 }
1130
1131 iterator iter_begin() {
1132 return iterator(
1133 this,
1134 0);
1135 }
1136
1137 iterator iter_end() {
1138 return iterator(
1139 this,
1140 get_size());
1141 }
1142
1143 const_iterator iter_idx(uint16_t off) const {
1144 return const_iterator(
1145 this,
1146 off);
1147 }
1148
1149 const_iterator string_lower_bound(std::string_view str) const {
1150 uint16_t start = 0, end = get_size();
1151 while (start != end) {
1152 unsigned mid = (start + end) / 2;
1153 const_iterator iter(this, mid);
1154 std::string s = iter->get_key();
1155 if (s < str) {
1156 start = ++mid;
1157 } else if (s > str) {
1158 end = mid;
1159 } else {
1160 return iter;
1161 }
1162 }
1163 return const_iterator(this, start);
1164 }
1165
1166 iterator string_lower_bound(std::string_view str) {
1167 const auto &tref = *this;
1168 return iterator(this, tref.string_lower_bound(str).index);
1169 }
1170
1171 const_iterator string_upper_bound(std::string_view str) const {
1172 auto ret = iter_begin();
1173 for (; ret != iter_end(); ++ret) {
1174 std::string s = ret->get_key();
1175 if (s > str)
1176 break;
1177 }
1178 return ret;
1179 }
1180
1181 iterator string_upper_bound(std::string_view str) {
1182 const auto &tref = *this;
1183 return iterator(this, tref.string_upper_bound(str).index);
1184 }
1185
1186 const_iterator find_string_key(std::string_view str) const {
1187 auto ret = iter_begin();
1188 for (; ret != iter_end(); ++ret) {
1189 std::string s = ret->get_key();
1190 if (s == str)
1191 break;
1192 }
1193 return ret;
1194 }
1195 iterator find_string_key(std::string_view str) {
1196 const auto &tref = *this;
1197 return iterator(this, tref.find_string_key(str).index);
1198 }
1199
1200 const_iterator get_split_pivot() const {
1201 uint32_t total_size = omap_leaf_key_t(get_node_key_ptr()[get_size()-1]).key_off;
1202 uint32_t pivot_size = total_size / 2;
1203 uint32_t size = 0;
1204 for (auto ite = iter_begin(); ite < iter_end(); ite++) {
1205 auto node_key = ite->get_node_key();
1206 size += node_key.key_len + node_key.val_len;
1207 if (size >= pivot_size){
1208 return ite;
1209 }
1210 }
1211 return iter_end();
1212 }
1213
1214 uint32_t get_size() const {
1215 ceph_le32 &size = *layout.template Pointer<0>(buf);
1216 return uint32_t(size);
1217 }
1218
1219 /**
1220 * set_size
1221 *
1222 * Set size representation to match size
1223 */
1224 void set_size(uint32_t size) {
1225 ceph_le32 s;
1226 s = size;
1227 *layout.template Pointer<0>(buf) = s;
1228 }
1229
1230 /**
1231 * get_meta/set_meta
1232 *
1233 * Enables stashing a templated type within the layout.
1234 * Cannot be modified after initial write as it is not represented
1235 * in delta_t
1236 */
1237 omap_node_meta_t get_meta() const {
1238 omap_node_meta_le_t &metaint = *layout.template Pointer<1>(buf);
1239 return omap_node_meta_t(metaint);
1240 }
1241 void set_meta(const omap_node_meta_t &meta) {
1242 *layout.template Pointer<1>(buf) = omap_node_meta_le_t(meta);
1243 }
1244
1245 uint32_t used_space() const {
1246 uint32_t count = get_size();
1247 if (count) {
1248 omap_leaf_key_t last_key = omap_leaf_key_t(get_node_key_ptr()[count-1]);
1249 return last_key.key_off + count * sizeof(omap_leaf_key_le_t);
1250 } else {
1251 return 0;
1252 }
1253 }
1254
1255 uint32_t free_space() const {
1256 return capacity() - used_space();
1257 }
1258
1259 uint32_t capacity() const {
1260 return OMAP_BLOCK_SIZE - (reinterpret_cast<char*>(layout.template Pointer<2>(buf))-
1261 reinterpret_cast<char*>(layout.template Pointer<0>(buf)));
1262 }
1263
1264 bool is_overflow(size_t ksize, size_t vsize) const {
1265 return free_space() < (sizeof(omap_leaf_key_le_t) + ksize + vsize);
1266 }
1267 bool below_min() const {
1268 return free_space() > (capacity() / 2);
1269 }
1270
1271 bool operator==(const StringKVLeafNodeLayout &rhs) const {
1272 if (get_size() != rhs.get_size()) {
1273 return false;
1274 }
1275
1276 auto iter = iter_begin();
1277 auto iter2 = rhs.iter_begin();
1278 while (iter != iter_end()) {
1279 if(iter->get_key() != iter2->get_key() ||
1280 iter->get_val() != iter2->get_val()) {
1281 return false;
1282 }
1283 iter++;
1284 iter2++;
1285 }
1286 return true;
1287 }
1288
1289 /**
1290 * split_into
1291 *
1292 * Takes *this and splits its contents into left and right.
1293 */
1294 std::string split_into(
1295 StringKVLeafNodeLayout &left,
1296 StringKVLeafNodeLayout &right) const {
1297 auto piviter = get_split_pivot();
1298 assert (piviter != iter_end());
1299
1300 copy_from_foreign(left.iter_begin(), iter_begin(), piviter);
1301 left.set_size(piviter - iter_begin());
1302
1303 copy_from_foreign(right.iter_begin(), piviter, iter_end());
1304 right.set_size(iter_end() - piviter);
1305
1306 auto [lmeta, rmeta] = get_meta().split_into();
1307 left.set_meta(lmeta);
1308 right.set_meta(rmeta);
1309
1310 return piviter->get_key();
1311 }
1312
1313 /**
1314 * merge_from
1315 *
1316 * Takes two nodes and copies their contents into *this.
1317 *
1318 * precondition: left.size() + right.size() < CAPACITY
1319 */
1320 void merge_from(
1321 const StringKVLeafNodeLayout &left,
1322 const StringKVLeafNodeLayout &right)
1323 {
1324 copy_from_foreign(
1325 iter_end(),
1326 left.iter_begin(),
1327 left.iter_end());
1328 set_size(left.get_size());
1329 copy_from_foreign(
1330 iter_end(),
1331 right.iter_begin(),
1332 right.iter_end());
1333 set_size(left.get_size() + right.get_size());
1334 set_meta(omap_node_meta_t::merge_from(left.get_meta(), right.get_meta()));
1335 }
1336
1337 /**
1338 * balance_into_new_nodes
1339 *
1340 * Takes the contents of left and right and copies them into
1341 * replacement_left and replacement_right such that
1342 * the size of replacement_left side just >= 1/2 of the total size (left + right).
1343 */
1344 static std::string balance_into_new_nodes(
1345 const StringKVLeafNodeLayout &left,
1346 const StringKVLeafNodeLayout &right,
1347 StringKVLeafNodeLayout &replacement_left,
1348 StringKVLeafNodeLayout &replacement_right)
1349 {
1350 uint32_t left_size = omap_leaf_key_t(left.get_node_key_ptr()[left.get_size()-1]).key_off;
1351 uint32_t right_size = omap_leaf_key_t(right.get_node_key_ptr()[right.get_size()-1]).key_off;
1352 uint32_t total = left_size + right_size;
1353 uint32_t pivot_size = total / 2;
1354 uint32_t pivot_idx = 0;
1355 if (pivot_size < left_size) {
1356 uint32_t size = 0;
1357 for (auto ite = left.iter_begin(); ite < left.iter_end(); ite++) {
1358 auto node_key = ite->get_node_key();
1359 size += node_key.key_len + node_key.val_len;
1360 if (size >= pivot_size){
1361 pivot_idx = ite.get_index();
1362 break;
1363 }
1364 }
1365 } else {
1366 uint32_t more_size = pivot_size - left_size;
1367 uint32_t size = 0;
1368 for (auto ite = right.iter_begin(); ite < right.iter_end(); ite++) {
1369 auto node_key = ite->get_node_key();
1370 size += node_key.key_len + node_key.val_len;
1371 if (size >= more_size){
1372 pivot_idx = ite.get_index() + left.get_size();
1373 break;
1374 }
1375 }
1376 }
1377
1378 auto replacement_pivot = pivot_idx >= left.get_size() ?
1379 right.iter_idx(pivot_idx - left.get_size())->get_key() :
1380 left.iter_idx(pivot_idx)->get_key();
1381
1382 if (pivot_size < left_size) {
1383 copy_from_foreign(
1384 replacement_left.iter_end(),
1385 left.iter_begin(),
1386 left.iter_idx(pivot_idx));
1387 replacement_left.set_size(pivot_idx);
1388
1389 copy_from_foreign(
1390 replacement_right.iter_end(),
1391 left.iter_idx(pivot_idx),
1392 left.iter_end());
1393 replacement_right.set_size(left.get_size() - pivot_idx);
1394
1395 copy_from_foreign(
1396 replacement_right.iter_end(),
1397 right.iter_begin(),
1398 right.iter_end());
1399 replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx);
1400 } else {
1401 copy_from_foreign(
1402 replacement_left.iter_end(),
1403 left.iter_begin(),
1404 left.iter_end());
1405 replacement_left.set_size(left.get_size());
1406
1407 copy_from_foreign(
1408 replacement_left.iter_end(),
1409 right.iter_begin(),
1410 right.iter_idx(pivot_idx - left.get_size()));
1411 replacement_left.set_size(pivot_idx);
1412
1413 copy_from_foreign(
1414 replacement_right.iter_end(),
1415 right.iter_idx(pivot_idx - left.get_size()),
1416 right.iter_end());
1417 replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx);
1418 }
1419
1420 auto [lmeta, rmeta] = omap_node_meta_t::rebalance(
1421 left.get_meta(), right.get_meta());
1422 replacement_left.set_meta(lmeta);
1423 replacement_right.set_meta(rmeta);
1424 return replacement_pivot;
1425 }
1426
1427 private:
1428 void leaf_insert(
1429 iterator iter,
1430 const std::string &key,
1431 const bufferlist &val) {
1432 if (iter != iter_begin()) {
1433 assert((iter - 1)->get_key() < key);
1434 }
1435 if (iter != iter_end()) {
1436 assert(iter->get_key() > key);
1437 }
1438 assert(!is_overflow(key.size(), val.length()));
1439 omap_leaf_key_t node_key;
1440 if (iter == iter_begin()) {
1441 node_key.key_off = key.size() + val.length();
1442 node_key.key_len = key.size();
1443 node_key.val_len = val.length();
1444 } else {
1445 node_key.key_off = (iter - 1)->get_node_key().key_off +
1446 (key.size() + val.length());
1447 node_key.key_len = key.size();
1448 node_key.val_len = val.length();
1449 }
1450 if (get_size() != 0 && iter != iter_end())
1451 copy_from_local(node_key.key_len + node_key.val_len, iter + 1, iter, iter_end());
1452
1453 iter->set_node_key(node_key);
1454 set_size(get_size() + 1);
1455 iter->set_node_val(key, val);
1456 }
1457
1458 void leaf_update(
1459 iterator iter,
1460 const std::string &key,
1461 const ceph::bufferlist &val) {
1462 assert(iter != iter_end());
1463 leaf_remove(iter);
1464 assert(!is_overflow(key.size(), val.length()));
1465 leaf_insert(iter, key, val);
1466 }
1467
1468 void leaf_remove(iterator iter) {
1469 assert(iter != iter_end());
1470 if ((iter + 1) != iter_end()) {
1471 omap_leaf_key_t key = iter->get_node_key();
1472 copy_from_local(key.key_len + key.val_len, iter, iter + 1, iter_end());
1473 }
1474 set_size(get_size() - 1);
1475 }
1476
1477 /**
1478 * get_key_ptr
1479 *
1480 * Get pointer to start of key array
1481 */
1482 omap_leaf_key_le_t *get_node_key_ptr() {
1483 return L::Partial(1, 1, get_size()).template Pointer<2>(buf);
1484 }
1485 const omap_leaf_key_le_t *get_node_key_ptr() const {
1486 return L::Partial(1, 1, get_size()).template Pointer<2>(buf);
1487 }
1488
1489 };
1490
1491 inline void delta_inner_t::replay(StringKVInnerNodeLayout &l) {
1492 switch (op) {
1493 case op_t::INSERT: {
1494 l.inner_insert(l.string_lower_bound(key), key, addr);
1495 break;
1496 }
1497 case op_t::UPDATE: {
1498 auto iter = l.find_string_key(key);
1499 assert(iter != l.iter_end());
1500 l.inner_update(iter, addr);
1501 break;
1502 }
1503 case op_t::REMOVE: {
1504 auto iter = l.find_string_key(key);
1505 assert(iter != l.iter_end());
1506 l.inner_remove(iter);
1507 break;
1508 }
1509 default:
1510 assert(0 == "Impossible");
1511 }
1512 }
1513
1514 inline void delta_leaf_t::replay(StringKVLeafNodeLayout &l) {
1515 switch (op) {
1516 case op_t::INSERT: {
1517 l.leaf_insert(l.string_lower_bound(key), key, val);
1518 break;
1519 }
1520 case op_t::UPDATE: {
1521 auto iter = l.find_string_key(key);
1522 assert(iter != l.iter_end());
1523 l.leaf_update(iter, key, val);
1524 break;
1525 }
1526 case op_t::REMOVE: {
1527 auto iter = l.find_string_key(key);
1528 assert(iter != l.iter_end());
1529 l.leaf_remove(iter);
1530 break;
1531 }
1532 default:
1533 assert(0 == "Impossible");
1534 }
1535 }
1536
1537 }