1 // -*- mode:C++; tab-width:8; c-basic-index:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
9 #include "include/byteorder.h"
10 #include "include/denc.h"
11 #include "include/encoding.h"
13 #include "crimson/common/layout.h"
14 #include "crimson/common/fixed_kv_node_layout.h"
15 #include "crimson/os/seastore/omap_manager.h"
16 #include "crimson/os/seastore/omap_manager/btree/omap_types.h"
18 namespace crimson::os::seastore::omap_manager
{
19 class StringKVInnerNodeLayout
;
20 class StringKVLeafNodeLayout
;
25 * Copy from another node entries to this node.
26 * [from_src, to_src) is another node entry range.
27 * tgt is this node entry to copy to.
28 * tgt and from_src must be from different nodes.
29 * from_src and to_src must be in the same node.
31 template <typename iterator
, typename const_iterator
>
32 static void copy_from_foreign(
34 const_iterator from_src
,
35 const_iterator to_src
) {
36 assert(tgt
->node
!= from_src
->node
);
37 assert(to_src
->node
== from_src
->node
);
38 if (from_src
== to_src
)
41 auto to_copy
= from_src
->get_right_ptr_end() - to_src
->get_right_ptr_end();
44 tgt
->get_right_ptr_end() - to_copy
,
45 to_src
->get_right_ptr_end(),
48 tgt
->get_node_key_ptr(),
49 from_src
->get_node_key_ptr(),
50 to_src
->get_node_key_ptr() - from_src
->get_node_key_ptr());
52 auto offset_diff
= tgt
->get_right_offset_end() - from_src
->get_right_offset_end();
53 for (auto i
= tgt
; i
!= tgt
+ (to_src
- from_src
); ++i
) {
54 i
->update_offset(offset_diff
);
61 * Copies entries from [from_src, to_src) to tgt.
62 * tgt, from_src, and to_src must be from the same node.
64 template <typename iterator
>
65 static void copy_from_local(
70 assert(tgt
->node
== from_src
->node
);
71 assert(to_src
->node
== from_src
->node
);
73 auto to_copy
= from_src
->get_right_ptr_end() - to_src
->get_right_ptr_end();
75 int adjust_offset
= tgt
> from_src
? -len
: len
;
76 memmove(to_src
->get_right_ptr_end() + adjust_offset
,
77 to_src
->get_right_ptr_end(),
80 for ( auto ite
= from_src
; ite
< to_src
; ite
++) {
81 ite
->update_offset(-adjust_offset
);
83 memmove(tgt
->get_node_key_ptr(), from_src
->get_node_key_ptr(),
84 to_src
->get_node_key_ptr() - from_src
->get_node_key_ptr());
87 struct delta_inner_t
{
88 enum class op_t
: uint_fast8_t {
96 DENC(delta_inner_t
, v
, p
) {
104 void replay(StringKVInnerNodeLayout
&l
);
105 bool operator==(const delta_inner_t
&rhs
) const {
106 return op
== rhs
.op
&&
112 WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_inner_t
)
114 namespace crimson::os::seastore::omap_manager
{
115 struct delta_leaf_t
{
116 enum class op_t
: uint_fast8_t {
122 ceph::bufferlist val
;
124 DENC(delta_leaf_t
, v
, p
) {
132 void replay(StringKVLeafNodeLayout
&l
);
133 bool operator==(const delta_leaf_t
&rhs
) const {
134 return op
== rhs
.op
&&
140 WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_leaf_t
)
142 namespace crimson::os::seastore::omap_manager
{
143 class delta_inner_buffer_t
{
144 std::vector
<delta_inner_t
> buffer
;
147 return buffer
.empty();
150 const std::string
&key
,
154 delta_inner_t::op_t::INSERT
,
160 const std::string
&key
,
164 delta_inner_t::op_t::UPDATE
,
169 void remove(const std::string
&key
) {
172 delta_inner_t::op_t::REMOVE
,
178 void replay(StringKVInnerNodeLayout
&node
) {
179 for (auto &i
: buffer
) {
188 DENC(delta_inner_buffer_t
, v
, p
) {
194 bool operator==(const delta_inner_buffer_t
&rhs
) const {
195 return buffer
== rhs
.buffer
;
199 WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_inner_buffer_t
)
201 namespace crimson::os::seastore::omap_manager
{
202 class delta_leaf_buffer_t
{
203 std::vector
<delta_leaf_t
> buffer
;
206 return buffer
.empty();
209 const std::string
&key
,
210 const ceph::bufferlist
&val
) {
213 delta_leaf_t::op_t::INSERT
,
219 const std::string
&key
,
220 const ceph::bufferlist
&val
) {
223 delta_leaf_t::op_t::UPDATE
,
228 void remove(const std::string
&key
) {
231 delta_leaf_t::op_t::REMOVE
,
237 void replay(StringKVLeafNodeLayout
&node
) {
238 for (auto &i
: buffer
) {
247 DENC(delta_leaf_buffer_t
, v
, p
) {
253 bool operator==(const delta_leaf_buffer_t
&rhs
) const {
254 return buffer
== rhs
.buffer
;
258 WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_leaf_buffer_t
)
260 namespace crimson::os::seastore::omap_manager
{
262 * StringKVInnerNodeLayout
264 * Uses absl::container_internal::Layout for the actual key memory layout.
266 * The primary interface exposed is centered on the iterator
267 * and related methods.
269 * Also included are helpers for doing splits and merges as for a btree.
273 * # <----------------------------- node range --------------------------------------------> #
274 * # #<~># free space #
275 * # <------------- left part -----------------------------> # <~# <----- right keys -----> #
276 * # # <------------ left keys --------------> #~> # #
277 * # # keys [2, n) |<~># #<~>| right keys [2, n) #
278 * # # <--- key 0 ----> | <--- key 1 ----> | # # | <- k1 -> | <-- k0 --> #
280 * # num_ | meta # key | key | val | key | key | val | # # | key | key #
281 * # keys | depth # off | len | laddr| off | len | laddr| # # | buff | buff #
282 * # | # 0 | 0 | 0 | 1 | 1 | 1 |...#...#...| key 1 | key 0 #
283 * # | | | <- off --+----------> #
284 * # | | ^ | <- off --> #
286 * | +----------------------------------+ |
287 * +----------------------------------------------------------------+
289 class StringKVInnerNodeLayout
{
292 using L
= absl::container_internal::Layout
<ceph_le32
, omap_node_meta_le_t
, omap_inner_key_le_t
>;
293 static constexpr L layout
{1, 1, 1}; // = L::Partial(1, 1, 1);
294 friend class delta_inner_t
;
296 template <bool is_const
>
297 class iter_t
: public std::iterator
<std::input_iterator_tag
, StringKVInnerNodeLayout
> {
298 friend class StringKVInnerNodeLayout
;
300 template <typename iterator
, typename const_iterator
>
301 friend void copy_from_foreign(iterator
, const_iterator
, const_iterator
);
302 template <typename iterator
>
303 friend void copy_from_local(unsigned, iterator
, iterator
, iterator
);
305 using parent_t
= typename
crimson::common::maybe_const_t
<StringKVInnerNodeLayout
, is_const
>::type
;
307 mutable parent_t node
;
312 uint16_t index
) : node(parent
), index(index
) {}
315 iter_t(const iter_t
&) = default;
316 iter_t(iter_t
&&) = default;
317 iter_t
&operator=(const iter_t
&) = default;
318 iter_t
&operator=(iter_t
&&) = default;
320 operator iter_t
<!is_const
>() const {
321 static_assert(!is_const
);
322 return iter_t
<!is_const
>(node
, index
);
325 using reference
= iter_t
&;
326 iter_t
&operator*() { return *this; }
327 iter_t
*operator->() { return this; }
329 iter_t
operator++(int) {
335 iter_t
&operator++() {
340 iter_t
operator--(int) {
347 iter_t
&operator--() {
353 uint16_t operator-(const iter_t
&rhs
) const {
354 assert(rhs
.node
== node
);
355 return index
- rhs
.index
;
358 iter_t
operator+(uint16_t off
) const {
359 return iter_t(node
, index
+ off
);
361 iter_t
operator-(uint16_t off
) const {
362 return iter_t(node
, index
- off
);
365 uint16_t operator<(const iter_t
&rhs
) const {
366 assert(rhs
.node
== node
);
367 return index
< rhs
.index
;
370 uint16_t operator>(const iter_t
&rhs
) const {
371 assert(rhs
.node
== node
);
372 return index
> rhs
.index
;
375 bool operator==(const iter_t
&rhs
) const {
376 assert(node
== rhs
.node
);
377 return rhs
.index
== index
;
380 bool operator!=(const iter_t
&rhs
) const {
381 return !(*this == rhs
);
385 omap_inner_key_t
get_node_key() const {
386 omap_inner_key_le_t kint
= node
->get_node_key_ptr()[index
];
387 return omap_inner_key_t(kint
);
389 auto get_node_key_ptr() const {
390 return reinterpret_cast<
391 typename
crimson::common::maybe_const_t
<char, is_const
>::type
>(
392 node
->get_node_key_ptr() + index
);
395 uint32_t get_node_val_offset() const {
396 return get_node_key().key_off
;
398 auto get_node_val_ptr() const {
399 auto tail
= node
->buf
+ OMAP_BLOCK_SIZE
;
400 if (*this == node
->iter_end())
403 return tail
- get_node_val_offset();
407 int get_right_offset_end() const {
411 return (*this - 1)->get_node_val_offset();
413 auto get_right_ptr_end() const {
414 return node
->buf
+ OMAP_BLOCK_SIZE
- get_right_offset_end();
417 void update_offset(int offset
) {
418 static_assert(!is_const
);
419 auto key
= get_node_key();
420 assert(offset
+ key
.key_off
>= 0);
421 key
.key_off
+= offset
;
425 void set_node_key(omap_inner_key_t _lb
) {
426 static_assert(!is_const
);
427 omap_inner_key_le_t lb
;
429 node
->get_node_key_ptr()[index
] = lb
;
432 void set_node_val(const std::string
&str
) {
433 static_assert(!is_const
);
434 assert(str
.size() == get_node_key().key_len
);
435 assert(get_node_key().key_off
>= str
.size());
436 assert(get_node_key().key_off
< OMAP_BLOCK_SIZE
);
437 assert(str
.size() < OMAP_BLOCK_SIZE
);
438 ::memcpy(get_node_val_ptr(), str
.data(), str
.size());
442 uint16_t get_index() const {
446 std::string
get_key() const {
449 get_node_key().key_len
);
452 laddr_t
get_val() const {
453 return get_node_key().laddr
;
456 bool contains(std::string_view key
) const {
457 assert(*this != node
->iter_end());
458 auto next
= *this + 1;
459 if (next
== node
->iter_end()) {
460 return get_key() <= key
;
462 return (get_key() <= key
) && (next
->get_key() > key
);
466 using const_iterator
= iter_t
<true>;
467 using iterator
= iter_t
<false>;
470 void journal_inner_insert(
471 const_iterator _iter
,
473 const std::string
&key
,
474 delta_inner_buffer_t
*recorder
) {
475 auto iter
= iterator(this, _iter
.index
);
481 inner_insert(iter
, key
, laddr
);
484 void journal_inner_update(
485 const_iterator _iter
,
487 delta_inner_buffer_t
*recorder
) {
488 auto iter
= iterator(this, _iter
.index
);
489 auto key
= iter
->get_key();
491 recorder
->update(key
, laddr
);
493 inner_update(iter
, laddr
);
496 void journal_inner_remove(
497 const_iterator _iter
,
498 delta_inner_buffer_t
*recorder
) {
499 auto iter
= iterator(this, _iter
.index
);
501 recorder
->remove(iter
->get_key());
506 StringKVInnerNodeLayout(char *buf
) :
509 uint32_t get_size() const {
510 ceph_le32
&size
= *layout
.template Pointer
<0>(buf
);
511 return uint32_t(size
);
517 * Set size representation to match size
519 void set_size(uint32_t size
) {
522 *layout
.template Pointer
<0>(buf
) = s
;
525 const_iterator
iter_cbegin() const {
526 return const_iterator(
530 const_iterator
iter_begin() const {
531 return iter_cbegin();
534 const_iterator
iter_cend() const {
535 return const_iterator(
539 const_iterator
iter_end() const {
543 iterator
iter_begin() {
549 iterator
iter_end() {
555 const_iterator
iter_idx(uint16_t off
) const {
556 return const_iterator(
561 const_iterator
string_lower_bound(std::string_view str
) const {
562 auto it
= std::lower_bound(boost::make_counting_iterator
<uint16_t>(0),
563 boost::make_counting_iterator
<uint16_t>(get_size()),
565 [this](uint16_t i
, std::string_view str
) {
566 const_iterator
iter(this, i
);
567 return iter
->get_key() < str
;
569 return const_iterator(this, *it
);
572 iterator
string_lower_bound(std::string_view str
) {
573 const auto &tref
= *this;
574 return iterator(this, tref
.string_lower_bound(str
).index
);
577 const_iterator
string_upper_bound(std::string_view str
) const {
578 auto it
= std::upper_bound(boost::make_counting_iterator
<uint16_t>(0),
579 boost::make_counting_iterator
<uint16_t>(get_size()),
581 [this](std::string_view str
, uint16_t i
) {
582 const_iterator
iter(this, i
);
583 return str
< iter
->get_key();
585 return const_iterator(this, *it
);
588 iterator
string_upper_bound(std::string_view str
) {
589 const auto &tref
= *this;
590 return iterator(this, tref
.string_upper_bound(str
).index
);
593 const_iterator
find_string_key(std::string_view str
) const {
594 auto ret
= iter_begin();
595 for (; ret
!= iter_end(); ++ret
) {
596 std::string s
= ret
->get_key();
603 iterator
find_string_key(std::string_view str
) {
604 const auto &tref
= *this;
605 return iterator(this, tref
.find_string_key(str
).index
);
608 const_iterator
get_split_pivot() const {
609 uint32_t total_size
= omap_inner_key_t(
610 get_node_key_ptr()[get_size()-1]).key_off
;
611 uint32_t pivot_size
= total_size
/ 2;
613 for (auto ite
= iter_begin(); ite
< iter_end(); ite
++) {
614 auto node_key
= ite
->get_node_key();
615 size
+= node_key
.key_len
;
616 if (size
>= pivot_size
){
627 * Enables stashing a templated type within the layout.
628 * Cannot be modified after initial write as it is not represented
631 omap_node_meta_t
get_meta() const {
632 omap_node_meta_le_t
&metaint
= *layout
.template Pointer
<1>(buf
);
633 return omap_node_meta_t(metaint
);
635 void set_meta(const omap_node_meta_t
&meta
) {
636 *layout
.template Pointer
<1>(buf
) = omap_node_meta_le_t(meta
);
639 uint32_t used_space() const {
640 uint32_t count
= get_size();
642 omap_inner_key_t last_key
= omap_inner_key_t(get_node_key_ptr()[count
-1]);
643 return last_key
.key_off
+ count
* sizeof(omap_inner_key_le_t
);
649 uint32_t free_space() const {
650 return capacity() - used_space();
653 uint16_t capacity() const {
654 return OMAP_BLOCK_SIZE
- (reinterpret_cast<char*>(layout
.template Pointer
<2>(buf
))-
655 reinterpret_cast<char*>(layout
.template Pointer
<0>(buf
)));
658 bool is_overflow(size_t ksize
) const {
659 return free_space() < (sizeof(omap_inner_key_le_t
) + ksize
);
661 bool below_min() const {
662 return free_space() > (capacity() / 2);
665 bool operator==(const StringKVInnerNodeLayout
&rhs
) const {
666 if (get_size() != rhs
.get_size()) {
670 auto iter
= iter_begin();
671 auto iter2
= rhs
.iter_begin();
672 while (iter
!= iter_end()) {
673 if (iter
->get_key() != iter2
->get_key() ||
674 iter
->get_val() != iter2
->get_val()) {
686 * Takes *this and splits its contents into left and right.
688 std::string
split_into(
689 StringKVInnerNodeLayout
&left
,
690 StringKVInnerNodeLayout
&right
) const {
691 auto piviter
= get_split_pivot();
692 assert(piviter
!= iter_end());
694 copy_from_foreign(left
.iter_begin(), iter_begin(), piviter
);
695 left
.set_size(piviter
- iter_begin());
697 copy_from_foreign(right
.iter_begin(), piviter
, iter_end());
698 right
.set_size(iter_end() - piviter
);
700 auto [lmeta
, rmeta
] = get_meta().split_into();
701 left
.set_meta(lmeta
);
702 right
.set_meta(rmeta
);
704 return piviter
->get_key();
710 * Takes two nodes and copies their contents into *this.
712 * precondition: left.size() + right.size() < CAPACITY
715 const StringKVInnerNodeLayout
&left
,
716 const StringKVInnerNodeLayout
&right
) {
721 set_size(left
.get_size());
727 set_size(left
.get_size() + right
.get_size());
728 set_meta(omap_node_meta_t::merge_from(left
.get_meta(), right
.get_meta()));
732 * balance_into_new_nodes
734 * Takes the contents of left and right and copies them into
735 * replacement_left and replacement_right such that
736 * the size of replacement_left just >= 1/2 of (left + right)
738 static std::string
balance_into_new_nodes(
739 const StringKVInnerNodeLayout
&left
,
740 const StringKVInnerNodeLayout
&right
,
741 StringKVInnerNodeLayout
&replacement_left
,
742 StringKVInnerNodeLayout
&replacement_right
)
744 uint32_t left_size
= omap_inner_key_t(left
.get_node_key_ptr()[left
.get_size()-1]).key_off
;
745 uint32_t right_size
= omap_inner_key_t(right
.get_node_key_ptr()[right
.get_size()-1]).key_off
;
746 uint32_t total
= left_size
+ right_size
;
747 uint32_t pivot_size
= total
/ 2;
748 uint32_t pivot_idx
= 0;
749 if (pivot_size
< left_size
) {
751 for (auto ite
= left
.iter_begin(); ite
< left
.iter_end(); ite
++) {
752 auto node_key
= ite
->get_node_key();
753 size
+= node_key
.key_len
;
754 if (size
>= pivot_size
){
755 pivot_idx
= ite
.get_index();
760 uint32_t more_size
= pivot_size
- left_size
;
762 for (auto ite
= right
.iter_begin(); ite
< right
.iter_end(); ite
++) {
763 auto node_key
= ite
->get_node_key();
764 size
+= node_key
.key_len
;
765 if (size
>= more_size
){
766 pivot_idx
= ite
.get_index() + left
.get_size();
772 auto replacement_pivot
= pivot_idx
>= left
.get_size() ?
773 right
.iter_idx(pivot_idx
- left
.get_size())->get_key() :
774 left
.iter_idx(pivot_idx
)->get_key();
776 if (pivot_size
< left_size
) {
778 replacement_left
.iter_end(),
780 left
.iter_idx(pivot_idx
));
781 replacement_left
.set_size(pivot_idx
);
784 replacement_right
.iter_end(),
785 left
.iter_idx(pivot_idx
),
787 replacement_right
.set_size(left
.get_size() - pivot_idx
);
790 replacement_right
.iter_end(),
793 replacement_right
.set_size(right
.get_size() + left
.get_size()- pivot_idx
);
796 replacement_left
.iter_end(),
799 replacement_left
.set_size(left
.get_size());
802 replacement_left
.iter_end(),
804 right
.iter_idx(pivot_idx
- left
.get_size()));
805 replacement_left
.set_size(pivot_idx
);
808 replacement_right
.iter_end(),
809 right
.iter_idx(pivot_idx
- left
.get_size()),
811 replacement_right
.set_size(right
.get_size() + left
.get_size() - pivot_idx
);
814 auto [lmeta
, rmeta
] = omap_node_meta_t::rebalance(
815 left
.get_meta(), right
.get_meta());
816 replacement_left
.set_meta(lmeta
);
817 replacement_right
.set_meta(rmeta
);
818 return replacement_pivot
;
824 const std::string
&key
,
826 if (iter
!= iter_begin()) {
827 assert((iter
- 1)->get_key() < key
);
829 if (iter
!= iter_end()) {
830 assert(iter
->get_key() > key
);
832 assert(!is_overflow(key
.size()));
834 if (iter
!= iter_end()) {
835 copy_from_local(key
.size(), iter
+ 1, iter
, iter_end());
838 omap_inner_key_t nkey
;
839 nkey
.key_len
= key
.size();
841 if (iter
!= iter_begin()) {
842 auto pkey
= (iter
- 1).get_node_key();
843 nkey
.key_off
= nkey
.key_len
+ pkey
.key_off
;
845 nkey
.key_off
= nkey
.key_len
;
848 iter
->set_node_key(nkey
);
849 set_size(get_size() + 1);
850 iter
->set_node_val(key
);
856 assert(iter
!= iter_end());
857 auto node_key
= iter
->get_node_key();
858 node_key
.laddr
= addr
;
859 iter
->set_node_key(node_key
);
862 void inner_remove(iterator iter
) {
863 assert(iter
!= iter_end());
864 if ((iter
+ 1) != iter_end())
865 copy_from_local(iter
->get_node_key().key_len
, iter
, iter
+ 1, iter_end());
866 set_size(get_size() - 1);
872 * Get pointer to start of key array
874 omap_inner_key_le_t
*get_node_key_ptr() {
875 return L::Partial(1, 1, get_size()).template Pointer
<2>(buf
);
877 const omap_inner_key_le_t
*get_node_key_ptr() const {
878 return L::Partial(1, 1, get_size()).template Pointer
<2>(buf
);
884 * StringKVLeafNodeLayout
888 * # <----------------------------- node range -------------------------------------------------> #
889 * # #<~># free space #
890 * # <------------- left part ---------------------------> # <~# <----- right key-value pairs --> #
891 * # # <------------ left keys ------------> #~> # #
892 * # # keys [2, n) |<~># #<~>| right kvs [2, n) #
893 * # # <--- key 0 ---> | <--- key 1 ---> | # # | <-- kv 1 --> | <-- kv 0 --> #
895 * # num_ | meta # key | key | val | key | key | val | # # | key | val | key | val #
896 * # keys | depth # off | len | len | off | len | len | # # | buff | buff | buff | buff #
897 * # # 0 | 0 | 0 | 1 | 1 | 1 |...#...#...| key 1 | val 1| key 0 | val 0 #
898 * # | | | <--- off ----+-------------> #
899 * # | | ^ | <--- off ---> #
901 * | +-----------------------------------+ |
902 * +-------------------------------------------------------------------+
904 class StringKVLeafNodeLayout
{
907 using L
= absl::container_internal::Layout
<ceph_le32
, omap_node_meta_le_t
, omap_leaf_key_le_t
>;
908 static constexpr L layout
{1, 1, 1}; // = L::Partial(1, 1, 1);
909 friend class delta_leaf_t
;
912 template <bool is_const
>
914 friend class StringKVLeafNodeLayout
;
915 using parent_t
= typename
crimson::common::maybe_const_t
<StringKVLeafNodeLayout
, is_const
>::type
;
917 template <typename iterator
, typename const_iterator
>
918 friend void copy_from_foreign(iterator
, const_iterator
, const_iterator
);
919 template <typename iterator
>
920 friend void copy_from_local(unsigned, iterator
, iterator
, iterator
);
927 uint16_t index
) : node(parent
), index(index
) {}
930 iter_t(const iter_t
&) = default;
931 iter_t(iter_t
&&) = default;
932 iter_t
&operator=(const iter_t
&) = default;
933 iter_t
&operator=(iter_t
&&) = default;
935 operator iter_t
<!is_const
>() const {
936 static_assert(!is_const
);
937 return iter_t
<!is_const
>(node
, index
);
940 iter_t
&operator*() { return *this; }
941 iter_t
*operator->() { return this; }
943 iter_t
operator++(int) {
949 iter_t
&operator++() {
954 uint16_t operator-(const iter_t
&rhs
) const {
955 assert(rhs
.node
== node
);
956 return index
- rhs
.index
;
959 iter_t
operator+(uint16_t off
) const {
964 iter_t
operator-(uint16_t off
) const {
970 uint16_t operator<(const iter_t
&rhs
) const {
971 assert(rhs
.node
== node
);
972 return index
< rhs
.index
;
975 uint16_t operator>(const iter_t
&rhs
) const {
976 assert(rhs
.node
== node
);
977 return index
> rhs
.index
;
980 bool operator==(const iter_t
&rhs
) const {
981 assert(node
== rhs
.node
);
982 return rhs
.index
== index
;
985 bool operator!=(const iter_t
&rhs
) const {
986 assert(node
== rhs
.node
);
987 return index
!= rhs
.index
;
991 omap_leaf_key_t
get_node_key() const {
992 omap_leaf_key_le_t kint
= node
->get_node_key_ptr()[index
];
993 return omap_leaf_key_t(kint
);
995 auto get_node_key_ptr() const {
996 return reinterpret_cast<
997 typename
crimson::common::maybe_const_t
<char, is_const
>::type
>(
998 node
->get_node_key_ptr() + index
);
1001 uint32_t get_node_val_offset() const {
1002 return get_node_key().key_off
;
1004 auto get_node_val_ptr() const {
1005 auto tail
= node
->buf
+ OMAP_BLOCK_SIZE
;
1006 if (*this == node
->iter_end())
1009 return tail
- get_node_val_offset();
1013 int get_right_offset_end() const {
1017 return (*this - 1)->get_node_val_offset();
1019 auto get_right_ptr_end() const {
1020 return node
->buf
+ OMAP_BLOCK_SIZE
- get_right_offset_end();
1023 void update_offset(int offset
) {
1024 auto key
= get_node_key();
1025 assert(offset
+ key
.key_off
>= 0);
1026 key
.key_off
+= offset
;
1030 void set_node_key(omap_leaf_key_t _lb
) const {
1031 static_assert(!is_const
);
1032 omap_leaf_key_le_t lb
;
1034 node
->get_node_key_ptr()[index
] = lb
;
1037 void set_node_val(const std::string
&key
, const ceph::bufferlist
&val
) {
1038 static_assert(!is_const
);
1039 auto node_key
= get_node_key();
1040 assert(key
.size() == node_key
.key_len
);
1041 assert(val
.length() == node_key
.val_len
);
1042 ::memcpy(get_node_val_ptr(), key
.data(), key
.size());
1043 auto bliter
= val
.begin();
1044 bliter
.copy(node_key
.val_len
, get_node_val_ptr() + node_key
.key_len
);
1048 uint16_t get_index() const {
1052 std::string
get_key() const {
1055 get_node_key().key_len
);
1058 std::string
get_str_val() const {
1059 auto node_key
= get_node_key();
1061 get_node_val_ptr() + node_key
.key_len
,
1062 get_node_key().val_len
);
1065 ceph::bufferlist
get_val() const {
1066 auto node_key
= get_node_key();
1067 ceph::bufferlist bl
;
1068 ceph::bufferptr
bptr(
1069 get_node_val_ptr() + node_key
.key_len
,
1070 get_node_key().val_len
);
1075 using const_iterator
= iter_t
<true>;
1076 using iterator
= iter_t
<false>;
1079 void journal_leaf_insert(
1080 const_iterator _iter
,
1081 const std::string
&key
,
1082 const ceph::bufferlist
&val
,
1083 delta_leaf_buffer_t
*recorder
) {
1084 auto iter
= iterator(this, _iter
.index
);
1090 leaf_insert(iter
, key
, val
);
1093 void journal_leaf_update(
1094 const_iterator _iter
,
1095 const std::string
&key
,
1096 const ceph::bufferlist
&val
,
1097 delta_leaf_buffer_t
*recorder
) {
1098 auto iter
= iterator(this, _iter
.index
);
1100 recorder
->remove(iter
->get_key());
1101 recorder
->insert(key
, val
);
1103 leaf_update(iter
, key
, val
);
1106 void journal_leaf_remove(
1107 const_iterator _iter
,
1108 delta_leaf_buffer_t
*recorder
) {
1109 auto iter
= iterator(this, _iter
.index
);
1111 recorder
->remove(iter
->get_key());
1116 StringKVLeafNodeLayout(char *buf
) :
1119 const_iterator
iter_begin() const {
1120 return const_iterator(
1125 const_iterator
iter_end() const {
1126 return const_iterator(
1131 iterator
iter_begin() {
1137 iterator
iter_end() {
1143 const_iterator
iter_idx(uint16_t off
) const {
1144 return const_iterator(
1149 const_iterator
string_lower_bound(std::string_view str
) const {
1150 uint16_t start
= 0, end
= get_size();
1151 while (start
!= end
) {
1152 unsigned mid
= (start
+ end
) / 2;
1153 const_iterator
iter(this, mid
);
1154 std::string s
= iter
->get_key();
1157 } else if (s
> str
) {
1163 return const_iterator(this, start
);
1166 iterator
string_lower_bound(std::string_view str
) {
1167 const auto &tref
= *this;
1168 return iterator(this, tref
.string_lower_bound(str
).index
);
1171 const_iterator
string_upper_bound(std::string_view str
) const {
1172 auto ret
= iter_begin();
1173 for (; ret
!= iter_end(); ++ret
) {
1174 std::string s
= ret
->get_key();
1181 iterator
string_upper_bound(std::string_view str
) {
1182 const auto &tref
= *this;
1183 return iterator(this, tref
.string_upper_bound(str
).index
);
1186 const_iterator
find_string_key(std::string_view str
) const {
1187 auto ret
= iter_begin();
1188 for (; ret
!= iter_end(); ++ret
) {
1189 std::string s
= ret
->get_key();
1195 iterator
find_string_key(std::string_view str
) {
1196 const auto &tref
= *this;
1197 return iterator(this, tref
.find_string_key(str
).index
);
1200 const_iterator
get_split_pivot() const {
1201 uint32_t total_size
= omap_leaf_key_t(get_node_key_ptr()[get_size()-1]).key_off
;
1202 uint32_t pivot_size
= total_size
/ 2;
1204 for (auto ite
= iter_begin(); ite
< iter_end(); ite
++) {
1205 auto node_key
= ite
->get_node_key();
1206 size
+= node_key
.key_len
+ node_key
.val_len
;
1207 if (size
>= pivot_size
){
1214 uint32_t get_size() const {
1215 ceph_le32
&size
= *layout
.template Pointer
<0>(buf
);
1216 return uint32_t(size
);
1222 * Set size representation to match size
1224 void set_size(uint32_t size
) {
1227 *layout
.template Pointer
<0>(buf
) = s
;
1233 * Enables stashing a templated type within the layout.
1234 * Cannot be modified after initial write as it is not represented
1237 omap_node_meta_t
get_meta() const {
1238 omap_node_meta_le_t
&metaint
= *layout
.template Pointer
<1>(buf
);
1239 return omap_node_meta_t(metaint
);
1241 void set_meta(const omap_node_meta_t
&meta
) {
1242 *layout
.template Pointer
<1>(buf
) = omap_node_meta_le_t(meta
);
1245 uint32_t used_space() const {
1246 uint32_t count
= get_size();
1248 omap_leaf_key_t last_key
= omap_leaf_key_t(get_node_key_ptr()[count
-1]);
1249 return last_key
.key_off
+ count
* sizeof(omap_leaf_key_le_t
);
1255 uint32_t free_space() const {
1256 return capacity() - used_space();
1259 uint32_t capacity() const {
1260 return OMAP_BLOCK_SIZE
- (reinterpret_cast<char*>(layout
.template Pointer
<2>(buf
))-
1261 reinterpret_cast<char*>(layout
.template Pointer
<0>(buf
)));
1264 bool is_overflow(size_t ksize
, size_t vsize
) const {
1265 return free_space() < (sizeof(omap_leaf_key_le_t
) + ksize
+ vsize
);
1267 bool below_min() const {
1268 return free_space() > (capacity() / 2);
1271 bool operator==(const StringKVLeafNodeLayout
&rhs
) const {
1272 if (get_size() != rhs
.get_size()) {
1276 auto iter
= iter_begin();
1277 auto iter2
= rhs
.iter_begin();
1278 while (iter
!= iter_end()) {
1279 if(iter
->get_key() != iter2
->get_key() ||
1280 iter
->get_val() != iter2
->get_val()) {
1292 * Takes *this and splits its contents into left and right.
1294 std::string
split_into(
1295 StringKVLeafNodeLayout
&left
,
1296 StringKVLeafNodeLayout
&right
) const {
1297 auto piviter
= get_split_pivot();
1298 assert (piviter
!= iter_end());
1300 copy_from_foreign(left
.iter_begin(), iter_begin(), piviter
);
1301 left
.set_size(piviter
- iter_begin());
1303 copy_from_foreign(right
.iter_begin(), piviter
, iter_end());
1304 right
.set_size(iter_end() - piviter
);
1306 auto [lmeta
, rmeta
] = get_meta().split_into();
1307 left
.set_meta(lmeta
);
1308 right
.set_meta(rmeta
);
1310 return piviter
->get_key();
1316 * Takes two nodes and copies their contents into *this.
1318 * precondition: left.size() + right.size() < CAPACITY
1321 const StringKVLeafNodeLayout
&left
,
1322 const StringKVLeafNodeLayout
&right
)
1328 set_size(left
.get_size());
1333 set_size(left
.get_size() + right
.get_size());
1334 set_meta(omap_node_meta_t::merge_from(left
.get_meta(), right
.get_meta()));
1338 * balance_into_new_nodes
1340 * Takes the contents of left and right and copies them into
1341 * replacement_left and replacement_right such that
1342 * the size of replacement_left side just >= 1/2 of the total size (left + right).
1344 static std::string
balance_into_new_nodes(
1345 const StringKVLeafNodeLayout
&left
,
1346 const StringKVLeafNodeLayout
&right
,
1347 StringKVLeafNodeLayout
&replacement_left
,
1348 StringKVLeafNodeLayout
&replacement_right
)
1350 uint32_t left_size
= omap_leaf_key_t(left
.get_node_key_ptr()[left
.get_size()-1]).key_off
;
1351 uint32_t right_size
= omap_leaf_key_t(right
.get_node_key_ptr()[right
.get_size()-1]).key_off
;
1352 uint32_t total
= left_size
+ right_size
;
1353 uint32_t pivot_size
= total
/ 2;
1354 uint32_t pivot_idx
= 0;
1355 if (pivot_size
< left_size
) {
1357 for (auto ite
= left
.iter_begin(); ite
< left
.iter_end(); ite
++) {
1358 auto node_key
= ite
->get_node_key();
1359 size
+= node_key
.key_len
+ node_key
.val_len
;
1360 if (size
>= pivot_size
){
1361 pivot_idx
= ite
.get_index();
1366 uint32_t more_size
= pivot_size
- left_size
;
1368 for (auto ite
= right
.iter_begin(); ite
< right
.iter_end(); ite
++) {
1369 auto node_key
= ite
->get_node_key();
1370 size
+= node_key
.key_len
+ node_key
.val_len
;
1371 if (size
>= more_size
){
1372 pivot_idx
= ite
.get_index() + left
.get_size();
1378 auto replacement_pivot
= pivot_idx
>= left
.get_size() ?
1379 right
.iter_idx(pivot_idx
- left
.get_size())->get_key() :
1380 left
.iter_idx(pivot_idx
)->get_key();
1382 if (pivot_size
< left_size
) {
1384 replacement_left
.iter_end(),
1386 left
.iter_idx(pivot_idx
));
1387 replacement_left
.set_size(pivot_idx
);
1390 replacement_right
.iter_end(),
1391 left
.iter_idx(pivot_idx
),
1393 replacement_right
.set_size(left
.get_size() - pivot_idx
);
1396 replacement_right
.iter_end(),
1399 replacement_right
.set_size(right
.get_size() + left
.get_size() - pivot_idx
);
1402 replacement_left
.iter_end(),
1405 replacement_left
.set_size(left
.get_size());
1408 replacement_left
.iter_end(),
1410 right
.iter_idx(pivot_idx
- left
.get_size()));
1411 replacement_left
.set_size(pivot_idx
);
1414 replacement_right
.iter_end(),
1415 right
.iter_idx(pivot_idx
- left
.get_size()),
1417 replacement_right
.set_size(right
.get_size() + left
.get_size() - pivot_idx
);
1420 auto [lmeta
, rmeta
] = omap_node_meta_t::rebalance(
1421 left
.get_meta(), right
.get_meta());
1422 replacement_left
.set_meta(lmeta
);
1423 replacement_right
.set_meta(rmeta
);
1424 return replacement_pivot
;
1430 const std::string
&key
,
1431 const bufferlist
&val
) {
1432 if (iter
!= iter_begin()) {
1433 assert((iter
- 1)->get_key() < key
);
1435 if (iter
!= iter_end()) {
1436 assert(iter
->get_key() > key
);
1438 assert(!is_overflow(key
.size(), val
.length()));
1439 omap_leaf_key_t node_key
;
1440 if (iter
== iter_begin()) {
1441 node_key
.key_off
= key
.size() + val
.length();
1442 node_key
.key_len
= key
.size();
1443 node_key
.val_len
= val
.length();
1445 node_key
.key_off
= (iter
- 1)->get_node_key().key_off
+
1446 (key
.size() + val
.length());
1447 node_key
.key_len
= key
.size();
1448 node_key
.val_len
= val
.length();
1450 if (get_size() != 0 && iter
!= iter_end())
1451 copy_from_local(node_key
.key_len
+ node_key
.val_len
, iter
+ 1, iter
, iter_end());
1453 iter
->set_node_key(node_key
);
1454 set_size(get_size() + 1);
1455 iter
->set_node_val(key
, val
);
1460 const std::string
&key
,
1461 const ceph::bufferlist
&val
) {
1462 assert(iter
!= iter_end());
1464 assert(!is_overflow(key
.size(), val
.length()));
1465 leaf_insert(iter
, key
, val
);
1468 void leaf_remove(iterator iter
) {
1469 assert(iter
!= iter_end());
1470 if ((iter
+ 1) != iter_end()) {
1471 omap_leaf_key_t key
= iter
->get_node_key();
1472 copy_from_local(key
.key_len
+ key
.val_len
, iter
, iter
+ 1, iter_end());
1474 set_size(get_size() - 1);
1480 * Get pointer to start of key array
1482 omap_leaf_key_le_t
*get_node_key_ptr() {
1483 return L::Partial(1, 1, get_size()).template Pointer
<2>(buf
);
1485 const omap_leaf_key_le_t
*get_node_key_ptr() const {
1486 return L::Partial(1, 1, get_size()).template Pointer
<2>(buf
);
1491 inline void delta_inner_t::replay(StringKVInnerNodeLayout
&l
) {
1493 case op_t::INSERT
: {
1494 l
.inner_insert(l
.string_lower_bound(key
), key
, addr
);
1497 case op_t::UPDATE
: {
1498 auto iter
= l
.find_string_key(key
);
1499 assert(iter
!= l
.iter_end());
1500 l
.inner_update(iter
, addr
);
1503 case op_t::REMOVE
: {
1504 auto iter
= l
.find_string_key(key
);
1505 assert(iter
!= l
.iter_end());
1506 l
.inner_remove(iter
);
1510 assert(0 == "Impossible");
1514 inline void delta_leaf_t::replay(StringKVLeafNodeLayout
&l
) {
1516 case op_t::INSERT
: {
1517 l
.leaf_insert(l
.string_lower_bound(key
), key
, val
);
1520 case op_t::UPDATE
: {
1521 auto iter
= l
.find_string_key(key
);
1522 assert(iter
!= l
.iter_end());
1523 l
.leaf_update(iter
, key
, val
);
1526 case op_t::REMOVE
: {
1527 auto iter
= l
.find_string_key(key
);
1528 assert(iter
!= l
.iter_end());
1529 l
.leaf_remove(iter
);
1533 assert(0 == "Impossible");