]>
git.proxmox.com Git - ceph.git/blob - ceph/src/crimson/os/seastore/onode_manager/simple-fltree/onode_node.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
2 // vim: ts=8 sw=2 smarttab
9 #include "common/hobject.h"
10 #include "crimson/common/layout.h"
11 #include "crimson/os/seastore/onode.h"
12 #include "crimson/os/seastore/seastore_types.h"
13 #include "onode_delta.h"
15 namespace asci
= absl::container_internal
;
17 namespace boost::beast
{
19 bool operator==(const span
<T
>& lhs
, const span
<T
>& rhs
) {
21 lhs
.begin(), lhs
.end(),
22 rhs
.begin(), rhs
.end());
27 // it only keeps the bits necessary to rebuild an in-memory onode
28 struct [[gnu::packed
]] onode_t
{
29 onode_t
& operator=(const onode_t
& onode
) {
31 std::memcpy(data
, onode
.data
, len
);
35 return sizeof(*this) + len
;
37 OnodeRef
decode() const {
38 return new crimson::os::seastore::Onode(std::string_view
{data
, len
});
41 uint8_t struct_compat
= 1;
43 // - use uint16_t for length, as the size of an onode should be less
44 // than a block (16K for now)
46 uint32_t struct_len
= 0;
51 static inline std::ostream
& operator<<(std::ostream
& os
, const onode_t
& onode
) {
52 return os
<< *onode
.decode();
55 using crimson::os::seastore::laddr_t
;
57 struct [[gnu::packed
]] child_addr_t
{
59 child_addr_t(laddr_t data
)
62 child_addr_t
& operator=(laddr_t addr
) {
69 operator laddr_t() const {
73 return sizeof(laddr_t
);
77 // poor man's operator<=>
78 enum class ordering_t
{
84 template<class L
, class R
>
85 ordering_t
compare_element(const L
& x
, const R
& y
)
87 if constexpr (std::is_arithmetic_v
<L
>) {
88 static_assert(std::is_arithmetic_v
<R
>);
90 return ordering_t::less
;
92 return ordering_t::greater
;
94 return ordering_t::equivalent
;
97 // string_view::compare(), string::compare(), ...
98 auto result
= x
.compare(y
);
100 return ordering_t::less
;
101 } else if (result
> 0) {
102 return ordering_t::greater
;
104 return ordering_t::equivalent
;
109 template<typename L
, typename R
>
110 constexpr ordering_t
tuple_cmp(const L
&, const R
&, std::index_sequence
<>)
112 return ordering_t::equivalent
;
115 template<typename L
, typename R
,
116 size_t Head
, size_t... Tail
>
117 constexpr ordering_t
tuple_cmp(const L
& x
, const R
& y
,
118 std::index_sequence
<Head
, Tail
...>)
120 auto ordering
= compare_element(std::get
<Head
>(x
), std::get
<Head
>(y
));
121 if (ordering
!= ordering_t::equivalent
) {
124 return tuple_cmp(x
, y
, std::index_sequence
<Tail
...>());
128 template<typename
... Ls
, typename
... Rs
>
129 constexpr ordering_t
cmp(const std::tuple
<Ls
...>& x
,
130 const std::tuple
<Rs
...>& y
)
132 static_assert(sizeof...(Ls
) == sizeof...(Rs
));
133 return tuple_cmp(x
, y
, std::index_sequence_for
<Ls
...>());
142 struct [[gnu::packed
]] variable_key_suffix
{
154 using layout_type
= asci::Layout
<char, char>;
155 layout_type
cell_layout() const {
156 return layout_type
{nspace_len
, name_len
};
158 void set(const ghobject_t
& oid
) {
159 snap
= oid
.hobj
.snap
;
160 gen
= oid
.generation
;
161 nspace_len
= oid
.hobj
.nspace
.size();
162 name_len
= oid
.hobj
.oid
.name
.size();
163 auto layout
= cell_layout();
164 std::memcpy(layout
.Pointer
<index_t::nspace_data
>(data
),
165 oid
.hobj
.nspace
.data(), oid
.hobj
.nspace
.size());
166 std::memcpy(layout
.Pointer
<index_t::name_data
>(data
),
167 oid
.hobj
.oid
.name
.data(), oid
.hobj
.oid
.name
.size());
170 void update_oid(ghobject_t
& oid
) const {
171 oid
.hobj
.snap
= snap
;
172 oid
.generation
= gen
;
173 oid
.hobj
.nspace
= nspace();
174 oid
.hobj
.oid
.name
= name();
177 variable_key_suffix
& operator=(const variable_key_suffix
& key
) {
180 auto layout
= cell_layout();
181 auto nspace
= key
.nspace();
182 std::copy_n(nspace
.data(), nspace
.size(),
183 layout
.Pointer
<index_t::nspace_data
>(data
));
184 auto name
= key
.name();
185 std::copy_n(name
.data(), name
.size(),
186 layout
.Pointer
<index_t::name_data
>(data
));
189 const std::string_view
nspace() const {
190 auto layout
= cell_layout();
191 auto nspace
= layout
.Slice
<index_t::nspace_data
>(data
);
192 return {nspace
.data(), nspace
.size()};
194 const std::string_view
name() const {
195 auto layout
= cell_layout();
196 auto name
= layout
.Slice
<index_t::name_data
>(data
);
197 return {name
.data(), name
.size()};
199 size_t size() const {
200 return sizeof(*this) + nspace_len
+ name_len
;
202 static size_t size_from(const ghobject_t
& oid
) {
203 return (sizeof(variable_key_suffix
) +
204 oid
.hobj
.nspace
.size() +
205 oid
.hobj
.oid
.name
.size());
207 ordering_t
compare(const ghobject_t
& oid
) const {
208 return cmp(std::tie(nspace(), name(), snap
, gen
),
209 std::tie(oid
.hobj
.nspace
, oid
.hobj
.oid
.name
, oid
.hobj
.snap
.val
,
212 bool likes(const variable_key_suffix
& key
) const {
213 return nspace() == key
.nspace() && name() == key
.name();
217 static inline std::ostream
& operator<<(std::ostream
& os
, const variable_key_suffix
& k
) {
218 if (k
.snap
!= CEPH_NOSNAP
) {
219 os
<< "s" << k
.snap
<< ",";
221 if (k
.gen
!= ghobject_t::NO_GEN
) {
222 os
<< "g" << k
.gen
<< ",";
224 return os
<< k
.nspace() << "/" << k
.name();
227 // should use [[no_unique_address]] in C++20
228 struct empty_key_suffix
{
229 static constexpr ordering_t
compare(const ghobject_t
&) {
230 return ordering_t::equivalent
;
232 static void set(const ghobject_t
&) {}
233 static constexpr size_t size() {
236 static size_t size_from(const ghobject_t
&) {
239 static void update_oid(ghobject_t
&) {}
242 static inline std::ostream
& operator<<(std::ostream
& os
, const empty_key_suffix
&)
247 enum class ntype_t
: uint8_t {
252 constexpr ntype_t
flip_ntype(ntype_t ntype
) noexcept
254 if (ntype
== ntype_t::leaf
) {
255 return ntype_t::inner
;
257 return ntype_t::leaf
;
261 template<int N
, ntype_t NodeType
>
262 struct FixedKeyPrefix
{};
264 template<ntype_t NodeType
>
265 struct FixedKeyPrefix
<0, NodeType
>
267 static constexpr bool item_in_key
= false;
273 FixedKeyPrefix() = default;
274 FixedKeyPrefix(const ghobject_t
& oid
, uint16_t offset
)
275 : shard
{oid
.shard_id
},
277 hash
{oid
.hobj
.get_hash()},
281 void set(const ghobject_t
& oid
, uint16_t new_offset
) {
282 shard
= oid
.shard_id
;
283 pool
= oid
.hobj
.pool
;
284 hash
= oid
.hobj
.get_hash();
288 void set(const FixedKeyPrefix
& k
, uint16_t new_offset
) {
295 void update(const ghobject_t
& oid
) {
296 shard
= oid
.shard_id
;
297 pool
= oid
.hobj
.pool
;
298 hash
= oid
.hobj
.get_hash();
301 void update_oid(ghobject_t
& oid
) const {
302 oid
.set_shard(shard_id_t
{shard
});
303 oid
.hobj
.pool
= pool
;
304 oid
.hobj
.set_hash(hash
);
307 ordering_t
compare(const ghobject_t
& oid
) const {
308 // so std::tie() can bind them by reference
309 int8_t rhs_shard
= oid
.shard_id
;
310 uint32_t rhs_hash
= oid
.hobj
.get_hash();
311 return cmp(std::tie(shard
, pool
, hash
),
312 std::tie(rhs_shard
, oid
.hobj
.pool
, rhs_hash
));
314 // @return true if i likes @c k, we will can be pushed down to next level
316 likes_t
likes(const FixedKeyPrefix
& k
) const {
317 if (shard
== k
.shard
&& pool
== k
.pool
) {
325 template<ntype_t NodeType
>
326 std::ostream
& operator<<(std::ostream
& os
, const FixedKeyPrefix
<0, NodeType
>& k
) {
327 if (k
.shard
!= shard_id_t::NO_SHARD
) {
328 os
<< "s" << k
.shard
;
330 return os
<< "p=" << k
.pool
<< ","
331 << "h=" << std::hex
<< k
.hash
<< std::dec
<< ","
335 // all elements in this node share the same <shard, pool>
336 template<ntype_t NodeType
>
337 struct FixedKeyPrefix
<1, NodeType
> {
338 static constexpr bool item_in_key
= false;
342 FixedKeyPrefix() = default;
343 FixedKeyPrefix(uint32_t hash
, uint16_t offset
)
347 FixedKeyPrefix(const ghobject_t
& oid
, uint16_t offset
)
348 : FixedKeyPrefix(oid
.hobj
.get_hash(), offset
)
350 void set(const ghobject_t
& oid
, uint16_t new_offset
) {
351 hash
= oid
.hobj
.get_hash();
355 void set(const FixedKeyPrefix
<N
, NodeType
>& k
, uint16_t new_offset
) {
356 static_assert(N
< 2, "only N0, N1 have hash");
360 void update_oid(ghobject_t
& oid
) const {
361 oid
.hobj
.set_hash(hash
);
363 void update(const ghobject_t
& oid
) {
364 hash
= oid
.hobj
.get_hash();
366 ordering_t
compare(const ghobject_t
& oid
) const {
367 return compare_element(hash
, oid
.hobj
.get_hash());
369 likes_t
likes(const FixedKeyPrefix
& k
) const {
370 return hash
== k
.hash
? likes_t::yes
: likes_t::no
;
374 template<ntype_t NodeType
>
375 std::ostream
& operator<<(std::ostream
& os
, const FixedKeyPrefix
<1, NodeType
>& k
) {
376 return os
<< "0x" << std::hex
<< k
.hash
<< std::dec
<< ","
380 // all elements in this node must share the same <shard, pool, hash>
381 template<ntype_t NodeType
>
382 struct FixedKeyPrefix
<2, NodeType
> {
383 static constexpr bool item_in_key
= false;
386 FixedKeyPrefix() = default;
388 static constexpr ordering_t
compare(const ghobject_t
& oid
) {
389 // need to compare the cell
390 return ordering_t::equivalent
;
392 // always defer to my cell for likeness
393 constexpr likes_t
likes(const FixedKeyPrefix
&) const {
394 return likes_t::maybe
;
396 void set(const ghobject_t
&, uint16_t new_offset
) {
400 void set(const FixedKeyPrefix
<N
, NodeType
>&, uint16_t new_offset
) {
403 void update(const ghobject_t
&) {}
404 void update_oid(ghobject_t
&) const {}
407 template<ntype_t NodeType
>
408 std::ostream
& operator<<(std::ostream
& os
, const FixedKeyPrefix
<2, NodeType
>& k
) {
409 return os
<< ">" << k
.offset
;
416 fixed_key_3() = default;
417 fixed_key_3(const ghobject_t
& oid
)
418 : snap
{oid
.hobj
.snap
}, gen
{oid
.generation
}
420 ordering_t
compare(const ghobject_t
& oid
) const {
421 return cmp(std::tie(snap
, gen
),
422 std::tie(oid
.hobj
.snap
.val
, oid
.generation
));
424 // no object likes each other at this level
425 constexpr likes_t
likes(const fixed_key_3
&) const {
428 void update_with_oid(const ghobject_t
& oid
) {
429 snap
= oid
.hobj
.snap
;
430 gen
= oid
.generation
;
432 void update_oid(ghobject_t
& oid
) const {
433 oid
.hobj
.snap
= snap
;
434 oid
.generation
= gen
;
438 static inline std::ostream
& operator<<(std::ostream
& os
, const fixed_key_3
& k
) {
439 if (k
.snap
!= CEPH_NOSNAP
) {
440 os
<< "s" << k
.snap
<< ",";
442 if (k
.gen
!= ghobject_t::NO_GEN
) {
443 os
<< "g" << k
.gen
<< ",";
448 // all elements in this node must share the same <shard, pool, hash, namespace, oid>
449 // but the unlike other FixedKeyPrefix<>, a node with FixedKeyPrefix<3> does not have
450 // variable_sized_key, so if it is an inner node, we can just embed the child
451 // addr right in the key.
453 struct FixedKeyPrefix
<3, ntype_t::inner
> : public fixed_key_3
{
454 // the item is embedded in the key
455 static constexpr bool item_in_key
= true;
456 laddr_t child_addr
= 0;
458 FixedKeyPrefix() = default;
459 void set(const ghobject_t
& oid
, laddr_t new_child_addr
) {
460 update_with_oid(oid
);
461 child_addr
= new_child_addr
;
463 // unlikely get called, though..
464 void update(const ghobject_t
& oid
) {}
466 std::enable_if_t
<N
< 3> set(const FixedKeyPrefix
<N
, ntype_t::inner
>&,
467 laddr_t new_child_addr
) {
468 child_addr
= new_child_addr
;
470 void set(const FixedKeyPrefix
& k
, laddr_t new_child_addr
) {
473 child_addr
= new_child_addr
;
475 void set(const variable_key_suffix
& k
, laddr_t new_child_addr
) {
478 child_addr
= new_child_addr
;
483 struct FixedKeyPrefix
<3, ntype_t::leaf
> : public fixed_key_3
{
484 static constexpr bool item_in_key
= false;
487 FixedKeyPrefix() = default;
488 void set(const ghobject_t
& oid
, uint16_t new_offset
) {
489 update_with_oid(oid
);
492 void set(const FixedKeyPrefix
& k
, uint16_t new_offset
) {
498 std::enable_if_t
<N
< 3> set(const FixedKeyPrefix
<N
, ntype_t::leaf
>&,
499 uint16_t new_offset
) {
505 template<int N
, ntype_t node_type
>
506 static constexpr tag_t
create() {
507 static_assert(std::clamp(N
, 0, 3) == N
);
508 return tag_t
{N
, static_cast<uint8_t>(node_type
)};
510 bool is_leaf() const {
511 return type() == ntype_t::leaf
;
516 ntype_t
type() const {
517 return ntype_t
{node_type
};
520 uint8_t node_type
: 4;
523 static inline std::ostream
& operator<<(std::ostream
& os
, const tag_t
& tag
) {
524 return os
<< "n=" << tag
.layout() << ", leaf=" << tag
.is_leaf();
527 // for calculating size of variable-sized item/key
529 size_t size_of(const T
& t
) {
530 using decayed_t
= std::decay_t
<T
>;
531 if constexpr (std::is_scalar_v
<decayed_t
>) {
532 return sizeof(decayed_t
);
538 enum class size_state_t
{
544 // layout of a node of B+ tree
546 // it is different from a typical B+ tree in following ways
547 // - the size of keys is not necessarily fixed, neither is the size of value.
548 // - the max number of elements in a node is determined by the total size of
549 // the keys and values in the node
550 // - in internal nodes, each key maps to the logical address of the child
551 // node whose minimum key is greater or equal to that key.
552 template<size_t BlockSize
,
556 static_assert(std::clamp(N
, 0, 3) == N
);
557 constexpr static ntype_t node_type
= NodeType
;
558 constexpr static int node_n
= N
;
560 using key_prefix_t
= FixedKeyPrefix
<N
, NodeType
>;
561 using item_t
= std::conditional_t
<NodeType
== ntype_t::leaf
,
564 using const_item_t
= std::conditional_t
<NodeType
== ntype_t::leaf
,
567 static constexpr bool item_in_key
= key_prefix_t::item_in_key
;
568 using key_suffix_t
= std::conditional_t
<N
< 3,
572 std::pair
<const key_prefix_t
&, const key_suffix_t
&>
573 key_at(unsigned slot
) const;
575 // update an existing oid with the specified item
576 ghobject_t
get_oid_at(unsigned slot
, const ghobject_t
& oid
) const;
577 const_item_t
item_at(const key_prefix_t
& key
) const;
578 void dump(std::ostream
& os
) const;
580 // for debugging only.
581 static constexpr bool is_leaf() {
582 return node_type
== ntype_t::leaf
;
585 bool _is_leaf() const {
586 return tag
.is_leaf();
589 char* from_end(uint16_t offset
);
590 const char* from_end(uint16_t offset
) const;
591 uint16_t used_space() const;
592 uint16_t free_space() const {
593 return capacity() - used_space();
595 static uint16_t capacity();
596 // TODO: if it's allowed to update 2 siblings at the same time, we can have
598 static constexpr uint16_t min_size();
601 // calculate the allowable bounds on bytes to remove from an overflow node
602 // with specified size
603 // @param size the overflowed size
604 // @return <minimum bytes to grab, maximum bytes to grab>
605 static constexpr std::pair
<int16_t, int16_t> bytes_to_remove(uint16_t size
);
607 // calculate the allowable bounds on bytes to add to an underflow node
608 // with specified size
609 // @param size the underflowed size
610 // @return <minimum bytes to push, maximum bytes to push>
611 static constexpr std::pair
<int16_t, int16_t> bytes_to_add(uint16_t size
);
613 size_state_t
size_state(uint16_t size
) const;
614 bool is_underflow(uint16_t size
) const;
615 int16_t size_with_key(unsigned slot
, const ghobject_t
& oid
) const;
616 ordering_t
compare_with_slot(unsigned slot
, const ghobject_t
& oid
) const;
617 /// return the slot number of the first slot that is greater or equal to
619 std::pair
<unsigned, bool> lower_bound(const ghobject_t
& oid
) const;
620 static uint16_t size_of_item(const ghobject_t
& oid
, const item_t
& item
);
621 bool is_overflow(const ghobject_t
& oid
, const item_t
& item
) const;
622 bool is_overflow(const ghobject_t
& oid
, const OnodeRef
& item
) const;
624 // inserts an item into the given slot, pushing all subsequent keys forward
625 // @note if the item is not embedded in key, shift the right half as well
626 void insert_at(unsigned slot
, const ghobject_t
& oid
, const item_t
& item
);
627 // used by InnerNode for updating the keys indexing its children when their lower boundaries
629 void update_key_at(unsigned slot
, const ghobject_t
& oid
);
630 // try to figure out the number of elements and total size when trying to
631 // rebalance by moving the elements from the front of this node when its
632 // left sibling node is underflow
634 // @param min_grab lower bound of the number of bytes to move
635 // @param max_grab upper bound of the number of bytes to move
636 // @return the number of element to grab
637 // @note return {0, 0} if current node would be underflow if
638 // @c min_grab bytes of elements are taken from it
639 std::pair
<unsigned, uint16_t> calc_grab_front(uint16_t min_grab
, uint16_t max_grab
) const;
640 // try to figure out the number of elements and their total size when trying to
641 // rebalance by moving the elements from the end of this node when its right
642 // sibling node is underflow
644 // @param min_grab lower bound of the number of bytes to move
645 // @param max_grab upper bound of the number of bytes to move
646 // @return the number of element to grab
647 // @note return {0, 0} if current node would be underflow if
648 // @c min_grab bytes of elements are taken from it
649 std::pair
<unsigned, uint16_t> calc_grab_back(uint16_t min_grab
, uint16_t max_grab
) const;
650 template<int LeftN
, class Mover
> void grab_from_left(
651 node_t
<BlockSize
, LeftN
, NodeType
>& left
,
652 unsigned n
, uint16_t bytes
,
654 template<int RightN
, class Mover
>
655 delta_t
acquire_right(node_t
<BlockSize
, RightN
, NodeType
>& right
,
656 unsigned whoami
, Mover
& mover
);
657 // transfer n elements at the front of given node to me
658 template<int RightN
, class Mover
>
659 void grab_from_right(node_t
<BlockSize
, RightN
, NodeType
>& right
,
660 unsigned n
, uint16_t bytes
,
662 template<int LeftN
, class Mover
>
663 void push_to_left(node_t
<BlockSize
, LeftN
, NodeType
>& left
,
664 unsigned n
, uint16_t bytes
,
666 template<int RightN
, class Mover
>
667 void push_to_right(node_t
<BlockSize
, RightN
, NodeType
>& right
,
668 unsigned n
, uint16_t bytes
,
670 // [to, from) are removed, so we need to shift left
671 // actually there are only two use cases:
672 // - to = 0: for giving elements in bulk
673 // - to = from - 1: for removing a single element
674 // old: |////|.....| |.....|/|........|
675 // new: |.....| |.....||........|
676 void shift_left(unsigned from
, unsigned to
);
677 void insert_front(const ceph::bufferptr
& keys_buf
, const ceph::bufferptr
& cells_buf
);
678 void insert_back(const ceph::bufferptr
& keys_buf
, const ceph::bufferptr
& cells_buf
);
679 // one or more elements are inserted, so we need to shift the elements right
680 // actually there are only two use cases:
681 // - bytes != 0: for inserting bytes before from
682 // - bytes = 0: for inserting a single element before from
684 // new: |/////|.....|
685 void shift_right(unsigned n
, unsigned bytes
);
686 // shift all keys after slot is removed.
687 // @note if the item is not embdedded in key, all items sitting at the left
688 // side of it will be shifted right
689 void remove_from(unsigned slot
);
690 void trim_right(unsigned n
);
691 void play_delta(const delta_t
& delta
);
692 // /-------------------------------|
694 // |header|k0|k1|k2|... | / / |k2'v2|k1'v1|k0'.v0| v_m |
696 tag_t tag
= tag_t::create
<N
, NodeType
>();
697 // the count of values in the node
702 template<class parent_t
,
709 EntryMover(const parent_t
&, from_t
&, to_t
& dst
, unsigned) {
712 void move_from(unsigned, unsigned, unsigned) {
715 delta_t
get_delta() {
716 return delta_t::nop();
720 // lower the layout, for instance, from L0 to L1, no reference oid is used
721 template<class parent_t
,
724 class EntryMover
<parent_t
,
727 std::enable_if_t
<from_t::node_n
< to_t::node_n
>>
730 EntryMover(const parent_t
&, from_t
& src
, to_t
& dst
, unsigned)
733 void move_from(unsigned src_first
, unsigned dst_first
, unsigned n
)
735 ceph::bufferptr keys_buf
{n
* sizeof(to_t::key_prefix_t
)};
736 ceph::bufferptr cells_buf
;
737 auto dst_keys
= reinterpret_cast<typename
to_t::key_prefix_t
*>(keys_buf
.c_str());
738 if constexpr (to_t::item_in_key
) {
739 for (unsigned i
= 0; i
< n
; i
++) {
740 const auto& [prefix
, suffix
] = src
.key_at(src_first
+ i
);
741 dst_keys
[i
].set(suffix
, src
.item_at(prefix
));
745 uint16_t src_offset
= src_first
> 0 ? src
.keys
[src_first
- 1].offset
: 0;
746 uint16_t dst_offset
= dst_first
> 0 ? dst
.keys
[dst_first
- 1].offset
: 0;
747 for (unsigned i
= 0; i
< n
; i
++) {
748 auto& src_key
= src
.keys
[src_first
+ i
];
749 uint16_t offset
= src_key
.offset
- src_offset
+ dst_offset
;
750 dst_keys
[i
].set(src_key
, offset
);
752 // copy cells in bulk, yay!
753 auto src_end
= src
.keys
[src_first
+ n
- 1].offset
;
754 uint16_t total_cell_size
= src_end
- src_offset
;
755 cells_buf
= ceph::bufferptr
{total_cell_size
};
756 cells_buf
.copy_in(0, total_cell_size
, src
.from_end(src_end
));
758 if (dst_first
== dst
.count
) {
759 dst_delta
= delta_t::insert_back(keys_buf
, cells_buf
);
761 dst_delta
= delta_t::insert_front(keys_buf
, cells_buf
);
763 if (src_first
> 0 && src_first
+ n
== src
.count
) {
764 src_delta
= delta_t::trim_right(src_first
);
765 } else if (src_first
== 0 && n
< src
.count
) {
766 src_delta
= delta_t::shift_left(n
);
767 } else if (src_first
== 0 && n
== src
.count
) {
768 // the caller will retire the src extent
770 // grab in the middle?
775 delta_t
from_delta() {
776 return std::move(src_delta
);
779 return std::move(dst_delta
);
788 // lift the layout, for instance, from L2 to L0, need a reference oid
789 template<class parent_t
,
792 class EntryMover
<parent_t
, from_t
, to_t
,
793 std::enable_if_t
<(from_t::node_n
> to_t::node_n
)>>
796 EntryMover(const parent_t
& parent
, from_t
& src
, to_t
& dst
, unsigned from_slot
)
797 : src
{src
}, dst
{dst
}, ref_oid
{parent
->get_oid_at(from_slot
, {})}
799 void move_from(unsigned src_first
, unsigned dst_first
, unsigned n
)
801 ceph::bufferptr keys_buf
{n
* sizeof(to_t::key_prefix_t
)};
802 ceph::bufferptr cells_buf
;
803 auto dst_keys
= reinterpret_cast<typename
to_t::key_prefix_t
*>(keys_buf
.c_str());
804 uint16_t in_node_offset
= dst_first
> 0 ? dst
.keys
[dst_first
- 1].offset
: 0;
805 static_assert(!std::is_same_v
<typename
to_t::key_suffix_t
, empty_key_suffix
>);
807 uint16_t buf_offset
= 0;
808 for (unsigned i
= 0; i
< n
; i
++) {
809 auto& src_key
= src
.keys
[src_first
+ i
];
810 if constexpr (std::is_same_v
<typename
from_t::key_suffix_t
, empty_key_suffix
>) {
811 // heterogeneous partial key, have to rebuild dst partial key from oid
812 src_key
.update_oid(ref_oid
);
813 const auto& src_item
= src
.item_at(src_key
);
814 size_t key2_size
= to_t::key_suffix_t::size_from(ref_oid
);
815 buf_offset
+= key2_size
+ size_of(src_item
);
816 dst_keys
[i
].set(ref_oid
, in_node_offset
+ buf_offset
);
817 auto p
= from_end(cells_buf
, buf_offset
);
818 auto partial_key
= reinterpret_cast<typename
to_t::key_suffix_t
*>(p
);
819 partial_key
->set(ref_oid
);
821 auto dst_item
= reinterpret_cast<typename
to_t::item_t
*>(p
);
822 *dst_item
= src_item
;
824 // homogeneous partial key, just update the pointers
825 uint16_t src_offset
= src_first
> 0 ? src
.keys
[src_first
- 1].offset
: 0;
826 uint16_t dst_offset
= dst_first
> 0 ? dst
.keys
[dst_first
- 1].offset
: 0;
827 uint16_t offset
= src_key
.offset
- src_offset
+ dst_offset
;
828 dst_keys
[i
].set(ref_oid
, in_node_offset
+ offset
);
831 if constexpr (std::is_same_v
<typename
to_t::key_suffix_t
,
832 typename
from_t::key_suffix_t
>) {
833 // copy cells in bulk, yay!
834 uint16_t src_offset
= src_first
> 0 ? src
.keys
[src_first
- 1].offset
: 0;
835 uint16_t src_end
= src
.keys
[src_first
+ n
- 1].offset
;
836 uint16_t total_cell_size
= src_end
- src_offset
;
837 cells_buf
.copy_in(0, total_cell_size
, src
.from_end(src_end
));
839 if (dst_first
== dst
.count
) {
840 dst_delta
= delta_t::insert_back(keys_buf
, cells_buf
);
842 dst_delta
= delta_t::insert_front(keys_buf
, cells_buf
);
844 if (src_first
+ n
== src
.count
&& src_first
> 0) {
845 src_delta
= delta_t::trim_right(src_first
);
847 // the caller will retire the src extent
848 assert(src_first
== 0);
852 delta_t
from_delta() {
853 return std::move(src_delta
);
856 return std::move(dst_delta
);
859 char* from_end(ceph::bufferptr
& ptr
, uint16_t offset
) {
860 return ptr
.end_c_str() - static_cast<int>(offset
);
870 // identical layout, yay!
871 template<class parent_t
,
873 class EntryMover
<parent_t
, child_t
, child_t
>
876 EntryMover(const parent_t
&, child_t
& src
, child_t
& dst
, unsigned)
880 void move_from(unsigned src_first
, unsigned dst_first
, unsigned n
)
882 ceph::bufferptr keys_buf
{static_cast<unsigned>(n
* sizeof(typename
child_t::key_prefix_t
))};
883 ceph::bufferptr cells_buf
;
884 auto dst_keys
= reinterpret_cast<typename
child_t::key_prefix_t
*>(keys_buf
.c_str());
887 std::copy(src
.keys
+ src_first
, src
.keys
+ src_first
+ n
,
889 if constexpr (!child_t::item_in_key
) {
890 uint16_t src_offset
= src_first
> 0 ? src
.keys
[src_first
- 1].offset
: 0;
891 uint16_t dst_offset
= dst_first
> 0 ? dst
.keys
[dst_first
- 1].offset
: 0;
892 const int offset_delta
= dst_offset
- src_offset
;
893 // update the pointers
894 for (unsigned i
= 0; i
< n
; i
++) {
895 dst_keys
[i
].offset
+= offset_delta
;
897 // copy cells in bulk, yay!
898 auto src_end
= src
.keys
[src_first
+ n
- 1].offset
;
899 uint16_t total_cell_size
= src_end
- src_offset
;
900 cells_buf
= ceph::bufferptr
{total_cell_size
};
901 cells_buf
.copy_in(0, total_cell_size
, src
.from_end(src_end
));
903 if (dst_first
== dst
.count
) {
904 dst_delta
= delta_t::insert_back(std::move(keys_buf
), std::move(cells_buf
));
906 dst_delta
= delta_t::insert_front(std::move(keys_buf
), std::move(cells_buf
));
908 if (src_first
+ n
== src
.count
&& src_first
> 0) {
909 src_delta
= delta_t::trim_right(n
);
910 } else if (src_first
== 0 && n
< src
.count
) {
911 src_delta
= delta_t::shift_left(n
);
912 } else if (src_first
== 0 && n
== src
.count
) {
913 // the caller will retire the src extent
915 // grab in the middle?
920 delta_t
from_delta() {
921 return std::move(src_delta
);
925 return std::move(dst_delta
);
928 char* from_end(ceph::bufferptr
& ptr
, uint16_t offset
) {
929 return ptr
.end_c_str() - static_cast<int>(offset
);
938 template<class parent_t
, class from_t
, class to_t
>
939 EntryMover
<parent_t
, from_t
, to_t
>
940 make_mover(const parent_t
& parent
, from_t
& src
, to_t
& dst
, unsigned from_slot
) {
941 return EntryMover
<parent_t
, from_t
, to_t
>(parent
, src
, dst
, from_slot
);