// ceph/src/crimson/os/seastore/onode_manager/simple-fltree/onode_node.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include <algorithm>
5 #include <cstdint>
6 #include <type_traits>
7 #include <variant>
8
9 #include "common/hobject.h"
10 #include "crimson/common/layout.h"
11 #include "crimson/os/seastore/onode.h"
12 #include "crimson/os/seastore/seastore_types.h"
13 #include "onode_delta.h"
14
15 namespace asci = absl::container_internal;
16
17 namespace boost::beast {
18 template<class T>
19 bool operator==(const span<T>& lhs, const span<T>& rhs) {
20 return std::equal(
21 lhs.begin(), lhs.end(),
22 rhs.begin(), rhs.end());
23 }
24 }
25
// on-disk onode
// it only keeps the bits necessary to rebuild an in-memory onode
struct [[gnu::packed]] onode_t {
  // copy only the payload length and bytes; the version/compat/struct_len
  // header fields of the destination are left as they are.
  // NOTE(review): assumes *this was allocated with room for onode.len
  // trailing bytes -- the flexible array member is not bounds-checked
  onode_t& operator=(const onode_t& onode) {
    len = onode.len;
    std::memcpy(data, onode.data, len);
    return *this;
  }
  // total on-disk footprint: fixed header plus the variable payload
  size_t size() const {
    return sizeof(*this) + len;
  }
  // rebuild an in-memory Onode from the raw payload bytes
  OnodeRef decode() const {
    return new crimson::os::seastore::Onode(std::string_view{data, len});
  }
  uint8_t struct_v = 1;
  uint8_t struct_compat = 1;
  // TODO:
  // - use uint16_t for length, as the size of an onode should be less
  //   than a block (16K for now)
  // - drop struct_len
  uint32_t struct_len = 0;
  uint32_t len;   // number of valid bytes in data[]
  char data[];    // variable-length payload (flexible array member)
};
50
// pretty-print an on-disk onode by decoding it into its in-memory form
static inline std::ostream& operator<<(std::ostream& os, const onode_t& onode) {
  return os << *onode.decode();
}
54
55 using crimson::os::seastore::laddr_t;
56
// value stored by inner nodes: the logical address of a child node.
// a thin packed wrapper so it converts to/from laddr_t freely
struct [[gnu::packed]] child_addr_t {
  laddr_t data;
  child_addr_t(laddr_t data)
    : data{data}
  {}
  child_addr_t& operator=(laddr_t addr) {
    data = addr;
    return *this;
  }
  laddr_t get() const {
    return data;
  }
  operator laddr_t() const {
    return data;
  }
  // on-disk footprint of this value
  size_t size() const {
    return sizeof(laddr_t);
  }
};
76
// poor man's operator<=>
enum class ordering_t {
  less,
  equivalent,
  greater,
};

// three-way compare two values: arithmetic operands are ordered with </>,
// anything else is expected to provide a compare() member returning
// negative / zero / positive (string_view::compare(), string::compare(), ...)
template<class L, class R>
ordering_t compare_element(const L& x, const R& y)
{
  if constexpr (std::is_arithmetic_v<L>) {
    static_assert(std::is_arithmetic_v<R>);
    if (x < y) {
      return ordering_t::less;
    }
    return x > y ? ordering_t::greater : ordering_t::equivalent;
  } else {
    // string_view::compare(), string::compare(), ...
    const auto rc = x.compare(y);
    if (rc < 0) {
      return ordering_t::less;
    }
    return rc > 0 ? ordering_t::greater : ordering_t::equivalent;
  }
}
108
109 template<typename L, typename R>
110 constexpr ordering_t tuple_cmp(const L&, const R&, std::index_sequence<>)
111 {
112 return ordering_t::equivalent;
113 }
114
115 template<typename L, typename R,
116 size_t Head, size_t... Tail>
117 constexpr ordering_t tuple_cmp(const L& x, const R& y,
118 std::index_sequence<Head, Tail...>)
119 {
120 auto ordering = compare_element(std::get<Head>(x), std::get<Head>(y));
121 if (ordering != ordering_t::equivalent) {
122 return ordering;
123 } else {
124 return tuple_cmp(x, y, std::index_sequence<Tail...>());
125 }
126 }
127
128 template<typename... Ls, typename... Rs>
129 constexpr ordering_t cmp(const std::tuple<Ls...>& x,
130 const std::tuple<Rs...>& y)
131 {
132 static_assert(sizeof...(Ls) == sizeof...(Rs));
133 return tuple_cmp(x, y, std::index_sequence_for<Ls...>());
134 }
135
// whether two keys can live under the same factored-out prefix:
//   yes   - they agree on the fields this layout factors out
//   no    - they differ in those fields
//   maybe - this layout cannot tell; defer to the variable-sized cell
enum class likes_t {
  yes,
  no,
  maybe,
};
141
142 struct [[gnu::packed]] variable_key_suffix {
143 uint64_t snap;
144 uint64_t gen;
145 uint8_t nspace_len;
146 uint8_t name_len;
147 char data[];
148 struct index_t {
149 enum {
150 nspace_data = 0,
151 name_data = 1,
152 };
153 };
154 using layout_type = asci::Layout<char, char>;
155 layout_type cell_layout() const {
156 return layout_type{nspace_len, name_len};
157 }
158 void set(const ghobject_t& oid) {
159 snap = oid.hobj.snap;
160 gen = oid.generation;
161 nspace_len = oid.hobj.nspace.size();
162 name_len = oid.hobj.oid.name.size();
163 auto layout = cell_layout();
164 std::memcpy(layout.Pointer<index_t::nspace_data>(data),
165 oid.hobj.nspace.data(), oid.hobj.nspace.size());
166 std::memcpy(layout.Pointer<index_t::name_data>(data),
167 oid.hobj.oid.name.data(), oid.hobj.oid.name.size());
168 }
169
170 void update_oid(ghobject_t& oid) const {
171 oid.hobj.snap = snap;
172 oid.generation = gen;
173 oid.hobj.nspace = nspace();
174 oid.hobj.oid.name = name();
175 }
176
177 variable_key_suffix& operator=(const variable_key_suffix& key) {
178 snap = key.snap;
179 gen = key.gen;
180 auto layout = cell_layout();
181 auto nspace = key.nspace();
182 std::copy_n(nspace.data(), nspace.size(),
183 layout.Pointer<index_t::nspace_data>(data));
184 auto name = key.name();
185 std::copy_n(name.data(), name.size(),
186 layout.Pointer<index_t::name_data>(data));
187 return *this;
188 }
189 const std::string_view nspace() const {
190 auto layout = cell_layout();
191 auto nspace = layout.Slice<index_t::nspace_data>(data);
192 return {nspace.data(), nspace.size()};
193 }
194 const std::string_view name() const {
195 auto layout = cell_layout();
196 auto name = layout.Slice<index_t::name_data>(data);
197 return {name.data(), name.size()};
198 }
199 size_t size() const {
200 return sizeof(*this) + nspace_len + name_len;
201 }
202 static size_t size_from(const ghobject_t& oid) {
203 return (sizeof(variable_key_suffix) +
204 oid.hobj.nspace.size() +
205 oid.hobj.oid.name.size());
206 }
207 ordering_t compare(const ghobject_t& oid) const {
208 return cmp(std::tie(nspace(), name(), snap, gen),
209 std::tie(oid.hobj.nspace, oid.hobj.oid.name, oid.hobj.snap.val,
210 oid.generation));
211 }
212 bool likes(const variable_key_suffix& key) const {
213 return nspace() == key.nspace() && name() == key.name();
214 }
215 };
216
// print as [sSNAP,][gGEN,]nspace/name, omitting sentinel snap/gen values
static inline std::ostream& operator<<(std::ostream& os, const variable_key_suffix& k) {
  if (k.snap != CEPH_NOSNAP) {
    os << "s" << k.snap << ",";
  }
  if (k.gen != ghobject_t::NO_GEN) {
    os << "g" << k.gen << ",";
  }
  return os << k.nspace() << "/" << k.name();
}
226
// stand-in suffix for layouts whose keys carry no variable-sized part:
// every operation is a no-op with zero footprint
// should use [[no_unique_address]] in C++20
struct empty_key_suffix {
  static constexpr ordering_t compare(const ghobject_t&) {
    return ordering_t::equivalent;
  }
  static void set(const ghobject_t&) {}
  static constexpr size_t size() {
    return 0;
  }
  static size_t size_from(const ghobject_t&) {
    return 0;
  }
  static void update_oid(ghobject_t&) {}
};

// an empty suffix prints nothing
static inline std::ostream& operator<<(std::ostream& os, const empty_key_suffix&)
{
  return os;
}
246
// node type: whether a node stores onodes (leaf) or child addresses (inner)
enum class ntype_t : uint8_t {
  leaf = 0u,
  inner,
};

// map leaf -> inner and inner -> leaf
constexpr ntype_t flip_ntype(ntype_t ntype) noexcept
{
  return ntype == ntype_t::leaf ? ntype_t::inner : ntype_t::leaf;
}
260
// fixed-size part of a key; N selects how much of the key the node factors
// out into this prefix (see the specializations below for N = 0..3)
template<int N, ntype_t NodeType>
struct FixedKeyPrefix {};
263
// layout 0: nothing is factored out yet, so the prefix carries shard, pool
// and hash; the namespace/name/snap/gen live in the variable-sized cell
// addressed by offset
template<ntype_t NodeType>
struct FixedKeyPrefix<0, NodeType>
{
  static constexpr bool item_in_key = false;
  int8_t shard = -1;
  int64_t pool = -1;
  uint32_t hash = 0;
  // offset of this key's cell, measured from the end of the node
  uint16_t offset = 0;

  FixedKeyPrefix() = default;
  FixedKeyPrefix(const ghobject_t& oid, uint16_t offset)
    : shard{oid.shard_id},
      pool{oid.hobj.pool},
      hash{oid.hobj.get_hash()},
      offset{offset}
  {}

  // reset prefix fields from an oid, pointing the cell at new_offset
  void set(const ghobject_t& oid, uint16_t new_offset) {
    shard = oid.shard_id;
    pool = oid.hobj.pool;
    hash = oid.hobj.get_hash();
    offset = new_offset;
  }

  // copy prefix fields from another key, pointing the cell at new_offset
  void set(const FixedKeyPrefix& k, uint16_t new_offset) {
    shard = k.shard;
    pool = k.pool;
    hash = k.hash;
    offset = new_offset;
  }

  // refresh prefix fields from an oid, leaving the cell offset unchanged
  void update(const ghobject_t& oid) {
    shard = oid.shard_id;
    pool = oid.hobj.pool;
    hash = oid.hobj.get_hash();
  }

  // write this prefix's fields back into an oid
  void update_oid(ghobject_t& oid) const {
    oid.set_shard(shard_id_t{shard});
    oid.hobj.pool = pool;
    oid.hobj.set_hash(hash);
  }

  // three-way compare on <shard, pool, hash>; ties are broken by the cell
  ordering_t compare(const ghobject_t& oid) const {
    // so std::tie() can bind them by reference
    int8_t rhs_shard = oid.shard_id;
    uint32_t rhs_hash = oid.hobj.get_hash();
    return cmp(std::tie(shard, pool, hash),
               std::tie(rhs_shard, oid.hobj.pool, rhs_hash));
  }
  // @return yes if i likes @c k, i.e. k can be pushed down to the next
  //         layout level in the same node (hash is deliberately ignored)
  likes_t likes(const FixedKeyPrefix& k) const {
    if (shard == k.shard && pool == k.pool) {
      return likes_t::yes;
    } else {
      return likes_t::no;
    }
  }
};
324
// print as [sSHARD]p=POOL,h=HASH,>OFFSET, skipping the shard when unset
// NOTE(review): k.shard is int8_t while NO_SHARD is a shard_id_t -- relies
// on shard_id_t being comparable with the raw integer; verify
template<ntype_t NodeType>
std::ostream& operator<<(std::ostream& os, const FixedKeyPrefix<0, NodeType>& k) {
  if (k.shard != shard_id_t::NO_SHARD) {
    os << "s" << k.shard;
  }
  return os << "p=" << k.pool << ","
            << "h=" << std::hex << k.hash << std::dec << ","
            << ">" << k.offset;
}
334
// all elements in this node share the same <shard, pool>
template<ntype_t NodeType>
struct FixedKeyPrefix<1, NodeType> {
  static constexpr bool item_in_key = false;
  uint32_t hash = 0;
  // offset of this key's cell, measured from the end of the node
  uint16_t offset = 0;

  FixedKeyPrefix() = default;
  FixedKeyPrefix(uint32_t hash, uint16_t offset)
    : hash{hash},
      offset{offset}
  {}
  FixedKeyPrefix(const ghobject_t& oid, uint16_t offset)
    : FixedKeyPrefix(oid.hobj.get_hash(), offset)
  {}
  // reset the hash from an oid, pointing the cell at new_offset
  void set(const ghobject_t& oid, uint16_t new_offset) {
    hash = oid.hobj.get_hash();
    offset = new_offset;
  }
  // copy the hash from an N0/N1 prefix, pointing the cell at new_offset
  template<int N>
  void set(const FixedKeyPrefix<N, NodeType>& k, uint16_t new_offset) {
    static_assert(N < 2, "only N0, N1 have hash");
    hash = k.hash;
    offset = new_offset;
  }
  void update_oid(ghobject_t& oid) const {
    oid.hobj.set_hash(hash);
  }
  void update(const ghobject_t& oid) {
    hash = oid.hobj.get_hash();
  }
  // only the hash is compared; shard/pool are common to the whole node
  ordering_t compare(const ghobject_t& oid) const {
    return compare_element(hash, oid.hobj.get_hash());
  }
  likes_t likes(const FixedKeyPrefix& k) const {
    return hash == k.hash ? likes_t::yes : likes_t::no;
  }
};
373
// print the hash (hex) and the cell offset
template<ntype_t NodeType>
std::ostream& operator<<(std::ostream& os, const FixedKeyPrefix<1, NodeType>& k) {
  return os << "0x" << std::hex << k.hash << std::dec << ","
            << ">" << k.offset;
}
379
// all elements in this node must share the same <shard, pool, hash>
template<ntype_t NodeType>
struct FixedKeyPrefix<2, NodeType> {
  static constexpr bool item_in_key = false;
  // offset of this key's cell, measured from the end of the node
  uint16_t offset = 0;

  FixedKeyPrefix() = default;

  // the whole fixed prefix is common to the node, so every key compares
  // equivalent here and the variable-sized cell decides the ordering
  static constexpr ordering_t compare(const ghobject_t& oid) {
    // need to compare the cell
    return ordering_t::equivalent;
  }
  // always defer to my cell for likeness
  constexpr likes_t likes(const FixedKeyPrefix&) const {
    return likes_t::maybe;
  }
  void set(const ghobject_t&, uint16_t new_offset) {
    offset = new_offset;
  }
  // nothing to copy from a wider prefix except the cell offset
  template<int N>
  void set(const FixedKeyPrefix<N, NodeType>&, uint16_t new_offset) {
    offset = new_offset;
  }
  void update(const ghobject_t&) {}
  void update_oid(ghobject_t&) const {}
};
406
// only the cell offset is stored at this layout
template<ntype_t NodeType>
std::ostream& operator<<(std::ostream& os, const FixedKeyPrefix<2, NodeType>& k) {
  return os << ">" << k.offset;
}
411
// common part of the layout-3 prefixes: at this level every element shares
// <shard, pool, hash, nspace, name>, so only snap and gen distinguish keys
struct fixed_key_3 {
  uint64_t snap = 0;
  uint64_t gen = 0;

  fixed_key_3() = default;
  fixed_key_3(const ghobject_t& oid)
    : snap{oid.hobj.snap}, gen{oid.generation}
  {}
  // three-way compare on <snap, gen>
  ordering_t compare(const ghobject_t& oid) const {
    return cmp(std::tie(snap, gen),
               std::tie(oid.hobj.snap.val, oid.generation));
  }
  // no object likes each other at this level
  constexpr likes_t likes(const fixed_key_3&) const {
    return likes_t::no;
  }
  void update_with_oid(const ghobject_t& oid) {
    snap = oid.hobj.snap;
    gen = oid.generation;
  }
  void update_oid(ghobject_t& oid) const {
    oid.hobj.snap = snap;
    oid.generation = gen;
  }
};
437
// print non-sentinel snap/gen; prints nothing when both are unset
static inline std::ostream& operator<<(std::ostream& os, const fixed_key_3& k) {
  if (k.snap != CEPH_NOSNAP) {
    os << "s" << k.snap << ",";
  }
  if (k.gen != ghobject_t::NO_GEN) {
    os << "g" << k.gen << ",";
  }
  return os;
}
447
// all elements in this node must share the same <shard, pool, hash, namespace, oid>
// but the unlike other FixedKeyPrefix<>, a node with FixedKeyPrefix<3> does not have
// variable_sized_key, so if it is an inner node, we can just embed the child
// addr right in the key.
template<>
struct FixedKeyPrefix<3, ntype_t::inner> : public fixed_key_3 {
  // the item is embedded in the key
  static constexpr bool item_in_key = true;
  laddr_t child_addr = 0;

  FixedKeyPrefix() = default;
  void set(const ghobject_t& oid, laddr_t new_child_addr) {
    update_with_oid(oid);
    child_addr = new_child_addr;
  }
  // unlikely get called, though..
  void update(const ghobject_t& oid) {}
  // lowering from a wider layout: snap/gen are rebuilt by the caller via a
  // reference oid, only the embedded child address is taken here
  template<int N>
  std::enable_if_t<N < 3> set(const FixedKeyPrefix<N, ntype_t::inner>&,
                              laddr_t new_child_addr) {
    child_addr = new_child_addr;
  }
  void set(const FixedKeyPrefix& k, laddr_t new_child_addr) {
    snap = k.snap;
    gen = k.gen;
    child_addr = new_child_addr;
  }
  // take snap/gen from a variable suffix together with a new child address
  void set(const variable_key_suffix& k, laddr_t new_child_addr) {
    snap = k.snap;
    gen = k.gen;
    child_addr = new_child_addr;
  }
};
481
// leaf flavour of the layout-3 prefix: the onode cannot be embedded in the
// key, so an offset to its cell is kept instead
template<>
struct FixedKeyPrefix<3, ntype_t::leaf> : public fixed_key_3 {
  static constexpr bool item_in_key = false;
  // offset of this key's cell, measured from the end of the node
  uint16_t offset = 0;

  FixedKeyPrefix() = default;
  void set(const ghobject_t& oid, uint16_t new_offset) {
    update_with_oid(oid);
    offset = new_offset;
  }
  void set(const FixedKeyPrefix& k, uint16_t new_offset) {
    snap = k.snap;
    gen = k.gen;
    offset = new_offset;
  }
  // lowering from a wider layout: snap/gen are rebuilt by the caller,
  // only the cell offset is taken here
  template<int N>
  std::enable_if_t<N < 3> set(const FixedKeyPrefix<N, ntype_t::leaf>&,
                              uint16_t new_offset) {
    offset = new_offset;
  }
};
503
// one-byte node header combining the layout number (0-3) and the node type
struct tag_t {
  template<int N, ntype_t node_type>
  static constexpr tag_t create() {
    // layout number must be one of the four defined layouts
    static_assert(std::clamp(N, 0, 3) == N);
    return tag_t{N, static_cast<uint8_t>(node_type)};
  }
  bool is_leaf() const {
    return type() == ntype_t::leaf;
  }
  int layout() const {
    return layout_type;
  }
  ntype_t type() const {
    return ntype_t{node_type};
  }
  // two 4-bit fields packed into a single byte
  int layout_type : 4;
  uint8_t node_type : 4;
};
522
// print the layout number and leafness encoded in a node tag
static inline std::ostream& operator<<(std::ostream& os, const tag_t& tag) {
  return os << "n=" << tag.layout() << ", leaf=" << tag.is_leaf();
}
526
// for calculating size of variable-sized item/key: scalars contribute their
// sizeof, everything else reports its own size()
template<class T>
size_t size_of(const T& t) {
  if constexpr (std::is_scalar_v<std::decay_t<T>>) {
    return sizeof(std::decay_t<T>);
  } else {
    return t.size();
  }
}
537
// verdict of a node's occupancy (see node_t::size_state)
enum class size_state_t {
  okay,        // within bounds
  underflow,   // below the minimum fill
  overflow,    // exceeds the node's capacity
};
543
// layout of a node of B+ tree
//
// it is different from a typical B+ tree in following ways
// - the size of keys is not necessarily fixed, neither is the size of value.
// - the max number of elements in a node is determined by the total size of
//   the keys and values in the node
// - in internal nodes, each key maps to the logical address of the child
//   node whose minimum key is greater or equal to that key.
template<size_t BlockSize,
         int N,
         ntype_t NodeType>
struct node_t {
  static_assert(std::clamp(N, 0, 3) == N);
  constexpr static ntype_t node_type = NodeType;
  constexpr static int node_n = N;

  // fixed-size key prefix shared by all keys of this layout
  using key_prefix_t = FixedKeyPrefix<N, NodeType>;
  // leaf nodes hold onodes, inner nodes hold child addresses
  using item_t = std::conditional_t<NodeType == ntype_t::leaf,
                                    onode_t,
                                    child_addr_t>;
  using const_item_t = std::conditional_t<NodeType == ntype_t::leaf,
                                          const onode_t&,
                                          child_addr_t>;
  static constexpr bool item_in_key = key_prefix_t::item_in_key;
  // layouts 0..2 keep a variable-sized key tail per element; layout 3
  // factors the whole key into the prefix
  using key_suffix_t = std::conditional_t<N < 3,
                                          variable_key_suffix,
                                          empty_key_suffix>;

  // prefix and suffix of the key stored at the given slot
  std::pair<const key_prefix_t&, const key_suffix_t&>
  key_at(unsigned slot) const;

  // update an existing oid with the specified item
  ghobject_t get_oid_at(unsigned slot, const ghobject_t& oid) const;
  const_item_t item_at(const key_prefix_t& key) const;
  void dump(std::ostream& os) const;

  // for debugging only.
  static constexpr bool is_leaf() {
    return node_type == ntype_t::leaf;
  }

  // run-time counterpart of is_leaf(), read from the on-disk tag
  bool _is_leaf() const {
    return tag.is_leaf();
  }

  // address offset bytes back from the end of this node
  char* from_end(uint16_t offset);
  const char* from_end(uint16_t offset) const;
  uint16_t used_space() const;
  uint16_t free_space() const {
    return capacity() - used_space();
  }
  static uint16_t capacity();
  // TODO: if it's allowed to update 2 siblings at the same time, we can have
  // B* tree
  static constexpr uint16_t min_size();


  // calculate the allowable bounds on bytes to remove from an overflow node
  // with specified size
  // @param size the overflowed size
  // @return <minimum bytes to grab, maximum bytes to grab>
  static constexpr std::pair<int16_t, int16_t> bytes_to_remove(uint16_t size);

  // calculate the allowable bounds on bytes to add to an underflow node
  // with specified size
  // @param size the underflowed size
  // @return <minimum bytes to push, maximum bytes to push>
  static constexpr std::pair<int16_t, int16_t> bytes_to_add(uint16_t size);

  size_state_t size_state(uint16_t size) const;
  bool is_underflow(uint16_t size) const;
  int16_t size_with_key(unsigned slot, const ghobject_t& oid) const;
  ordering_t compare_with_slot(unsigned slot, const ghobject_t& oid) const;
  /// return the slot number of the first slot that is greater or equal to
  /// key
  std::pair<unsigned, bool> lower_bound(const ghobject_t& oid) const;
  static uint16_t size_of_item(const ghobject_t& oid, const item_t& item);
  bool is_overflow(const ghobject_t& oid, const item_t& item) const;
  bool is_overflow(const ghobject_t& oid, const OnodeRef& item) const;

  // inserts an item into the given slot, pushing all subsequent keys forward
  // @note if the item is not embedded in key, shift the right half as well
  void insert_at(unsigned slot, const ghobject_t& oid, const item_t& item);
  // used by InnerNode for updating the keys indexing its children when their lower boundaries
  // is updated
  void update_key_at(unsigned slot, const ghobject_t& oid);
  // try to figure out the number of elements and total size when trying to
  // rebalance by moving the elements from the front of this node when its
  // left sibling node is underflow
  //
  // @param min_grab lower bound of the number of bytes to move
  // @param max_grab upper bound of the number of bytes to move
  // @return the number of element to grab
  // @note return {0, 0} if current node would be underflow if
  //       @c min_grab bytes of elements are taken from it
  std::pair<unsigned, uint16_t> calc_grab_front(uint16_t min_grab, uint16_t max_grab) const;
  // try to figure out the number of elements and their total size when trying to
  // rebalance by moving the elements from the end of this node when its right
  // sibling node is underflow
  //
  // @param min_grab lower bound of the number of bytes to move
  // @param max_grab upper bound of the number of bytes to move
  // @return the number of element to grab
  // @note return {0, 0} if current node would be underflow if
  //       @c min_grab bytes of elements are taken from it
  std::pair<unsigned, uint16_t> calc_grab_back(uint16_t min_grab, uint16_t max_grab) const;
  template<int LeftN, class Mover> void grab_from_left(
    node_t<BlockSize, LeftN, NodeType>& left,
    unsigned n, uint16_t bytes,
    Mover& mover);
  template<int RightN, class Mover>
  delta_t acquire_right(node_t<BlockSize, RightN, NodeType>& right,
                        unsigned whoami, Mover& mover);
  // transfer n elements at the front of given node to me
  template<int RightN, class Mover>
  void grab_from_right(node_t<BlockSize, RightN, NodeType>& right,
                       unsigned n, uint16_t bytes,
                       Mover& mover);
  template<int LeftN, class Mover>
  void push_to_left(node_t<BlockSize, LeftN, NodeType>& left,
                    unsigned n, uint16_t bytes,
                    Mover& mover);
  template<int RightN, class Mover>
  void push_to_right(node_t<BlockSize, RightN, NodeType>& right,
                     unsigned n, uint16_t bytes,
                     Mover& mover);
  // [to, from) are removed, so we need to shift left
  // actually there are only two use cases:
  // - to = 0: for giving elements in bulk
  // - to = from - 1: for removing a single element
  // old: |////|.....|    |.....|/|........|
  // new:      |.....|    |.....||........|
  void shift_left(unsigned from, unsigned to);
  void insert_front(const ceph::bufferptr& keys_buf, const ceph::bufferptr& cells_buf);
  void insert_back(const ceph::bufferptr& keys_buf, const ceph::bufferptr& cells_buf);
  // one or more elements are inserted, so we need to shift the elements right
  // actually there are only two use cases:
  // - bytes != 0: for inserting bytes before from
  // - bytes = 0: for inserting a single element before from
  // old:      ||.....|
  // new: |/////|.....|
  void shift_right(unsigned n, unsigned bytes);
  // shift all keys after slot is removed.
  // @note if the item is not embdedded in key, all items sitting at the left
  //       side of it will be shifted right
  void remove_from(unsigned slot);
  void trim_right(unsigned n);
  void play_delta(const delta_t& delta);
  // in-node layout: key prefixes grow forward after the header, the cells
  // they point at grow backward from the end of the block
  //    /-------------------------------|
  //    |                               V
  // |header|k0|k1|k2|... | / / |k2'v2|k1'v1|k0'.v0| v_m |
  //        |<-- count -->|
  tag_t tag = tag_t::create<N, NodeType>();
  // the count of values in the node
  uint16_t count = 0;
  key_prefix_t keys[];
};
701
// a "trap" mover: this primary template is only selected when no
// specialization below matches; it must never be used at run time
template<class parent_t,
         class from_t,
         class to_t,
         typename=void>
class EntryMover {
public:
  // a "trap" mover
  EntryMover(const parent_t&, from_t&, to_t& dst, unsigned) {
    assert(0);
  }
  void move_from(unsigned, unsigned, unsigned) {
    assert(0);
  }
  delta_t get_delta() {
    return delta_t::nop();
  }
};
719
// lower the layout, for instance, from L0 to L1, no reference oid is used
template<class parent_t,
         class from_t,
         class to_t>
class EntryMover<parent_t,
                 from_t,
                 to_t,
                 std::enable_if_t<from_t::node_n < to_t::node_n>>
{
public:
  EntryMover(const parent_t&, from_t& src, to_t& dst, unsigned)
    : src{src}, dst{dst}
  {}
  // stage the move of n elements starting at src_first in src so they land
  // at dst_first in dst; nothing is applied directly, the changes are
  // recorded as one delta per node (from_delta()/to_delta())
  void move_from(unsigned src_first, unsigned dst_first, unsigned n)
  {
    ceph::bufferptr keys_buf{n * sizeof(to_t::key_prefix_t)};
    ceph::bufferptr cells_buf;
    auto dst_keys = reinterpret_cast<typename to_t::key_prefix_t*>(keys_buf.c_str());
    if constexpr (to_t::item_in_key) {
      // items are embedded in the destination keys: rebuild each key from
      // the source suffix plus its item
      for (unsigned i = 0; i < n; i++) {
        const auto& [prefix, suffix] = src.key_at(src_first + i);
        dst_keys[i].set(suffix, src.item_at(prefix));
      }
    } else {
      // copy keys
      uint16_t src_offset = src_first > 0 ? src.keys[src_first - 1].offset : 0;
      uint16_t dst_offset = dst_first > 0 ? dst.keys[dst_first - 1].offset : 0;
      for (unsigned i = 0; i < n; i++) {
        auto& src_key = src.keys[src_first + i];
        // rebase each cell offset from src's coordinates into dst's
        uint16_t offset = src_key.offset - src_offset + dst_offset;
        dst_keys[i].set(src_key, offset);
      }
      // copy cells in bulk, yay!
      auto src_end = src.keys[src_first + n - 1].offset;
      uint16_t total_cell_size = src_end - src_offset;
      cells_buf = ceph::bufferptr{total_cell_size};
      cells_buf.copy_in(0, total_cell_size, src.from_end(src_end));
    }
    if (dst_first == dst.count) {
      dst_delta = delta_t::insert_back(keys_buf, cells_buf);
    } else {
      dst_delta = delta_t::insert_front(keys_buf, cells_buf);
    }
    if (src_first > 0 && src_first + n == src.count) {
      // NOTE(review): the identical-layout mover encodes this same case as
      // trim_right(n), not trim_right(src_first) -- verify which argument
      // delta_t::trim_right expects; one of the two looks wrong
      src_delta = delta_t::trim_right(src_first);
    } else if (src_first == 0 && n < src.count) {
      src_delta = delta_t::shift_left(n);
    } else if (src_first == 0 && n == src.count) {
      // the caller will retire the src extent
    } else {
      // grab in the middle?
      assert(0);
    }
  }

  delta_t from_delta() {
    return std::move(src_delta);
  }
  delta_t to_delta() {
    return std::move(dst_delta);
  }
private:
  const from_t& src;
  const to_t& dst;
  delta_t dst_delta;
  delta_t src_delta;
};
787
788 // lift the layout, for instance, from L2 to L0, need a reference oid
789 template<class parent_t,
790 class from_t,
791 class to_t>
792 class EntryMover<parent_t, from_t, to_t,
793 std::enable_if_t<(from_t::node_n > to_t::node_n)>>
794 {
795 public:
796 EntryMover(const parent_t& parent, from_t& src, to_t& dst, unsigned from_slot)
797 : src{src}, dst{dst}, ref_oid{parent->get_oid_at(from_slot, {})}
798 {}
799 void move_from(unsigned src_first, unsigned dst_first, unsigned n)
800 {
801 ceph::bufferptr keys_buf{n * sizeof(to_t::key_prefix_t)};
802 ceph::bufferptr cells_buf;
803 auto dst_keys = reinterpret_cast<typename to_t::key_prefix_t*>(keys_buf.c_str());
804 uint16_t in_node_offset = dst_first > 0 ? dst.keys[dst_first - 1].offset : 0;
805 static_assert(!std::is_same_v<typename to_t::key_suffix_t, empty_key_suffix>);
806 // copy keys
807 uint16_t buf_offset = 0;
808 for (unsigned i = 0; i < n; i++) {
809 auto& src_key = src.keys[src_first + i];
810 if constexpr (std::is_same_v<typename from_t::key_suffix_t, empty_key_suffix>) {
811 // heterogeneous partial key, have to rebuild dst partial key from oid
812 src_key.update_oid(ref_oid);
813 const auto& src_item = src.item_at(src_key);
814 size_t key2_size = to_t::key_suffix_t::size_from(ref_oid);
815 buf_offset += key2_size + size_of(src_item);
816 dst_keys[i].set(ref_oid, in_node_offset + buf_offset);
817 auto p = from_end(cells_buf, buf_offset);
818 auto partial_key = reinterpret_cast<typename to_t::key_suffix_t*>(p);
819 partial_key->set(ref_oid);
820 p += key2_size;
821 auto dst_item = reinterpret_cast<typename to_t::item_t*>(p);
822 *dst_item = src_item;
823 } else {
824 // homogeneous partial key, just update the pointers
825 uint16_t src_offset = src_first > 0 ? src.keys[src_first - 1].offset : 0;
826 uint16_t dst_offset = dst_first > 0 ? dst.keys[dst_first - 1].offset : 0;
827 uint16_t offset = src_key.offset - src_offset + dst_offset;
828 dst_keys[i].set(ref_oid, in_node_offset + offset);
829 }
830 }
831 if constexpr (std::is_same_v<typename to_t::key_suffix_t,
832 typename from_t::key_suffix_t>) {
833 // copy cells in bulk, yay!
834 uint16_t src_offset = src_first > 0 ? src.keys[src_first - 1].offset : 0;
835 uint16_t src_end = src.keys[src_first + n - 1].offset;
836 uint16_t total_cell_size = src_end - src_offset;
837 cells_buf.copy_in(0, total_cell_size, src.from_end(src_end));
838 }
839 if (dst_first == dst.count) {
840 dst_delta = delta_t::insert_back(keys_buf, cells_buf);
841 } else {
842 dst_delta = delta_t::insert_front(keys_buf, cells_buf);
843 }
844 if (src_first + n == src.count && src_first > 0) {
845 src_delta = delta_t::trim_right(src_first);
846 } else {
847 // the caller will retire the src extent
848 assert(src_first == 0);
849 }
850 }
851
852 delta_t from_delta() {
853 return std::move(src_delta);
854 }
855 delta_t to_delta() {
856 return std::move(dst_delta);
857 }
858 private:
859 char* from_end(ceph::bufferptr& ptr, uint16_t offset) {
860 return ptr.end_c_str() - static_cast<int>(offset);
861 }
862 private:
863 const from_t& src;
864 const to_t& dst;
865 delta_t dst_delta;
866 delta_t src_delta;
867 ghobject_t ref_oid;
868 };
869
// identical layout, yay!
template<class parent_t,
         class child_t>
class EntryMover<parent_t, child_t, child_t>
{
public:
  EntryMover(const parent_t&, child_t& src, child_t& dst, unsigned)
    : src{src}, dst{dst}
  {}

  // stage the move of n elements starting at src_first in src so they land
  // at dst_first in dst; keys are copied verbatim, only cell offsets are
  // rebased, and the changes are recorded as one delta per node
  void move_from(unsigned src_first, unsigned dst_first, unsigned n)
  {
    ceph::bufferptr keys_buf{static_cast<unsigned>(n * sizeof(typename child_t::key_prefix_t))};
    ceph::bufferptr cells_buf;
    auto dst_keys = reinterpret_cast<typename child_t::key_prefix_t*>(keys_buf.c_str());

    // copy keys
    std::copy(src.keys + src_first, src.keys + src_first + n,
              dst_keys);
    if constexpr (!child_t::item_in_key) {
      uint16_t src_offset = src_first > 0 ? src.keys[src_first - 1].offset : 0;
      uint16_t dst_offset = dst_first > 0 ? dst.keys[dst_first - 1].offset : 0;
      const int offset_delta = dst_offset - src_offset;
      // update the pointers
      for (unsigned i = 0; i < n; i++) {
        dst_keys[i].offset += offset_delta;
      }
      // copy cells in bulk, yay!
      auto src_end = src.keys[src_first + n - 1].offset;
      uint16_t total_cell_size = src_end - src_offset;
      cells_buf = ceph::bufferptr{total_cell_size};
      cells_buf.copy_in(0, total_cell_size, src.from_end(src_end));
    }
    if (dst_first == dst.count) {
      dst_delta = delta_t::insert_back(std::move(keys_buf), std::move(cells_buf));
    } else {
      dst_delta = delta_t::insert_front(std::move(keys_buf), std::move(cells_buf));
    }
    if (src_first + n == src.count && src_first > 0) {
      // NOTE(review): the layout-lowering mover encodes this same case as
      // trim_right(src_first), not trim_right(n) -- verify which argument
      // delta_t::trim_right expects; one of the two looks wrong
      src_delta = delta_t::trim_right(n);
    } else if (src_first == 0 && n < src.count) {
      src_delta = delta_t::shift_left(n);
    } else if (src_first == 0 && n == src.count) {
      // the caller will retire the src extent
    } else {
      // grab in the middle?
      assert(0);
    }
  }

  delta_t from_delta() {
    return std::move(src_delta);
  }

  delta_t to_delta() {
    return std::move(dst_delta);
  }
private:
  // address offset bytes back from the end of the buffer
  char* from_end(ceph::bufferptr& ptr, uint16_t offset) {
    return ptr.end_c_str() - static_cast<int>(offset);
  }
private:
  const child_t& src;
  const child_t& dst;
  delta_t src_delta;
  delta_t dst_delta;
};
937
938 template<class parent_t, class from_t, class to_t>
939 EntryMover<parent_t, from_t, to_t>
940 make_mover(const parent_t& parent, from_t& src, to_t& dst, unsigned from_slot) {
941 return EntryMover<parent_t, from_t, to_t>(parent, src, dst, from_slot);
942 }