]> git.proxmox.com Git - ceph.git/blame - ceph/src/crimson/os/seastore/btree/fixed_kv_btree.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / crimson / os / seastore / btree / fixed_kv_btree.h
CommitLineData
1e59de90
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
2// vim: ts=8 sw=2 smarttab expandtab
3
4#pragma once
5
6#include <boost/container/static_vector.hpp>
7#include <sys/mman.h>
8#include <memory>
9#include <string.h>
10
11#include "crimson/os/seastore/logging.h"
12
13#include "crimson/os/seastore/cache.h"
14#include "crimson/os/seastore/seastore_types.h"
15#include "crimson/os/seastore/btree/btree_range_pin.h"
16#include "crimson/os/seastore/root_block.h"
17
// Sentinel child pointer marking a reserved (not yet materialized) slot;
// non-null so it is distinguishable from "absent" (nullptr), but never
// dereferenceable.
#define RESERVATION_PTR reinterpret_cast<ChildableCachedExtent*>(0x1)

namespace crimson::os::seastore::lba_manager::btree {
struct lba_map_val_t;
}

namespace crimson::os::seastore {

// True iff child is a real extent pointer (presumably excludes nullptr and
// RESERVATION_PTR -- see definition elsewhere).
bool is_valid_child_ptr(ChildableCachedExtent* child);

// Accessor for the physical tree root embedded in root_t, specialized per
// tree type T (e.g. LBA tree vs backref tree).
template <typename T>
phy_tree_root_t& get_phy_tree_root(root_t& r);

using get_phy_tree_root_node_ret =
  std::pair<bool,
    ::crimson::interruptible::interruptible_future<
      typename trans_intr::condition, CachedExtentRef>>;

// Fetches the in-memory root node for tree type T from root_block; the bool
// component signals whether a root node reference was available (semantics
// defined by the specialization).
template <typename T, typename key_t>
const get_phy_tree_root_node_ret get_phy_tree_root_node(
  const RootBlockRef &root_block,
  op_context_t<key_t> c);

// Installs root_node as the in-memory root pointer on root_block.
template <typename ROOT_T>
void link_phy_tree_root_node(RootBlockRef &root_block, ROOT_T* root_node);

// Clears the in-memory root pointer for tree type T on root_block.
template <typename T>
void unlink_phy_tree_root_node(RootBlockRef &root_block);

// Per-transaction statistics bucket for tree type T.
template <typename T>
Transaction::tree_stats_t& get_tree_stats(Transaction &t);
49
/**
 * FixedKVBtree
 *
 * B-tree of fixed-size nodes keyed by node_key_t mapping to node_val_t,
 * with internal_node_t/leaf_node_t as the node extent types.  Instantiated
 * for the LBA and backref trees (see get_phy_tree_root specializations).
 */
template <
  typename node_key_t,
  typename node_val_t,
  typename internal_node_t,
  typename leaf_node_t,
  typename pin_t,
  size_t node_size,
  bool leaf_has_children>
class FixedKVBtree {
  // maximum supported tree depth; bounds the iterator's static_vector
  static constexpr size_t MAX_DEPTH = 16;
  using self_type = FixedKVBtree<
    node_key_t,
    node_val_t,
    internal_node_t,
    leaf_node_t,
    pin_t,
    node_size,
    leaf_has_children>;
public:
  using InternalNodeRef = TCachedExtentRef<internal_node_t>;
  using LeafNodeRef = TCachedExtentRef<leaf_node_t>;

  using base_ertr = crimson::errorator<
    crimson::ct_error::input_output_error>;
  using base_iertr = trans_iertr<base_ertr>;

  class iterator;
  using iterator_fut = base_iertr::future<iterator>;

  // Callback invoked for each node visited during lookups that opt in;
  // receives paddr, begin key, length, depth, extent type and the iterator.
  using mapped_space_visitor_t = std::function<
    void(paddr_t, node_key_t, extent_len_t, depth_t, extent_types_t, iterator&)>;
81
  /**
   * iterator
   *
   * A position in the tree represented as the root-to-leaf path: one
   * node_position_t per internal level plus one for the leaf.  internal[0]
   * corresponds to depth 2, the internal level directly above the leaf
   * (see get_internal()).  An iterator whose leaf position equals the leaf
   * size is "at boundary", which externally means end() (see is_end()).
   */
  class iterator {
  public:
    iterator(const iterator &rhs) noexcept :
      internal(rhs.internal), leaf(rhs.leaf) {}
    iterator(iterator &&rhs) noexcept :
      internal(std::move(rhs.internal)), leaf(std::move(rhs.leaf)) {}

    iterator &operator=(const iterator &) = default;
    iterator &operator=(iterator &&) = default;

    /// Returns an iterator to the following entry; *this must not be end().
    /// visitor, if non-null, is forwarded to the descent performed when the
    /// step crosses a leaf boundary.
    iterator_fut next(
      op_context_t<node_key_t> c,
      mapped_space_visitor_t *visitor=nullptr) const
    {
      assert_valid();
      assert(!is_end());

      auto ret = *this;
      ret.leaf.pos++;
      if (ret.at_boundary()) {
        // stepped off the end of this leaf: climb to the nearest ancestor
        // with a next child and descend to its leftmost leaf entry
        return seastar::do_with(
          ret,
          [c, visitor](auto &ret) mutable {
            return ret.handle_boundary(
              c, visitor
            ).si_then([&ret] {
              return std::move(ret);
            });
          });
      } else {
        return iterator_fut(
          interruptible::ready_future_marker{},
          ret);
      }

    }

    /// Returns an iterator to the preceding entry; *this must not be begin().
    iterator_fut prev(op_context_t<node_key_t> c) const
    {
      assert_valid();
      assert(!is_begin());

      auto ret = *this;

      if (ret.leaf.pos > 0) {
        // fast path: predecessor is within the same leaf
        ret.leaf.pos--;
        return iterator_fut(
          interruptible::ready_future_marker{},
          ret);
      }

      // find the lowest internal level at which we can step left
      depth_t depth_with_space = 2;
      for (; depth_with_space <= get_depth(); ++depth_with_space) {
        if (ret.get_internal(depth_with_space).pos > 0) {
          break;
        }
      }

      assert(depth_with_space <= ret.get_depth()); // must not be begin()
      return seastar::do_with(
        std::move(ret),
        // descend taking the rightmost child/entry at every level
        [](const internal_node_t &internal) { return --internal.end(); },
        [](const leaf_node_t &leaf) { return --leaf.end(); },
        [c, depth_with_space](auto &ret, auto &li, auto &ll) {
          // invalidate the path below the level we stepped at
          for (depth_t depth = 2; depth < depth_with_space; ++depth) {
            ret.get_internal(depth).reset();
          }
          ret.leaf.reset();
          ret.get_internal(depth_with_space).pos--;
          // note, cannot result in at_boundary() by construction
          return lookup_depth_range(
            c, ret, depth_with_space - 1, 0, li, ll, nullptr
          ).si_then([&ret] {
            assert(!ret.at_boundary());
            return std::move(ret);
          });
        });
    }

    /// Debug check: every level of the path is populated and in range.
    void assert_valid() const {
      assert(leaf.node);
      assert(leaf.pos <= leaf.node->get_size());

      for (auto &i: internal) {
        (void)i;
        assert(i.node);
        assert(i.pos < i.node->get_size());
      }
    }

    depth_t get_depth() const {
      return internal.size() + 1;
    }

    /// Path entry for an internal level; depth 2 is just above the leaf.
    auto &get_internal(depth_t depth) {
      assert(depth > 1);
      assert((depth - 2) < internal.size());
      return internal[depth - 2];
    }

    const auto &get_internal(depth_t depth) const {
      assert(depth > 1);
      assert((depth - 2) < internal.size());
      return internal[depth - 2];
    }

    node_key_t get_key() const {
      assert(!is_end());
      return leaf.node->iter_idx(leaf.pos).get_key();
    }
    node_val_t get_val() const {
      assert(!is_end());
      auto ret = leaf.node->iter_idx(leaf.pos).get_val();
      // LBA tree values may hold node-relative paddrs; resolve them against
      // the leaf's own paddr before returning
      if constexpr (
        std::is_same_v<crimson::os::seastore::lba_manager::btree::lba_map_val_t,
                       node_val_t>) {
        ret.paddr = ret.paddr.maybe_relative_to(leaf.node->get_paddr());
      }
      return ret;
    }

    bool is_end() const {
      // external methods may only resolve at a boundary if at end
      return at_boundary();
    }

    bool is_begin() const {
      for (auto &i: internal) {
        if (i.pos != 0)
          return false;
      }
      return leaf.pos == 0;
    }

    /// Materializes a pin (mapping handle) for the current entry.
    PhysicalNodeMappingRef<node_key_t, typename pin_t::val_type>
    get_pin(op_context_t<node_key_t> ctx) const {
      assert(!is_end());
      auto val = get_val();
      auto key = get_key();
      return std::make_unique<pin_t>(
        ctx,
        leaf.node,
        leaf.pos,
        val,
        fixed_kv_node_meta_t<node_key_t>{ key, key + val.len, 0 });
    }

    typename leaf_node_t::Ref get_leaf_node() {
      return leaf.node;
    }

    uint16_t get_leaf_pos() {
      return leaf.pos;
    }
  private:
    iterator() noexcept {}
    iterator(depth_t depth) noexcept : internal(depth - 1) {}

    friend class FixedKVBtree;
    static constexpr uint16_t INVALID = std::numeric_limits<uint16_t>::max();
    // One (node, position) step of the root-to-leaf path.
    template <typename NodeType>
    struct node_position_t {
      typename NodeType::Ref node;
      uint16_t pos = INVALID;

      node_position_t() = default;
      node_position_t(
        typename NodeType::Ref node,
        uint16_t pos)
        : node(node), pos(pos) {}

      void reset() {
        *this = node_position_t{};
      }

      auto get_iter() {
        assert(pos != INVALID);
        assert(pos < node->get_size());
        return node->iter_idx(pos);
      }
    };
    // internal[0] is depth 2; the root is internal.back() (depth get_depth())
    boost::container::static_vector<
      node_position_t<internal_node_t>, MAX_DEPTH> internal;
    node_position_t<leaf_node_t> leaf;

    bool at_boundary() const {
      assert(leaf.pos <= leaf.node->get_size());
      return leaf.pos == leaf.node->get_size();
    }

    using handle_boundary_ertr = base_iertr;
    using handle_boundary_ret = handle_boundary_ertr::future<>;
    /// Resolves an at_boundary() iterator: advance to the first entry of the
    /// next leaf, or leave as-is (end of tree) if no ancestor has a next
    /// child.
    handle_boundary_ret handle_boundary(
      op_context_t<node_key_t> c,
      mapped_space_visitor_t *visitor)
    {
      assert(at_boundary());
      depth_t depth_with_space = 2;
      for (; depth_with_space <= get_depth(); ++depth_with_space) {
        if ((get_internal(depth_with_space).pos + 1) <
            get_internal(depth_with_space).node->get_size()) {
          break;
        }
      }

      if (depth_with_space <= get_depth()) {
        return seastar::do_with(
          // descend taking the leftmost child/entry at every level
          [](const internal_node_t &internal) { return internal.begin(); },
          [](const leaf_node_t &leaf) { return leaf.begin(); },
          [this, c, depth_with_space, visitor](auto &li, auto &ll) {
            for (depth_t depth = 2; depth < depth_with_space; ++depth) {
              get_internal(depth).reset();
            }
            leaf.reset();
            get_internal(depth_with_space).pos++;
            // note, cannot result in at_boundary() by construction
            return lookup_depth_range(
              c, *this, depth_with_space - 1, 0, li, ll, visitor
            );
          });
      } else {
        // end
        return seastar::now();
      }
    }

    /// Returns 0 if no split needed, otherwise the depth up to which nodes
    /// are full (get_depth() means the root itself must split).
    depth_t check_split() const {
      if (!leaf.node->at_max_capacity()) {
        return 0;
      }
      for (depth_t split_from = 1; split_from < get_depth(); ++split_from) {
        if (!get_internal(split_from + 1).node->at_max_capacity())
          return split_from;
      }
      return get_depth();
    }

    /// Returns 0 if no merge needed, otherwise the depth up to which nodes
    /// are below minimum capacity (get_depth() reaches the root).
    depth_t check_merge() const {
      if (!leaf.node->below_min_capacity()) {
        return 0;
      }
      for (depth_t merge_from = 1; merge_from < get_depth(); ++merge_from) {
        if (!get_internal(merge_from + 1).node->below_min_capacity())
          return merge_from;
      }
      return get_depth();
    }
  };
330
  /// Binds the tree to its (transaction-local) root block.
  FixedKVBtree(RootBlockRef &root_block) : root_block(root_block) {}

  /// Physical root descriptor for this tree type within the root block.
  auto& get_root() {
    return get_phy_tree_root<self_type>(root_block->get_root());
  }

  auto& get_root() const {
    return get_phy_tree_root<self_type>(root_block->get_root());
  }

  /// Installs root_node as the in-memory root pointer; T must derive from
  /// the internal node base type.
  template <typename T>
  void set_root_node(const TCachedExtentRef<T> &root_node) {
    static_assert(std::is_base_of_v<typename internal_node_t::base_t, T>);
    link_phy_tree_root_node(root_block, root_node.get());
  }

  /// Fetches the in-memory root node (see get_phy_tree_root_node).
  auto get_root_node(op_context_t<node_key_t> c) const {
    return get_phy_tree_root_node<self_type>(root_block, c);
  }
350
  /// mkfs
  ///
  /// Creates a depth-1 tree: a single empty leaf spanning the full key
  /// range, linked as root.  root_block must already be mutable in this
  /// transaction.  Returns the new physical root descriptor.
  using mkfs_ret = phy_tree_root_t;
  static mkfs_ret mkfs(RootBlockRef &root_block, op_context_t<node_key_t> c) {
    assert(root_block->is_mutation_pending());
    auto root_leaf = c.cache.template alloc_new_extent<leaf_node_t>(
      c.trans,
      node_size,
      placement_hint_t::HOT,
      INIT_GENERATION);
    root_leaf->set_size(0);
    // the single leaf covers the entire key space at depth 1
    fixed_kv_node_meta_t<node_key_t> meta{min_max_t<node_key_t>::min, min_max_t<node_key_t>::max, 1};
    root_leaf->set_meta(meta);
    root_leaf->range = meta;
    get_tree_stats<self_type>(c.trans).depth = 1u;
    get_tree_stats<self_type>(c.trans).extents_num_delta++;
    link_phy_tree_root_node(root_block, root_leaf.get());
    return phy_tree_root_t{root_leaf->get_paddr(), 1u};
  }
369
  /**
   * lower_bound
   *
   * @param c [in] context
   * @param addr [in] addr
   * @param visitor [in,optional] invoked on each node visited during descent
   * @param min_depth [in] stop the descent at this depth (1 == leaf)
   * @return least iterator >= key
   */
  iterator_fut lower_bound(
    op_context_t<node_key_t> c,
    node_key_t addr,
    mapped_space_visitor_t *visitor=nullptr,
    depth_t min_depth = 1) const
  {
    LOG_PREFIX(FixedKVBtree::lower_bound);
    return lookup(
      c,
      // internal levels: child whose pivot is the greatest key <= addr
      [addr](const internal_node_t &internal) {
        assert(internal.get_size() > 0);
        auto iter = internal.upper_bound(addr);
        assert(iter != internal.begin());
        --iter;
        return iter;
      },
      // leaf level: first entry with key >= addr
      [FNAME, c, addr](const leaf_node_t &leaf) {
        auto ret = leaf.lower_bound(addr);
        SUBTRACET(
          seastore_fixedkv_tree,
          "leaf addr {}, got ret offset {}, size {}, end {}",
          c.trans,
          addr,
          ret.get_offset(),
          leaf.get_size(),
          ret == leaf.end());
        return ret;
      },
      min_depth,
      visitor
    ).si_then([FNAME, c, min_depth](auto &&ret) {
      SUBTRACET(
        seastore_fixedkv_tree,
        "ret.leaf.pos {}",
        c.trans,
        ret.leaf.pos);
      // assert_valid requires a populated leaf, only present at min_depth 1
      if (min_depth == 1) {
        ret.assert_valid();
      }
      return std::move(ret);
    });
  }
419
420
  /**
   * upper_bound
   *
   * @param c [in] context
   * @param addr [in] addr
   * @return least iterator > key
   */
  iterator_fut upper_bound(
    op_context_t<node_key_t> c,
    node_key_t addr
  ) const {
    return lower_bound(
      c, addr
    ).si_then([c, addr](auto iter) {
      // lower_bound may land exactly on addr; step past it in that case
      if (!iter.is_end() && iter.get_key() == addr) {
        return iter.next(c);
      } else {
        return iterator_fut(
          interruptible::ready_future_marker{},
          iter);
      }
    });
  }
444
445 /**
446 * upper_bound_right
447 *
448 * @param c [in] context
449 * @param addr [in] addr
450 * @return least iterator i s.t. i.get_key() + i.get_val().len > key
451 */
452 iterator_fut upper_bound_right(
453 op_context_t<node_key_t> c,
454 node_key_t addr) const
455 {
456 return lower_bound(
457 c, addr
458 ).si_then([c, addr](auto iter) {
459 if (iter.is_begin()) {
460 return iterator_fut(
461 interruptible::ready_future_marker{},
462 iter);
463 } else {
464 return iter.prev(
465 c
466 ).si_then([iter, addr](auto prev) {
467 if ((prev.get_key() + prev.get_val().len) > addr) {
468 return iterator_fut(
469 interruptible::ready_future_marker{},
470 prev);
471 } else {
472 return iterator_fut(
473 interruptible::ready_future_marker{},
474 iter);
475 }
476 });
477 }
478 });
479 }
480
481 iterator_fut begin(op_context_t<node_key_t> c) const {
482 return lower_bound(c, 0);
483 }
484 iterator_fut end(op_context_t<node_key_t> c) const {
485 return upper_bound(c, min_max_t<node_key_t>::max);
486 }
487
  /**
   * check_node
   *
   * Debug validation of the parent/child pointer invariants for every entry
   * of node: each cached child must be tracked in node->children at the
   * matching position (or in the stable ancestor's children when node is
   * pending), and untracked entries must not be present in the cache.
   * child_node_t selects the expected child extent type.
   */
  template <typename child_node_t, typename node_t>
  void check_node(
    op_context_t<node_key_t> c,
    TCachedExtentRef<node_t> node)
  {
    for (auto i : *node) {
      CachedExtentRef child_node;
      // NOTE(review): ret is only assigned under the if-constexpr branches
      // below; when child_node_t is a leaf child and !leaf_has_children it
      // appears to be read uninitialized at the comparison -- confirm that
      // callers never reach this instantiation in that configuration.
      Transaction::get_extent_ret ret;

      if constexpr (std::is_base_of_v<typename internal_node_t::base_t, child_node_t>) {
        ret = c.trans.get_extent(
          i->get_val().maybe_relative_to(node->get_paddr()),
          &child_node);
      } else {
        if constexpr (leaf_has_children) {
          ret = c.trans.get_extent(
            i->get_val().paddr.maybe_relative_to(node->get_paddr()),
            &child_node);
        }
      }
      if (ret == Transaction::get_extent_ret::PRESENT) {
        if (child_node->is_mutation_pending()) {
          // mutation-pending children are tracked via their stable prior
          // instance
          auto &prior = (child_node_t &)*child_node->prior_instance;
          assert(prior.is_valid());
          assert(prior.is_parent_valid());
          if (node->is_mutation_pending()) {
            auto &n = node->get_stable_for_key(i->get_key());
            assert(prior.get_parent_node().get() == &n);
            auto pos = n.lower_bound_offset(i->get_key());
            assert(pos < n.get_node_size());
            assert(n.children[pos] == &prior);
          } else {
            assert(prior.get_parent_node().get() == node.get());
            assert(node->children[i->get_offset()] == &prior);
          }
        } else if (child_node->is_initial_pending()) {
          auto cnode = child_node->template cast<child_node_t>();
          auto pos = node->find(i->get_key()).get_offset();
          auto child = node->children[pos];
          assert(child);
          assert(child == cnode.get());
          assert(cnode->is_parent_valid());
        } else {
          assert(child_node->is_valid());
          auto cnode = child_node->template cast<child_node_t>();
          assert(cnode->has_parent_tracker());
          if (node->is_pending()) {
            auto &n = node->get_stable_for_key(i->get_key());
            assert(cnode->get_parent_node().get() == &n);
            auto pos = n.lower_bound_offset(i->get_key());
            assert(pos < n.get_node_size());
            assert(n.children[pos] == cnode.get());
          } else {
            assert(cnode->get_parent_node().get() == node.get());
            assert(node->children[i->get_offset()] == cnode.get());
          }
        }
      } else if (ret == Transaction::get_extent_ret::ABSENT) {
        // not in the transaction: a valid child pointer must still agree
        // with the parent tracker; otherwise the extent must not be cached
        ChildableCachedExtent* child = nullptr;
        if (node->is_pending()) {
          auto &n = node->get_stable_for_key(i->get_key());
          auto pos = n.lower_bound_offset(i->get_key());
          assert(pos < n.get_node_size());
          child = n.children[pos];
          if (is_valid_child_ptr(child)) {
            auto c = (child_node_t*)child;
            assert(c->has_parent_tracker());
            assert(c->get_parent_node().get() == &n);
          }
        } else {
          child = node->children[i->get_offset()];
          if (is_valid_child_ptr(child)) {
            auto c = (child_node_t*)child;
            assert(c->has_parent_tracker());
            assert(c->get_parent_node().get() == node.get());
          }
        }

        if (!is_valid_child_ptr(child)) {
          if constexpr (
            std::is_base_of_v<typename internal_node_t::base_t, child_node_t>)
          {
            assert(!c.cache.query_cache(i->get_val(), nullptr));
          } else {
            if constexpr (leaf_has_children) {
              assert(!c.cache.query_cache(i->get_val().paddr, nullptr));
            }
          }
        }
      } else {
        ceph_abort("impossible");
      }
    }
  }
582
  using check_child_trackers_ret = base_iertr::future<>;
  /// Walks the whole tree, running check_node on every internal node (and
  /// on every leaf too when leaf_has_children) to validate parent/child
  /// tracker invariants.  Debug-only full-tree scan.
  check_child_trackers_ret check_child_trackers(
    op_context_t<node_key_t> c) {
    mapped_space_visitor_t checker = [c, this](
      paddr_t,
      node_key_t,
      extent_len_t,
      depth_t depth,
      extent_types_t,
      iterator& iter) {
      // leaves without child pointers have nothing to validate
      if constexpr (!leaf_has_children) {
        if (depth == 1) {
          return seastar::now();
        }
      }
      if (depth > 1) {
        auto &node = iter.get_internal(depth).node;
        assert(node->is_valid());
        check_node<typename internal_node_t::base_t>(c, node);
      } else {
        assert(depth == 1);
        auto &node = iter.leaf.node;
        assert(node->is_valid());
        check_node<LogicalCachedExtent>(c, node);
      }
      return seastar::now();
    };

    return seastar::do_with(
      std::move(checker),
      [this, c](auto &checker) {
        // visit every node by iterating the whole key space with the
        // checker installed as the mapped-space visitor
        return iterate_repeat(
          c,
          lower_bound(
            c,
            min_max_t<node_key_t>::min,
            &checker),
          [](auto &pos) {
            if (pos.is_end()) {
              return base_iertr::make_ready_future<
                seastar::stop_iteration>(
                  seastar::stop_iteration::yes);
            }
            return base_iertr::make_ready_future<
              seastar::stop_iteration>(
                seastar::stop_iteration::no);
          },
          &checker);
      });
  }
633
  using iterate_repeat_ret_inner = base_iertr::future<
    seastar::stop_iteration>;
  /**
   * iterate_repeat
   *
   * Drives f(pos) starting from the iterator produced by iter_fut,
   * advancing pos via next() (with visitor, if given) after each call,
   * until f returns stop_iteration::yes.  f is responsible for detecting
   * end (next() is asserted to not be called at end).
   */
  template <typename F>
  static base_iertr::future<> iterate_repeat(
    op_context_t<node_key_t> c,
    iterator_fut &&iter_fut,
    F &&f,
    mapped_space_visitor_t *visitor=nullptr) {
    return std::move(
      iter_fut
    ).si_then([c, visitor, f=std::forward<F>(f)](auto iter) {
      return seastar::do_with(
        iter,
        std::move(f),
        [c, visitor](auto &pos, auto &f) {
          return trans_intr::repeat(
            [c, visitor, &f, &pos] {
              return f(
                pos
              ).si_then([c, visitor, &pos](auto done) {
                if (done == seastar::stop_iteration::yes) {
                  return iterate_repeat_ret_inner(
                    interruptible::ready_future_marker{},
                    seastar::stop_iteration::yes);
                } else {
                  // f must stop before end; advancing at end is invalid
                  ceph_assert(!pos.is_end());
                  return pos.next(
                    c, visitor
                  ).si_then([&pos](auto next) {
                    pos = next;
                    return iterate_repeat_ret_inner(
                      interruptible::ready_future_marker{},
                      seastar::stop_iteration::no);
                  });
                }
              });
            });
        });
    });
  }
674
  /**
   * insert
   *
   * Inserts val at laddr with iter as a hint. If element at laddr already
   * exists returns iterator to that element unchanged and returns false.
   *
   * Invalidates all outstanding iterators for this tree on this transaction.
   *
   * @param c [in] op context
   * @param iter [in] hint; insertion is O(1) if laddr is immediately prior
   *        to iter
   * @param laddr [in] addr at which to insert
   * @param val [in] val to insert
   * @param nextent [in] logical extent backing the new mapping
   * @return pair<iter, bool> where iter points to element at addr, bool true
   * iff element at laddr did not exist.
   */
  using insert_iertr = base_iertr;
  using insert_ret = insert_iertr::future<std::pair<iterator, bool>>;
  insert_ret insert(
    op_context_t<node_key_t> c,
    iterator iter,
    node_key_t laddr,
    node_val_t val,
    LogicalCachedExtent* nextent
  ) {
    LOG_PREFIX(FixedKVBtree::insert);
    SUBTRACET(
      seastore_fixedkv_tree,
      "inserting laddr {} at iter {}",
      c.trans,
      laddr,
      iter.is_end() ? min_max_t<node_key_t>::max : iter.get_key());
    return seastar::do_with(
      iter,
      [this, c, laddr, val, nextent](auto &ret) {
        // validate/adjust the hint to the true insertion position
        return find_insertion(
          c, laddr, ret
        ).si_then([this, c, laddr, val, &ret, nextent] {
          if (!ret.at_boundary() && ret.get_key() == laddr) {
            // element already present: report failure, leave tree unchanged
            return insert_ret(
              interruptible::ready_future_marker{},
              std::make_pair(ret, false));
          } else {
            ++(get_tree_stats<self_type>(c.trans).num_inserts);
            // split full nodes along the path before mutating the leaf
            return handle_split(
              c, ret
            ).si_then([c, laddr, val, &ret, nextent] {
              if (!ret.leaf.node->is_mutable()) {
                CachedExtentRef mut = c.cache.duplicate_for_write(
                  c.trans, ret.leaf.node
                );
                ret.leaf.node = mut->cast<leaf_node_t>();
              }
              auto iter = typename leaf_node_t::const_iterator(
                ret.leaf.node.get(), ret.leaf.pos);
              assert(iter == ret.leaf.node->lower_bound(laddr));
              assert(iter == ret.leaf.node->end() || iter->get_key() > laddr);
              assert(laddr >= ret.leaf.node->get_meta().begin &&
                     laddr < ret.leaf.node->get_meta().end);
              ret.leaf.node->insert(iter, laddr, val, nextent);
              return insert_ret(
                interruptible::ready_future_marker{},
                std::make_pair(ret, true));
            });
          }
        });
      });
  }
742
743 insert_ret insert(
744 op_context_t<node_key_t> c,
745 node_key_t laddr,
746 node_val_t val,
747 LogicalCachedExtent* nextent) {
748 return lower_bound(
749 c, laddr
750 ).si_then([this, c, laddr, val, nextent](auto iter) {
751 return this->insert(c, iter, laddr, val, nextent);
752 });
753 }
754
755 /**
756 * update
757 *
758 * Invalidates all outstanding iterators for this tree on this transaction.
759 *
760 * @param c [in] op context
761 * @param iter [in] iterator to element to update, must not be end
762 * @param val [in] val with which to update
763 * @return iterator to newly updated element
764 */
765 using update_iertr = base_iertr;
766 using update_ret = update_iertr::future<iterator>;
767 update_ret update(
768 op_context_t<node_key_t> c,
769 iterator iter,
770 node_val_t val,
771 LogicalCachedExtent* nextent)
772 {
773 LOG_PREFIX(FixedKVBtree::update);
774 SUBTRACET(
775 seastore_fixedkv_tree,
776 "update element at {}",
777 c.trans,
778 iter.is_end() ? min_max_t<node_key_t>::max : iter.get_key());
779 if (!iter.leaf.node->is_mutable()) {
780 CachedExtentRef mut = c.cache.duplicate_for_write(
781 c.trans, iter.leaf.node
782 );
783 iter.leaf.node = mut->cast<leaf_node_t>();
784 }
785 ++(get_tree_stats<self_type>(c.trans).num_updates);
786 iter.leaf.node->update(
787 iter.leaf.node->iter_idx(iter.leaf.pos),
788 val,
789 nextent);
790 return update_ret(
791 interruptible::ready_future_marker{},
792 iter);
793 }
794
795
  /**
   * remove
   *
   * Invalidates all outstanding iterators for this tree on this transaction.
   *
   * @param c [in] op context
   * @param iter [in] iterator to element to remove, must not be end
   */
  using remove_iertr = base_iertr;
  using remove_ret = remove_iertr::future<>;
  remove_ret remove(
    op_context_t<node_key_t> c,
    iterator iter)
  {
    LOG_PREFIX(FixedKVBtree::remove);
    SUBTRACET(
      seastore_fixedkv_tree,
      "remove element at {}",
      c.trans,
      iter.is_end() ? min_max_t<node_key_t>::max : iter.get_key());
    assert(!iter.is_end());
    ++(get_tree_stats<self_type>(c.trans).num_erases);
    return seastar::do_with(
      iter,
      [this, c](auto &ret) {
        // make the leaf mutable within this transaction before erasing
        if (!ret.leaf.node->is_mutable()) {
          CachedExtentRef mut = c.cache.duplicate_for_write(
            c.trans, ret.leaf.node
          );
          ret.leaf.node = mut->cast<leaf_node_t>();
        }
        ret.leaf.node->remove(
          ret.leaf.node->iter_idx(ret.leaf.pos));

        // rebalance/merge underfull nodes along the path if needed
        return handle_merge(
          c, ret
        );
      });
  }
835
  /**
   * init_cached_extent
   *
   * Checks whether e is live (reachable from fixed kv tree) and drops or initializes
   * accordingly.
   *
   * Returns if e is live.
   */
  using init_cached_extent_iertr = base_iertr;
  using init_cached_extent_ret = init_cached_extent_iertr::future<bool>;
  init_cached_extent_ret init_cached_extent(
    op_context_t<node_key_t> c,
    CachedExtentRef e)
  {
    assert(!e->is_logical());
    LOG_PREFIX(FixedKVTree::init_cached_extent);
    SUBTRACET(seastore_fixedkv_tree, "extent {}", c.trans, *e);
    if (e->get_type() == internal_node_t::TYPE) {
      auto eint = e->cast<internal_node_t>();
      // liveness test: walk to the extent's begin key and check whether the
      // node on the path at the candidate depth is this very extent
      return lower_bound(
        c, eint->get_node_meta().begin
      ).si_then([e, c, eint](auto iter) {
        // Note, this check is valid even if iter.is_end()
        LOG_PREFIX(FixedKVTree::init_cached_extent);
        depth_t cand_depth = eint->get_node_meta().depth;
        if (cand_depth <= iter.get_depth() &&
            &*iter.get_internal(cand_depth).node == &*eint) {
          SUBTRACET(
            seastore_fixedkv_tree,
            "extent {} is live",
            c.trans,
            *eint);
          return true;
        } else {
          SUBTRACET(
            seastore_fixedkv_tree,
            "extent {} is not live",
            c.trans,
            *eint);
          return false;
        }
      });
    } else if (e->get_type() == leaf_node_t::TYPE) {
      auto eleaf = e->cast<leaf_node_t>();
      return lower_bound(
        c, eleaf->get_node_meta().begin
      ).si_then([c, e, eleaf](auto iter) {
        // Note, this check is valid even if iter.is_end()
        LOG_PREFIX(FixedKVTree::init_cached_extent);
        if (iter.leaf.node == &*eleaf) {
          SUBTRACET(
            seastore_fixedkv_tree,
            "extent {} is live",
            c.trans,
            *eleaf);
          return true;
        } else {
          SUBTRACET(
            seastore_fixedkv_tree,
            "extent {} is not live",
            c.trans,
            *eleaf);
          return false;
        }
      });
    } else {
      // not a node of this tree: considered live by convention
      SUBTRACET(
        seastore_fixedkv_tree,
        "found other extent {} type {}",
        c.trans,
        *e,
        e->get_type());
      return init_cached_extent_ret(
        interruptible::ready_future_marker{},
        true);
    }
  }
913
914 /// get_leaf_if_live: get leaf node at laddr/addr if still live
915 using get_leaf_if_live_iertr = base_iertr;
916 using get_leaf_if_live_ret = get_leaf_if_live_iertr::future<CachedExtentRef>;
917 get_leaf_if_live_ret get_leaf_if_live(
918 op_context_t<node_key_t> c,
919 paddr_t addr,
920 node_key_t laddr,
921 extent_len_t len)
922 {
923 LOG_PREFIX(FixedKVBtree::get_leaf_if_live);
924 return lower_bound(
925 c, laddr
926 ).si_then([FNAME, c, addr, laddr, len](auto iter) {
927 if (iter.leaf.node->get_paddr() == addr) {
928 SUBTRACET(
929 seastore_fixedkv_tree,
930 "extent laddr {} addr {}~{} found: {}",
931 c.trans,
932 laddr,
933 addr,
934 len,
935 *iter.leaf.node);
936 return CachedExtentRef(iter.leaf.node);
937 } else {
938 SUBTRACET(
939 seastore_fixedkv_tree,
940 "extent laddr {} addr {}~{} is not live, does not match node {}",
941 c.trans,
942 laddr,
943 addr,
944 len,
945 *iter.leaf.node);
946 return CachedExtentRef();
947 }
948 });
949 }
950
951
  /// get_internal_if_live: get internal node at laddr/addr if still live
  using get_internal_if_live_iertr = base_iertr;
  using get_internal_if_live_ret = get_internal_if_live_iertr::future<CachedExtentRef>;
  get_internal_if_live_ret get_internal_if_live(
    op_context_t<node_key_t> c,
    paddr_t addr,
    node_key_t laddr,
    extent_len_t len)
  {
    LOG_PREFIX(FixedKVBtree::get_internal_if_live);
    return lower_bound(
      c, laddr
    ).si_then([FNAME, c, addr, laddr, len](auto iter) {
      // scan every internal level of the path for a node at addr; the
      // candidate's depth is unknown, so all levels are checked
      for (depth_t d = 2; d <= iter.get_depth(); ++d) {
        CachedExtent &node = *iter.get_internal(d).node;
        auto internal_node = node.cast<internal_node_t>();
        if (internal_node->get_paddr() == addr) {
          SUBTRACET(
            seastore_fixedkv_tree,
            "extent laddr {} addr {}~{} found: {}",
            c.trans,
            laddr,
            addr,
            len,
            *internal_node);
          assert(internal_node->get_node_meta().begin == laddr);
          return CachedExtentRef(internal_node);
        }
      }
      SUBTRACET(
        seastore_fixedkv_tree,
        "extent laddr {} addr {}~{} is not live, no matching internal node",
        c.trans,
        laddr,
        addr,
        len);
      return CachedExtentRef();
    });
  }
991
992
  /**
   * rewrite_extent
   *
   * Rewrites a fresh copy of extent into transaction and updates internal
   * references.
   */
  using rewrite_extent_iertr = base_iertr;
  using rewrite_extent_ret = rewrite_extent_iertr::future<>;
  rewrite_extent_ret rewrite_extent(
    op_context_t<node_key_t> c,
    CachedExtentRef e) {
    LOG_PREFIX(FixedKVBtree::rewrite_extent);
    assert(is_lba_backref_node(e->get_type()));

    auto do_rewrite = [&](auto &fixed_kv_extent) {
      // allocate a same-length replacement at the extent's target
      // placement/generation and copy the raw node content across
      auto n_fixed_kv_extent = c.cache.template alloc_new_extent<
        std::remove_reference_t<decltype(fixed_kv_extent)>
        >(
        c.trans,
        fixed_kv_extent.get_length(),
        fixed_kv_extent.get_user_hint(),
        // get target rewrite generation
        fixed_kv_extent.get_rewrite_generation());
      fixed_kv_extent.get_bptr().copy_out(
        0,
        fixed_kv_extent.get_length(),
        n_fixed_kv_extent->get_bptr().c_str());
      n_fixed_kv_extent->set_modify_time(fixed_kv_extent.get_modify_time());
      n_fixed_kv_extent->range = n_fixed_kv_extent->get_node_meta();

      // carry child-tracking state over to the replacement (internal nodes
      // always; leaves only when the leaf type tracks children)
      if (fixed_kv_extent.get_type() == internal_node_t::TYPE ||
          leaf_node_t::do_has_children) {
        if (!fixed_kv_extent.is_pending()) {
          n_fixed_kv_extent->copy_sources.emplace(&fixed_kv_extent);
          n_fixed_kv_extent->prior_instance = &fixed_kv_extent;
        } else {
          ceph_assert(fixed_kv_extent.is_mutation_pending());
          n_fixed_kv_extent->copy_sources.emplace(
            (typename internal_node_t::base_t*
             )fixed_kv_extent.get_prior_instance().get());
          n_fixed_kv_extent->children = std::move(fixed_kv_extent.children);
          n_fixed_kv_extent->prior_instance = fixed_kv_extent.get_prior_instance();
          n_fixed_kv_extent->adjust_ptracker_for_children();
        }
      }

      /* This is a bit underhanded.  Any relative addrs here must necessarily
       * be record relative as we are rewriting a dirty extent.  Thus, we
       * are using resolve_relative_addrs with a (likely negative) block
       * relative offset to correct them to block-relative offsets adjusted
       * for our new transaction location.
       *
       * Upon commit, these now block relative addresses will be interpretted
       * against the real final address.
       */
      if (!n_fixed_kv_extent->get_paddr().is_absolute()) {
        // backend_type_t::SEGMENTED
        assert(n_fixed_kv_extent->get_paddr().is_record_relative());
        n_fixed_kv_extent->resolve_relative_addrs(
          make_record_relative_paddr(0).block_relative_to(
            n_fixed_kv_extent->get_paddr()));
      } // else: backend_type_t::RANDOM_BLOCK

      SUBTRACET(
        seastore_fixedkv_tree,
        "rewriting {} into {}",
        c.trans,
        fixed_kv_extent,
        *n_fixed_kv_extent);

      // repoint the parent's (or root's) mapping from the old paddr to the
      // new extent, then retire the original
      return update_internal_mapping(
        c,
        n_fixed_kv_extent->get_node_meta().depth,
        n_fixed_kv_extent->get_node_meta().begin,
        e->get_paddr(),
        n_fixed_kv_extent->get_paddr(),
        n_fixed_kv_extent
      ).si_then([c, e] {
        c.cache.retire_extent(c.trans, e);
      });
    };

    CachedExtentRef n_fixed_kv_extent;
    if (e->get_type() == internal_node_t::TYPE) {
      auto lint = e->cast<internal_node_t>();
      return do_rewrite(*lint);
    } else {
      assert(e->get_type() == leaf_node_t::TYPE);
      auto lleaf = e->cast<leaf_node_t>();
      return do_rewrite(*lleaf);
    }
  }
1085
  /**
   * update_internal_mapping
   *
   * Updates the parent mapping of the node at (depth, laddr): the paddr
   * stored for that node is changed from old_addr to new_addr and the
   * in-memory child pointer is replaced with nextent.  Invoked from the
   * extent-rewrite path above after a node has been copied to a new
   * physical location.
   */
  using update_internal_mapping_iertr = base_iertr;
  using update_internal_mapping_ret = update_internal_mapping_iertr::future<>;
  update_internal_mapping_ret update_internal_mapping(
    op_context_t<node_key_t> c,
    depth_t depth,
    node_key_t laddr,
    paddr_t old_addr,
    paddr_t new_addr,
    typename internal_node_t::base_ref nextent)
  {
    LOG_PREFIX(FixedKVBtree::update_internal_mapping);
    SUBTRACET(
      seastore_fixedkv_tree,
      "updating laddr {} at depth {} from {} to {}, nextent {}",
      c.trans,
      laddr,
      depth,
      old_addr,
      new_addr,
      *nextent);

    // Search down to depth + 1 so the iterator's lowest populated level
    // is the *parent* of the node being remapped.
    return lower_bound(
      c, laddr, nullptr, depth + 1
    ).si_then([=, this](auto iter) {
      assert(iter.get_depth() >= depth);
      if (depth == iter.get_depth()) {
        // No parent level exists: the remapped node is the root, so the
        // new address goes into the root block itself.
        SUBTRACET(seastore_fixedkv_tree, "update at root", c.trans);

        if (laddr != min_max_t<node_key_t>::min) {
          SUBERRORT(
            seastore_fixedkv_tree,
            "updating root laddr {} at depth {} from {} to {},"
            "laddr is not 0",
            c.trans,
            laddr,
            depth,
            old_addr,
            new_addr,
            get_root().get_location());
          ceph_assert(0 == "impossible");
        }

        if (get_root().get_location() != old_addr) {
          SUBERRORT(
            seastore_fixedkv_tree,
            "updating root laddr {} at depth {} from {} to {},"
            "root addr {} does not match",
            c.trans,
            laddr,
            depth,
            old_addr,
            new_addr,
            get_root().get_location());
          ceph_assert(0 == "impossible");
        }

        root_block = c.cache.duplicate_for_write(
          c.trans, root_block)->template cast<RootBlock>();
        get_root().set_location(new_addr);
        set_root_node(nextent);
      } else {
        auto &parent = iter.get_internal(depth + 1);
        assert(parent.node);
        assert(parent.pos < parent.node->get_size());
        auto piter = parent.node->iter_idx(parent.pos);

        // Sanity: the parent slot must actually map (laddr -> old_addr).
        if (piter->get_key() != laddr) {
          SUBERRORT(
            seastore_fixedkv_tree,
            "updating laddr {} at depth {} from {} to {},"
            "node {} pos {} val pivot addr {} does not match",
            c.trans,
            laddr,
            depth,
            old_addr,
            new_addr,
            *(parent.node),
            parent.pos,
            piter->get_key());
          ceph_assert(0 == "impossible");
        }


        if (piter->get_val() != old_addr) {
          SUBERRORT(
            seastore_fixedkv_tree,
            "updating laddr {} at depth {} from {} to {},"
            "node {} pos {} val addr {} does not match",
            c.trans,
            laddr,
            depth,
            old_addr,
            new_addr,
            *(parent.node),
            parent.pos,
            piter->get_val());
          ceph_assert(0 == "impossible");
        }

        CachedExtentRef mut = c.cache.duplicate_for_write(
          c.trans,
          parent.node
        );
        typename internal_node_t::Ref mparent = mut->cast<internal_node_t>();
        mparent->update(piter, new_addr, nextent.get());

        /* Note, iter is now invalid as we didn't update either the parent
         * node reference to the new mutable instance nor did we update the
         * child pointer to the new node. Not a problem as we'll now just
         * destruct it.
         */
      }
      return seastar::now();
    });
  }
1201
1202
private:
  // Current root block for this tree handle; re-assigned via
  // duplicate_for_write whenever the root's location/depth is mutated.
  RootBlockRef root_block;

  // Shorthand for the iterator's (node, pos) pair at one tree level.
  template <typename T>
  using node_position_t = typename iterator::template node_position_t<T>;
1208
  using get_internal_node_iertr = base_iertr;
  using get_internal_node_ret = get_internal_node_iertr::future<InternalNodeRef>;
  /**
   * get_internal_node
   *
   * Reads the internal node at offset covering [begin, end) at the given
   * depth.  On a cache miss the freshly read extent is initialized by
   * init_internal: its range metadata is set and it is linked either
   * under parent_pos or, when it is the root, directly from the (stable)
   * root block.
   */
  static get_internal_node_ret get_internal_node(
    op_context_t<node_key_t> c,
    depth_t depth,
    paddr_t offset,
    node_key_t begin,
    node_key_t end,
    typename std::optional<node_position_t<internal_node_t>> parent_pos)
  {
    LOG_PREFIX(FixedKVBtree::get_internal_node);
    SUBTRACET(
      seastore_fixedkv_tree,
      "reading internal at offset {}, depth {}, begin {}, end {}",
      c.trans,
      offset,
      depth,
      begin,
      end);
    assert(depth > 1);
    auto init_internal = [c, depth, begin, end,
                          parent_pos=std::move(parent_pos)]
                         (internal_node_t &node) {
      assert(!node.is_pending());
      assert(!node.is_linked());
      node.range = fixed_kv_node_meta_t<node_key_t>{begin, end, depth};
      if (parent_pos) {
        auto &parent = parent_pos->node;
        parent->link_child(&node, parent_pos->pos);
      } else {
        // Root node: hang it off the root block.  If the root block is
        // mutation pending, link from its stable prior instance instead.
        assert(node.range.is_root());
        auto root_block = c.cache.get_root_fast(c.trans);
        if (root_block->is_mutation_pending()) {
          auto &stable_root = (RootBlockRef&)*root_block->get_prior_instance();
          link_phy_tree_root_node(stable_root, &node);
        } else {
          assert(!root_block->is_pending());
          link_phy_tree_root_node(root_block, &node);
        }
      }
    };
    return c.cache.template get_absent_extent<internal_node_t>(
      c.trans,
      offset,
      node_size,
      init_internal
    ).si_then([FNAME, c, offset, init_internal, depth, begin, end](
              typename internal_node_t::Ref ret) {
      SUBTRACET(
        seastore_fixedkv_tree,
        "read internal at offset {} {}",
        c.trans,
        offset,
        *ret);
      // This can only happen during init_cached_extent
      // or when backref extent being rewritten by gc space reclaiming
      if (!ret->is_pending() && !ret->is_linked()) {
        assert(ret->is_dirty()
          || (is_backref_node(ret->get_type())
              && ret->is_clean()));
        init_internal(*ret);
      }
      // Validate that the on-disk metadata matches what the caller expects.
      auto meta = ret->get_meta();
      if (ret->get_size()) {
        ceph_assert(meta.begin <= ret->begin()->get_key());
        ceph_assert(meta.end > (ret->end() - 1)->get_key());
      }
      ceph_assert(depth == meta.depth);
      ceph_assert(begin == meta.begin);
      ceph_assert(end == meta.end);
      return get_internal_node_ret(
        interruptible::ready_future_marker{},
        ret);
    });
  }
1284
1285
  using get_leaf_node_iertr = base_iertr;
  using get_leaf_node_ret = get_leaf_node_iertr::future<LeafNodeRef>;
  /**
   * get_leaf_node
   *
   * Reads the leaf node at offset covering [begin, end).  On a cache
   * miss the freshly read extent is initialized by init_leaf: its range
   * metadata is set (depth fixed at 1) and it is linked either under
   * parent_pos or, when it is the root, directly from the (stable) root
   * block.
   */
  static get_leaf_node_ret get_leaf_node(
    op_context_t<node_key_t> c,
    paddr_t offset,
    node_key_t begin,
    node_key_t end,
    typename std::optional<node_position_t<leaf_node_t>> parent_pos)
  {
    LOG_PREFIX(FixedKVBtree::get_leaf_node);
    SUBTRACET(
      seastore_fixedkv_tree,
      "reading leaf at offset {}, begin {}, end {}",
      c.trans,
      offset,
      begin,
      end);
    auto init_leaf = [c, begin, end,
                      parent_pos=std::move(parent_pos)]
                     (leaf_node_t &node) {
      assert(!node.is_pending());
      assert(!node.is_linked());
      node.range = fixed_kv_node_meta_t<node_key_t>{begin, end, 1};
      if (parent_pos) {
        auto &parent = parent_pos->node;
        parent->link_child(&node, parent_pos->pos);
      } else {
        // Root leaf: hang it off the root block.  If the root block is
        // mutation pending, link from its stable prior instance instead.
        assert(node.range.is_root());
        auto root_block = c.cache.get_root_fast(c.trans);
        if (root_block->is_mutation_pending()) {
          auto &stable_root = (RootBlockRef&)*root_block->get_prior_instance();
          link_phy_tree_root_node(stable_root, &node);
        } else {
          assert(!root_block->is_pending());
          link_phy_tree_root_node(root_block, &node);
        }
      }
    };
    return c.cache.template get_absent_extent<leaf_node_t>(
      c.trans,
      offset,
      node_size,
      init_leaf
    ).si_then([FNAME, c, offset, init_leaf, begin, end]
              (typename leaf_node_t::Ref ret) {
      SUBTRACET(
        seastore_fixedkv_tree,
        "read leaf at offset {} {}",
        c.trans,
        offset,
        *ret);
      // This can only happen during init_cached_extent
      // or when backref extent being rewritten by gc space reclaiming
      if (!ret->is_pending() && !ret->is_linked()) {
        assert(ret->is_dirty()
          || (is_backref_node(ret->get_type())
              && ret->is_clean()));
        init_leaf(*ret);
      }
      // Validate that the on-disk metadata matches what the caller expects.
      auto meta = ret->get_meta();
      if (ret->get_size()) {
        ceph_assert(meta.begin <= ret->begin()->get_key());
        ceph_assert(meta.end > (ret->end() - 1)->get_key());
      }
      ceph_assert(1 == meta.depth);
      ceph_assert(begin == meta.begin);
      ceph_assert(end == meta.end);
      return get_leaf_node_ret(
        interruptible::ready_future_marker{},
        ret);
    });
  }
1358
  using lookup_root_iertr = base_iertr;
  using lookup_root_ret = lookup_root_iertr::future<>;
  /**
   * lookup_root
   *
   * Populates the top level of iter with the root node, reading it from
   * disk if it is not already linked from the root block.  Reports the
   * root's extent to visitor when one is provided.
   */
  lookup_root_ret lookup_root(
    op_context_t<node_key_t> c,
    iterator &iter,
    mapped_space_visitor_t *visitor) const {
    LOG_PREFIX(FixedKVBtree::lookup_root);
    SUBTRACET(seastore_fixedkv_tree,
      "looking up root on {}",
      c.trans,
      *root_block);
    // found == true iff the root node is already linked in memory.
    auto [found, fut] = get_root_node(c);

    auto on_found_internal =
      [this, visitor, &iter](InternalNodeRef &root_node) {
      iter.get_internal(get_root().get_depth()).node = root_node;
      if (visitor) (*visitor)(
        root_node->get_paddr(),
        root_node->get_node_meta().begin,
        root_node->get_length(),
        get_root().get_depth(),
        internal_node_t::TYPE,
        iter);
      return lookup_root_iertr::now();
    };
    auto on_found_leaf =
      [visitor, &iter, this](LeafNodeRef root_node) {
      iter.leaf.node = root_node;
      if (visitor) (*visitor)(
        root_node->get_paddr(),
        root_node->get_node_meta().begin,
        root_node->get_length(),
        get_root().get_depth(),
        leaf_node_t::TYPE,
        iter);
      return lookup_root_iertr::now();
    };

    if (found) {
      // Fast path: root already linked; dispatch on depth to the right
      // node type (depth 1 means the root is itself a leaf).
      return fut.then_interruptible(
        [this, c, on_found_internal=std::move(on_found_internal),
        on_found_leaf=std::move(on_found_leaf)](auto root) {
        LOG_PREFIX(FixedKVBtree::lookup_root);
        ceph_assert(root);
        SUBTRACET(seastore_fixedkv_tree,
          "got root node on {}, res: {}",
          c.trans,
          *root_block,
          *root);

        if (get_root().get_depth() > 1) {
          auto root_node = root->template cast<internal_node_t>();
          return on_found_internal(root_node);
        } else {
          auto root_node = root->template cast<leaf_node_t>();
          return on_found_leaf(root_node);
        }
      });
    } else {
      // Slow path: read the root node from its recorded location; the
      // root always covers the full key range.
      if (get_root().get_depth() > 1) {
        return get_internal_node(
          c,
          get_root().get_depth(),
          get_root().get_location(),
          min_max_t<node_key_t>::min,
          min_max_t<node_key_t>::max,
          std::nullopt
        ).si_then([on_found=std::move(on_found_internal)](InternalNodeRef root_node) {
          return on_found(root_node);
        });
      } else {
        return get_leaf_node(
          c,
          get_root().get_location(),
          min_max_t<node_key_t>::min,
          min_max_t<node_key_t>::max,
          std::nullopt
        ).si_then([on_found=std::move(on_found_leaf)](LeafNodeRef root_node) {
          return on_found(root_node);
        });
      }
    }
  }
1442
  using lookup_internal_level_iertr = base_iertr;
  using lookup_internal_level_ret = lookup_internal_level_iertr::future<>;
  /**
   * lookup_internal_level
   *
   * Descends one internal level: selects the child of the (already
   * populated) node at depth + 1, applies f to pick the slot within that
   * child, and records the result at depth in iter.  Uses the parent's
   * cached child pointer when present, otherwise reads the child from
   * disk and links it to the parent.
   */
  template <typename F>
  static lookup_internal_level_ret lookup_internal_level(
    op_context_t<node_key_t> c,
    depth_t depth,
    iterator &iter,
    F &f,
    mapped_space_visitor_t *visitor
  ) {
    assert(depth > 1);
    auto &parent_entry = iter.get_internal(depth + 1);
    auto parent = parent_entry.node;
    auto node_iter = parent->iter_idx(parent_entry.pos);

    auto on_found = [depth, visitor, &iter, &f](InternalNodeRef node) {
      auto &entry = iter.get_internal(depth);
      entry.node = node;
      auto node_iter = f(*node);
      assert(node_iter != node->end());
      entry.pos = node_iter->get_offset();
      if (visitor)
        (*visitor)(
          node->get_paddr(),
          node->get_node_meta().begin,
          node->get_length(),
          depth,
          node->get_type(),
          iter);
      return seastar::now();
    };

    // Fast path: the parent already holds an in-memory child pointer.
    auto v = parent->template get_child<internal_node_t>(c, node_iter);
    if (v.has_child()) {
      return v.get_child_fut().then(
        [on_found=std::move(on_found), node_iter, c,
        parent_entry](auto child) mutable {
        LOG_PREFIX(FixedKVBtree::lookup_internal_level);
        SUBTRACET(seastore_fixedkv_tree,
          "got child on {}, pos: {}, res: {}",
          c.trans,
          *parent_entry.node,
          parent_entry.pos,
          *child);
        auto &cnode = (typename internal_node_t::base_t &)*child;
        assert(cnode.get_node_meta().begin == node_iter.get_key());
        assert(cnode.get_node_meta().end > node_iter.get_key());
        return on_found(child->template cast<internal_node_t>());
      });
    }

    // Slow path: compute the child's key range from the parent's entries
    // and read it from disk.
    auto child_pos = v.get_child_pos();
    auto next_iter = node_iter + 1;
    auto begin = node_iter->get_key();
    auto end = next_iter == parent->end()
      ? parent->get_node_meta().end
      : next_iter->get_key();
    return get_internal_node(
      c,
      depth,
      node_iter->get_val().maybe_relative_to(parent->get_paddr()),
      begin,
      end,
      std::make_optional<node_position_t<internal_node_t>>(
        child_pos.template get_parent<internal_node_t>(),
        child_pos.get_pos())
    ).si_then([on_found=std::move(on_found)](InternalNodeRef node) {
      return on_found(node);
    });
  }
1513
1514 using lookup_leaf_iertr = base_iertr;
1515 using lookup_leaf_ret = lookup_leaf_iertr::future<>;
1516 template <typename F>
1517 static lookup_internal_level_ret lookup_leaf(
1518 op_context_t<node_key_t> c,
1519 iterator &iter,
1520 F &f,
1521 mapped_space_visitor_t *visitor
1522 ) {
1523 auto &parent_entry = iter.get_internal(2);
1524 auto parent = parent_entry.node;
1525 assert(parent);
1526 auto node_iter = parent->iter_idx(parent_entry.pos);
1527
1528 auto on_found = [visitor, &iter, &f](LeafNodeRef node) {
1529 iter.leaf.node = node;
1530 auto node_iter = f(*node);
1531 iter.leaf.pos = node_iter->get_offset();
1532 if (visitor)
1533 (*visitor)(
1534 node->get_paddr(),
1535 node->get_node_meta().begin,
1536 node->get_length(),
1537 1,
1538 node->get_type(),
1539 iter);
1540 return seastar::now();
1541 };
1542
1543 auto v = parent->template get_child<leaf_node_t>(c, node_iter);
1544 if (v.has_child()) {
1545 return v.get_child_fut().then(
1546 [on_found=std::move(on_found), node_iter, c,
1547 parent_entry](auto child) mutable {
1548 LOG_PREFIX(FixedKVBtree::lookup_leaf);
1549 SUBTRACET(seastore_fixedkv_tree,
1550 "got child on {}, pos: {}, res: {}",
1551 c.trans,
1552 *parent_entry.node,
1553 parent_entry.pos,
1554 *child);
1555 auto &cnode = (typename internal_node_t::base_t &)*child;
1556 assert(cnode.get_node_meta().begin == node_iter.get_key());
1557 assert(cnode.get_node_meta().end > node_iter.get_key());
1558 return on_found(child->template cast<leaf_node_t>());
1559 });
1560 }
1561
1562 auto child_pos = v.get_child_pos();
1563 auto next_iter = node_iter + 1;
1564 auto begin = node_iter->get_key();
1565 auto end = next_iter == parent->end()
1566 ? parent->get_node_meta().end
1567 : next_iter->get_key();
1568
1569 return get_leaf_node(
1570 c,
1571 node_iter->get_val().maybe_relative_to(parent->get_paddr()),
1572 begin,
1573 end,
1574 std::make_optional<node_position_t<leaf_node_t>>(
1575 child_pos.template get_parent<leaf_node_t>(),
1576 child_pos.get_pos())
1577 ).si_then([on_found=std::move(on_found)](LeafNodeRef node) {
1578 return on_found(node);
1579 });
1580 }
1581
  /**
   * lookup_depth_range
   *
   * Performs node lookups on depths [from, to) using li and ll to
   * select the target at each level. Note, may leave the iterator
   * at_boundary(), call handle_boundary() prior to returning out
   * of FixedKVBtree.
   */
  using lookup_depth_range_iertr = base_iertr;
  using lookup_depth_range_ret = lookup_depth_range_iertr::future<>;
  template <typename LI, typename LL>
  static lookup_depth_range_ret lookup_depth_range(
    op_context_t<node_key_t> c, ///< [in] context
    iterator &iter, ///< [in,out] iterator to populate
    depth_t from, ///< [in] from inclusive
    depth_t to, ///< [in] to exclusive, (to <= from, to == from is a noop)
    LI &li, ///< [in] internal->iterator
    LL &ll, ///< [in] leaf->iterator
    mapped_space_visitor_t *visitor ///< [in] mapped space visitor
  ) {
    LOG_PREFIX(FixedKVBtree::lookup_depth_range);
    SUBTRACET(seastore_fixedkv_tree, "{} -> {}", c.trans, from, to);
    return seastar::do_with(
      from,
      [c, to, visitor, &iter, &li, &ll](auto &d) {
        return trans_intr::repeat(
          [c, to, visitor, &iter, &li, &ll, &d] {
            if (d > to) {
              return [&] {
                if (d > 1) {
                  // Still above the leaf level: descend one internal level.
                  return lookup_internal_level(
                    c,
                    d,
                    iter,
                    li,
                    visitor);
                } else {
                  assert(d == 1);
                  return lookup_leaf(
                    c,
                    iter,
                    ll,
                    visitor);
                }
              }().si_then([&d] {
                --d;
                return lookup_depth_range_iertr::make_ready_future<
                  seastar::stop_iteration
                  >(seastar::stop_iteration::no);
              });
            } else {
              return lookup_depth_range_iertr::make_ready_future<
                seastar::stop_iteration
                >(seastar::stop_iteration::yes);
            }
          });
      });
  }
1640
  using lookup_iertr = base_iertr;
  using lookup_ret = lookup_iertr::future<iterator>;
  /**
   * lookup
   *
   * Generic top-down traversal: lookup_internal selects the slot within
   * each internal node and lookup_leaf the slot within the leaf.
   * Descends from the root to min_depth (1 for ordinary lookups),
   * reporting each visited node to visitor when provided.
   */
  template <typename LI, typename LL>
  lookup_ret lookup(
    op_context_t<node_key_t> c,
    LI &&lookup_internal,
    LL &&lookup_leaf,
    depth_t min_depth,
    mapped_space_visitor_t *visitor
  ) const {
    LOG_PREFIX(FixedKVBtree::lookup);
    assert(min_depth > 0);
    return seastar::do_with(
      iterator{get_root().get_depth()},
      std::forward<LI>(lookup_internal),
      std::forward<LL>(lookup_leaf),
      [FNAME, this, visitor, c, min_depth](auto &iter, auto &li, auto &ll) {
        return lookup_root(
          c, iter, visitor
        ).si_then([FNAME, this, visitor, c, &iter, &li, &ll, min_depth] {
          // Position within the root level before descending further.
          if (iter.get_depth() > 1) {
            auto &root_entry = *(iter.internal.rbegin());
            root_entry.pos = li(*(root_entry.node)).get_offset();
          } else {
            auto &root_entry = iter.leaf;
            auto riter = ll(*(root_entry.node));
            root_entry.pos = riter->get_offset();
          }
          SUBTRACET(seastore_fixedkv_tree, "got root, depth {}",
            c.trans, get_root().get_depth());
          return lookup_depth_range(
            c,
            iter,
            get_root().get_depth() - 1,
            min_depth - 1,
            li,
            ll,
            visitor
          ).si_then([c, visitor, &iter, min_depth] {
            // It's only when the lookup is triggered by
            // update_internal_mapping() that min_depth is
            // NOT 1
            if (min_depth == 1 && iter.at_boundary()) {
              return iter.handle_boundary(c, visitor);
            } else {
              return lookup_iertr::now();
            }
          });
        }).si_then([&iter] {
          return std::move(iter);
        });
      });
  }
1694
  /**
   * find_insertion
   *
   * Prepare iter for insertion. iter should begin pointing at
   * the valid insertion point (lower_bound(laddr)).
   *
   * Upon completion, iter will point at the
   * position at which laddr should be inserted. iter may, upon completion,
   * point at the end of a leaf other than the end leaf if that's the correct
   * insertion point.
   */
  using find_insertion_iertr = base_iertr;
  using find_insertion_ret = find_insertion_iertr::future<>;
  static find_insertion_ret find_insertion(
    op_context_t<node_key_t> c,
    node_key_t laddr,
    iterator &iter)
  {
    assert(iter.is_end() || iter.get_key() >= laddr);
    if (!iter.is_end() && iter.get_key() == laddr) {
      // Exact match: insert right here.
      return seastar::now();
    } else if (iter.leaf.node->get_node_meta().begin <= laddr) {
      // laddr falls inside this leaf's range; current position is valid.
#ifndef NDEBUG
      auto p = iter;
      if (p.leaf.pos > 0) {
        --p.leaf.pos;
        assert(p.get_key() < laddr);
      }
#endif
      return seastar::now();
    } else {
      // laddr precedes this leaf's range: the insertion point is at the
      // end of the previous leaf.
      assert(iter.leaf.pos == 0);
      return iter.prev(
        c
      ).si_then([laddr, &iter](auto p) {
        boost::ignore_unused(laddr); // avoid clang warning;
        assert(p.leaf.node->get_node_meta().begin <= laddr);
        assert(p.get_key() < laddr);
        // Note, this is specifically allowed to violate the iterator
        // invariant that pos is a valid index for the node in the event
        // that the insertion point is at the end of a node.
        p.leaf.pos++;
        assert(p.at_boundary());
        iter = p;
        return seastar::now();
      });
    }
  }
1743
  /**
   * handle_split
   *
   * Split nodes in iter as needed for insertion. First, scan iter from leaf
   * to find first non-full level. Then, split from there towards leaf.
   *
   * Upon completion, iter will point at the newly split insertion point. As
   * with find_insertion, iter's leaf pointer may be end without iter being
   * end.
   */
  using handle_split_iertr = base_iertr;
  using handle_split_ret = handle_split_iertr::future<>;
  handle_split_ret handle_split(
    op_context_t<node_key_t> c,
    iterator &iter)
  {
    LOG_PREFIX(FixedKVBtree::handle_split);

    // Highest depth that needs splitting (== get_depth() when every
    // level, including the root, is full).
    depth_t split_from = iter.check_split();

    SUBTRACET(seastore_fixedkv_tree, "split_from {}, depth {}", c.trans, split_from, iter.get_depth());

    if (split_from == iter.get_depth()) {
      // The root itself is full: grow the tree by one level with a new
      // root holding a single entry pointing at the old root.
      auto nroot = c.cache.template alloc_new_extent<internal_node_t>(
        c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
      fixed_kv_node_meta_t<node_key_t> meta{
        min_max_t<node_key_t>::min, min_max_t<node_key_t>::max, iter.get_depth() + 1};
      nroot->set_meta(meta);
      nroot->range = meta;
      nroot->journal_insert(
        nroot->begin(),
        min_max_t<node_key_t>::min,
        get_root().get_location(),
        nullptr);
      iter.internal.push_back({nroot, 0});

      get_tree_stats<self_type>(c.trans).depth = iter.get_depth();
      get_tree_stats<self_type>(c.trans).extents_num_delta++;

      root_block = c.cache.duplicate_for_write(
        c.trans, root_block)->template cast<RootBlock>();
      get_root().set_location(nroot->get_paddr());
      get_root().set_depth(iter.get_depth());
      ceph_assert(get_root().get_depth() <= MAX_FIXEDKVBTREE_DEPTH);
      set_root_node(nroot);
    }

    /* pos may be either node_position_t<leaf_node_t> or
     * node_position_t<internal_node_t> */
    auto split_level = [&](auto &parent_pos, auto &pos) {
      LOG_PREFIX(FixedKVBtree::handle_split);
      auto [left, right, pivot] = pos.node->make_split_children(c);

      auto parent_node = parent_pos.node;
      auto parent_iter = parent_pos.get_iter();

      // Replace the old entry with left and insert right after it.
      parent_node->update(
        parent_iter,
        left->get_paddr(),
        left.get());
      parent_node->insert(
        parent_iter + 1,
        pivot,
        right->get_paddr(),
        right.get());

      SUBTRACET(
        seastore_fixedkv_tree,
        "splitted {} into left: {}, right: {}",
        c.trans,
        *pos.node,
        *left,
        *right);
      c.cache.retire_extent(c.trans, pos.node);

      get_tree_stats<self_type>(c.trans).extents_num_delta++;
      return std::make_pair(left, right);
    };

    // Split downward from split_from towards the leaf.
    for (; split_from > 0; --split_from) {
      auto &parent_pos = iter.get_internal(split_from + 1);
      if (!parent_pos.node->is_mutable()) {
        parent_pos.node = c.cache.duplicate_for_write(
          c.trans, parent_pos.node
        )->template cast<internal_node_t>();
      }

      if (split_from > 1) {
        auto &pos = iter.get_internal(split_from);
        SUBTRACET(
          seastore_fixedkv_tree,
          "splitting internal {} at depth {}, parent: {} at pos: {}",
          c.trans,
          *pos.node,
          split_from,
          *parent_pos.node,
          parent_pos.pos);
        auto [left, right] = split_level(parent_pos, pos);

        // Re-point iter at whichever half now covers the target position.
        if (pos.pos < left->get_size()) {
          pos.node = left;
        } else {
          pos.node = right;
          pos.pos -= left->get_size();

          parent_pos.pos += 1;
        }
      } else {
        auto &pos = iter.leaf;
        SUBTRACET(
          seastore_fixedkv_tree,
          "splitting leaf {}, parent: {} at pos: {}",
          c.trans,
          *pos.node,
          *parent_pos.node,
          parent_pos.pos);
        auto [left, right] = split_level(parent_pos, pos);

        /* right->get_node_meta().begin == pivot == right->begin()->get_key()
         * Thus, if pos.pos == left->get_size(), we want iter to point to
         * left with pos.pos at the end rather than right with pos.pos = 0
         * since the insertion would be to the left of the first element
         * of right and thus necessarily less than right->get_node_meta().begin.
         */
        if (pos.pos <= left->get_size()) {
          pos.node = left;
        } else {
          pos.node = right;
          pos.pos -= left->get_size();

          parent_pos.pos += 1;
        }
      }
    }

    return seastar::now();
  }
1881
1882
  using handle_merge_iertr = base_iertr;
  using handle_merge_ret = handle_merge_iertr::future<>;
  /**
   * handle_merge
   *
   * After a removal, walks up from the leaf merging/rebalancing each
   * level that has dropped below minimum capacity, and collapses the
   * root (shrinking the tree by one level) when it is left with a
   * single entry.
   */
  handle_merge_ret handle_merge(
    op_context_t<node_key_t> c,
    iterator &iter)
  {
    LOG_PREFIX(FixedKVBtree::handle_merge);
    if (iter.get_depth() == 1 ||
        !iter.leaf.node->below_min_capacity()) {
      SUBTRACET(
        seastore_fixedkv_tree,
        "no need to merge leaf, leaf size {}, depth {}",
        c.trans,
        iter.leaf.node->get_size(),
        iter.get_depth());
      return seastar::now();
    }

    return seastar::do_with(
      depth_t{1},
      [FNAME, this, c, &iter](auto &to_merge) {
        return trans_intr::repeat(
          [FNAME, this, c, &iter, &to_merge] {
            SUBTRACET(
              seastore_fixedkv_tree,
              "merging depth {}",
              c.trans,
              to_merge);
            auto &parent_pos = iter.get_internal(to_merge + 1);
            auto merge_fut = handle_merge_iertr::now();
            if (to_merge > 1) {
              auto &pos = iter.get_internal(to_merge);
              merge_fut = merge_level(c, to_merge, parent_pos, pos);
            } else {
              auto &pos = iter.leaf;
              merge_fut = merge_level(c, to_merge, parent_pos, pos);
            }

            return merge_fut.si_then([FNAME, this, c, &iter, &to_merge] {
              ++to_merge;
              auto &pos = iter.get_internal(to_merge);
              if (to_merge == iter.get_depth()) {
                if (pos.node->get_size() == 1) {
                  // Root is left with a single child: drop the root level.
                  SUBTRACET(seastore_fixedkv_tree, "collapsing root", c.trans);
                  c.cache.retire_extent(c.trans, pos.node);
                  assert(pos.pos == 0);
                  auto node_iter = pos.get_iter();
                  iter.internal.pop_back();
                  get_tree_stats<self_type>(c.trans).depth = iter.get_depth();
                  get_tree_stats<self_type>(c.trans).extents_num_delta--;

                  root_block = c.cache.duplicate_for_write(
                    c.trans, root_block
                  )->template cast<RootBlock>();
                  get_root().set_location(
                    node_iter->get_val().maybe_relative_to(pos.node->get_paddr()));
                  get_root().set_depth(iter.get_depth());
                  if (iter.get_depth() > 1) {
                    auto root_node = iter.get_internal(iter.get_depth()).node;
                    set_root_node(root_node);
                  } else {
                    set_root_node(iter.leaf.node);
                  }
                } else {
                  SUBTRACET(seastore_fixedkv_tree, "no need to collapse root", c.trans);
                }
                return seastar::stop_iteration::yes;
              } else if (pos.node->below_min_capacity()) {
                SUBTRACET(
                  seastore_fixedkv_tree,
                  "continuing, next node {} depth {} at min",
                  c.trans,
                  *pos.node,
                  to_merge);
                return seastar::stop_iteration::no;
              } else {
                SUBTRACET(
                  seastore_fixedkv_tree,
                  "complete, next node {} depth {} not min",
                  c.trans,
                  *pos.node,
                  to_merge);
                return seastar::stop_iteration::yes;
              }
            });
          });
      });
  }
1971
  // get_node dispatch, leaf overload: selected when NodeType is
  // leaf_node_t.  Forwards to get_leaf_node (leaves exist only at
  // depth 1, hence the assert).
  template <typename NodeType,
    std::enable_if_t<std::is_same_v<NodeType, leaf_node_t>, int> = 0>
  base_iertr::future<typename NodeType::Ref> get_node(
    op_context_t<node_key_t> c,
    depth_t depth,
    paddr_t addr,
    node_key_t begin,
    node_key_t end,
    typename std::optional<node_position_t<leaf_node_t>> parent_pos) {
    assert(depth == 1);
    return get_leaf_node(c, addr, begin, end, std::move(parent_pos));
  }
1984
  // get_node dispatch, internal overload: selected when NodeType is
  // internal_node_t.  Forwards to get_internal_node.
  template <typename NodeType,
    std::enable_if_t<std::is_same_v<NodeType, internal_node_t>, int> = 0>
  base_iertr::future<typename NodeType::Ref> get_node(
    op_context_t<node_key_t> c,
    depth_t depth,
    paddr_t addr,
    node_key_t begin,
    node_key_t end,
    typename std::optional<node_position_t<internal_node_t>> parent_pos) {
    return get_internal_node(c, depth, addr, begin, end, std::move(parent_pos));
  }
1996
  /**
   * merge_level
   *
   * Merges or rebalances the node at pos (at the given depth) with a
   * sibling donor chosen via parent_pos: a full merge when the donor is
   * at minimum capacity, otherwise a rebalance.  parent_pos/pos are
   * updated to track the surviving node(s).
   */
  template <typename NodeType>
  handle_merge_ret merge_level(
    op_context_t<node_key_t> c,
    depth_t depth,
    node_position_t<internal_node_t> &parent_pos,
    node_position_t<NodeType> &pos)
  {
    LOG_PREFIX(FixedKVBtree::merge_level);
    if (!parent_pos.node->is_mutable()) {
      parent_pos.node = c.cache.duplicate_for_write(
        c.trans, parent_pos.node
      )->template cast<internal_node_t>();
    }

    auto iter = parent_pos.get_iter();
    assert(iter.get_offset() < parent_pos.node->get_size());
    // Prefer the right sibling; fall back to the left one only when pos
    // is the parent's last child.
    bool donor_is_left = ((iter.get_offset() + 1) == parent_pos.node->get_size());
    auto donor_iter = donor_is_left ? (iter - 1) : (iter + 1);
    auto next_iter = donor_iter + 1;
    auto begin = donor_iter->get_key();
    auto end = next_iter == parent_pos.node->end()
      ? parent_pos.node->get_node_meta().end
      : next_iter->get_key();

    SUBTRACET(seastore_fixedkv_tree, "parent: {}, node: {}", c.trans, *parent_pos.node, *pos.node);
    auto do_merge = [c, iter, donor_iter, donor_is_left, &parent_pos, &pos](
      typename NodeType::Ref donor) {
      LOG_PREFIX(FixedKVBtree::merge_level);
      // Order the pair so l is always the left node.
      auto [l, r] = donor_is_left ?
        std::make_pair(donor, pos.node) : std::make_pair(pos.node, donor);

      auto [liter, riter] = donor_is_left ?
        std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);

      if (donor->at_min_capacity()) {
        // Donor cannot spare entries: collapse both nodes into one.
        auto replacement = l->make_full_merge(c, r);

        parent_pos.node->update(
          liter,
          replacement->get_paddr(),
          replacement.get());
        parent_pos.node->remove(riter);

        pos.node = replacement;
        if (donor_is_left) {
          pos.pos += r->get_size();
          parent_pos.pos--;
        }

        SUBTRACET(seastore_fixedkv_tree, "l: {}, r: {}, replacement: {}", c.trans, *l, *r, *replacement);
        c.cache.retire_extent(c.trans, l);
        c.cache.retire_extent(c.trans, r);
        get_tree_stats<self_type>(c.trans).extents_num_delta--;
      } else {
        // Donor has spare entries: rebalance across two replacements.
        LOG_PREFIX(FixedKVBtree::merge_level);
        auto [replacement_l, replacement_r, pivot] =
          l->make_balanced(
            c,
            r,
            !donor_is_left);

        parent_pos.node->update(
          liter,
          replacement_l->get_paddr(),
          replacement_l.get());
        parent_pos.node->replace(
          riter,
          pivot,
          replacement_r->get_paddr(),
          replacement_r.get());

        if (donor_is_left) {
          assert(parent_pos.pos > 0);
          parent_pos.pos--;
        }

        // Translate the old logical position into the replacement pair.
        auto orig_position = donor_is_left ?
          l->get_size() + pos.pos :
          pos.pos;
        if (orig_position < replacement_l->get_size()) {
          pos.node = replacement_l;
          pos.pos = orig_position;
        } else {
          parent_pos.pos++;
          pos.node = replacement_r;
          pos.pos = orig_position - replacement_l->get_size();
        }

        SUBTRACET(
          seastore_fixedkv_tree,
          "l: {}, r: {}, replacement_l: {}, replacement_r: {}",
          c.trans, *l, *r, *replacement_l, *replacement_r);
        c.cache.retire_extent(c.trans, l);
        c.cache.retire_extent(c.trans, r);
      }

      return seastar::now();
    };

    // Fast path: donor already linked in memory under the parent.
    auto v = parent_pos.node->template get_child<NodeType>(c, donor_iter);
    if (v.has_child()) {
      return v.get_child_fut().then(
        [do_merge=std::move(do_merge), &pos,
        donor_iter, donor_is_left, c, parent_pos](auto child) mutable {
        LOG_PREFIX(FixedKVBtree::merge_level);
        SUBTRACET(seastore_fixedkv_tree,
          "got child on {}, pos: {}, res: {}",
          c.trans,
          *parent_pos.node,
          donor_iter.get_offset(),
          *child);
        auto &node = (typename internal_node_t::base_t&)*child;
        // Donor must be immediately adjacent to pos.node.
        assert(donor_is_left ?
          node.get_node_meta().end == pos.node->get_node_meta().begin :
          node.get_node_meta().begin == pos.node->get_node_meta().end);
        assert(node.get_node_meta().begin == donor_iter.get_key());
        assert(node.get_node_meta().end > donor_iter.get_key());
        return do_merge(child->template cast<NodeType>());
      });
    }

    // Slow path: read the donor from disk.
    auto child_pos = v.get_child_pos();
    return get_node<NodeType>(
      c,
      depth,
      donor_iter.get_val().maybe_relative_to(parent_pos.node->get_paddr()),
      begin,
      end,
      std::make_optional<node_position_t<NodeType>>(
        child_pos.template get_parent<NodeType>(),
        child_pos.get_pos())
    ).si_then([do_merge=std::move(do_merge)](typename NodeType::Ref donor) {
      return do_merge(donor);
    });
  }
2132};
2133
// Trait: true only for instantiations of FixedKVBtree; used to constrain
// the with_btree* helpers below.
template <typename T>
struct is_fixed_kv_tree : std::false_type {};

template <
  typename node_key_t,
  typename node_val_t,
  typename internal_node_t,
  typename leaf_node_t,
  typename pin_t,
  size_t node_size,
  bool leaf_has_children>
struct is_fixed_kv_tree<
  FixedKVBtree<
    node_key_t,
    node_val_t,
    internal_node_t,
    leaf_node_t,
    pin_t,
    node_size,
    leaf_has_children>> : std::true_type {};
2154
2155template <
2156 typename tree_type_t,
2157 typename node_key_t,
2158 typename F,
2159 std::enable_if_t<is_fixed_kv_tree<tree_type_t>::value, int> = 0>
2160auto with_btree(
2161 Cache &cache,
2162 op_context_t<node_key_t> c,
2163 F &&f) {
2164 return cache.get_root(
2165 c.trans
2166 ).si_then([f=std::forward<F>(f)](RootBlockRef croot) mutable {
2167 return seastar::do_with(
2168 tree_type_t(croot),
2169 [f=std::move(f)](auto &btree) mutable {
2170 return f(btree);
2171 });
2172 });
2173}
2174
2175template <
2176 typename tree_type_t,
2177 typename State,
2178 typename node_key_t,
2179 typename F,
2180 std::enable_if_t<is_fixed_kv_tree<tree_type_t>::value, int> = 0>
2181auto with_btree_state(
2182 Cache &cache,
2183 op_context_t<node_key_t> c,
2184 State &&init,
2185 F &&f) {
2186 return seastar::do_with(
2187 std::forward<State>(init),
2188 [&cache, c, f=std::forward<F>(f)](auto &state) mutable {
2189 return with_btree<tree_type_t>(
2190 cache,
2191 c,
2192 [&state, f=std::move(f)](auto &btree) mutable {
2193 return f(btree, state);
2194 }).si_then([&state] {
2195 return seastar::make_ready_future<State>(std::move(state));
2196 });
2197 });
2198}
2199
// Convenience overload of with_btree_state starting from a
// value-initialized State.
template <
  typename tree_type_t,
  typename State,
  typename node_key_t,
  typename F,
  std::enable_if_t<is_fixed_kv_tree<tree_type_t>::value, int> = 0>
auto with_btree_state(
  Cache &cache,
  op_context_t<node_key_t> c,
  F &&f) {
  return crimson::os::seastore::with_btree_state<tree_type_t, State>(
    cache, c, State{}, std::forward<F>(f));
}
2213
2214template <
2215 typename tree_type_t,
2216 typename Ret,
2217 typename node_key_t,
2218 typename F>
2219auto with_btree_ret(
2220 Cache &cache,
2221 op_context_t<node_key_t> c,
2222 F &&f) {
2223 return with_btree_state<tree_type_t, Ret>(
2224 cache,
2225 c,
2226 [f=std::forward<F>(f)](auto &btree, auto &ret) mutable {
2227 return f(
2228 btree
2229 ).si_then([&ret](auto &&_ret) {
2230 ret = std::move(_ret);
2231 });
2232 });
2233}
2234
2235}
2236