]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / crimson / os / seastore / lba_manager / btree / lba_btree_node.h
index 7f5c97c8893dfd72102d20eccf737ec2f040e45a..62ceae6cc462a4c7ef7c33a74c8623ed91bb6c94 100644 (file)
 #include "crimson/os/seastore/seastore_types.h"
 #include "crimson/os/seastore/cache.h"
 #include "crimson/os/seastore/cached_extent.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
+
+#include "crimson/os/seastore/btree/btree_range_pin.h"
+#include "crimson/os/seastore/btree/fixed_kv_btree.h"
+#include "crimson/os/seastore/btree/fixed_kv_node.h"
 
 namespace crimson::os::seastore::lba_manager::btree {
 
 using base_iertr = LBAManager::base_iertr;
-
-struct op_context_t {
-  Cache &cache;
-  Transaction &trans;
-  btree_pin_set_t *pins = nullptr;
-};
+using LBANode = FixedKVNode<laddr_t>;
 
 /**
  * lba_map_val_t
@@ -47,79 +44,16 @@ struct lba_map_val_t {
     uint32_t refcount,
     uint32_t checksum)
     : len(len), paddr(paddr), refcount(refcount), checksum(checksum) {}
+  bool operator==(const lba_map_val_t&) const = default;
 };
-WRITE_EQ_OPERATORS_4(
-  lba_map_val_t,
-  len,
-  paddr,
-  refcount,
-  checksum);
 
-class BtreeLBAPin;
-using BtreeLBAPinRef = std::unique_ptr<BtreeLBAPin>;
+std::ostream& operator<<(std::ostream& out, const lba_map_val_t&);
 
 constexpr size_t LBA_BLOCK_SIZE = 4096;
 
-/**
- * lba_node_meta_le_t
- *
- * On disk layout for lba_node_meta_t
- */
-struct lba_node_meta_le_t {
-  laddr_le_t begin = laddr_le_t(0);
-  laddr_le_t end = laddr_le_t(0);
-  depth_le_t depth = init_depth_le(0);
-
-  lba_node_meta_le_t() = default;
-  lba_node_meta_le_t(const lba_node_meta_le_t &) = default;
-  explicit lba_node_meta_le_t(const lba_node_meta_t &val)
-    : begin(ceph_le64(val.begin)),
-      end(ceph_le64(val.end)),
-      depth(init_depth_le(val.depth)) {}
-
-  operator lba_node_meta_t() const {
-    return lba_node_meta_t{ begin, end, depth };
-  }
-};
-
-/**
- * LBANode
- *
- * Base class enabling recursive lookup between internal and leaf nodes.
- */
-struct LBANode : CachedExtent {
-  using LBANodeRef = TCachedExtentRef<LBANode>;
-
-  btree_range_pin_t pin;
+using lba_node_meta_t = fixed_kv_node_meta_t<laddr_t>;
 
-  LBANode(ceph::bufferptr &&ptr) : CachedExtent(std::move(ptr)), pin(this) {}
-  LBANode(const LBANode &rhs)
-    : CachedExtent(rhs), pin(rhs.pin, this) {}
-
-  virtual lba_node_meta_t get_node_meta() const = 0;
-
-  virtual ~LBANode() = default;
-
-  void on_delta_write(paddr_t record_block_offset) final {
-    // All in-memory relative addrs are necessarily record-relative
-    assert(get_prior_instance());
-    pin.take_pin(get_prior_instance()->cast<LBANode>()->pin);
-    resolve_relative_addrs(record_block_offset);
-  }
-
-  void on_initial_write() final {
-    // All in-memory relative addrs are necessarily block-relative
-    resolve_relative_addrs(get_paddr());
-  }
-
-  void on_clean_read() final {
-    // From initial write of block, relative addrs are necessarily block-relative
-    resolve_relative_addrs(get_paddr());
-  }
-
-  virtual void resolve_relative_addrs(paddr_t base) = 0;
-};
-using LBANodeRef = LBANode::LBANodeRef;
+using lba_node_meta_le_t = fixed_kv_node_meta_le_t<laddr_le_t>;
 
 /**
  * LBAInternalNode
@@ -140,197 +74,22 @@ using LBANodeRef = LBANode::LBANodeRef;
  */
 constexpr size_t INTERNAL_NODE_CAPACITY = 254;
 struct LBAInternalNode
-  : LBANode,
-    common::FixedKVNodeLayout<
+  : FixedKVInternalNode<
       INTERNAL_NODE_CAPACITY,
-      lba_node_meta_t, lba_node_meta_le_t,
       laddr_t, laddr_le_t,
-      paddr_t, paddr_le_t> {
+      LBA_BLOCK_SIZE,
+      LBAInternalNode> {
   using Ref = TCachedExtentRef<LBAInternalNode>;
   using internal_iterator_t = const_iterator;
   template <typename... T>
   LBAInternalNode(T&&... t) :
-    LBANode(std::forward<T>(t)...),
-    FixedKVNodeLayout(get_bptr().c_str()) {}
+    FixedKVInternalNode(std::forward<T>(t)...) {}
 
   static constexpr extent_types_t TYPE = extent_types_t::LADDR_INTERNAL;
 
-  lba_node_meta_t get_node_meta() const { return get_meta(); }
-
-  CachedExtentRef duplicate_for_write() final {
-    assert(delta_buffer.empty());
-    return CachedExtentRef(new LBAInternalNode(*this));
-  };
-
-  delta_buffer_t delta_buffer;
-  delta_buffer_t *maybe_get_delta_buffer() {
-    return is_mutation_pending() ? &delta_buffer : nullptr;
-  }
-
-  void update(
-    const_iterator iter,
-    paddr_t addr) {
-    return journal_update(
-      iter,
-      maybe_generate_relative(addr),
-      maybe_get_delta_buffer());
-  }
-
-  void insert(
-    const_iterator iter,
-    laddr_t pivot,
-    paddr_t addr) {
-    return journal_insert(
-      iter,
-      pivot,
-      maybe_generate_relative(addr),
-      maybe_get_delta_buffer());
-  }
-
-  void remove(const_iterator iter) {
-    return journal_remove(
-      iter,
-      maybe_get_delta_buffer());
-  }
-
-  void replace(
-    const_iterator iter,
-    laddr_t pivot,
-    paddr_t addr) {
-    return journal_replace(
-      iter,
-      pivot,
-      maybe_generate_relative(addr),
-      maybe_get_delta_buffer());
-  }
-
-  std::tuple<Ref, Ref, laddr_t>
-  make_split_children(op_context_t c) {
-    auto left = c.cache.alloc_new_extent<LBAInternalNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto right = c.cache.alloc_new_extent<LBAInternalNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto pivot = split_into(*left, *right);
-    left->pin.set_range(left->get_meta());
-    right->pin.set_range(right->get_meta());
-    return std::make_tuple(
-      left,
-      right,
-      pivot);
-  }
-
-  Ref make_full_merge(
-    op_context_t c,
-    Ref &right) {
-    auto replacement = c.cache.alloc_new_extent<LBAInternalNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    replacement->merge_from(*this, *right->cast<LBAInternalNode>());
-    replacement->pin.set_range(replacement->get_meta());
-    return replacement;
-  }
-
-  std::tuple<Ref, Ref, laddr_t>
-  make_balanced(
-    op_context_t c,
-    Ref &_right,
-    bool prefer_left) {
-    ceph_assert(_right->get_type() == get_type());
-    auto &right = *_right->cast<LBAInternalNode>();
-    auto replacement_left = c.cache.alloc_new_extent<LBAInternalNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto replacement_right = c.cache.alloc_new_extent<LBAInternalNode>(
-      c.trans, LBA_BLOCK_SIZE);
-
-    auto pivot = balance_into_new_nodes(
-      *this,
-      right,
-      prefer_left,
-      *replacement_left,
-      *replacement_right);
-
-    replacement_left->pin.set_range(replacement_left->get_meta());
-    replacement_right->pin.set_range(replacement_right->get_meta());
-    return std::make_tuple(
-      replacement_left,
-      replacement_right,
-      pivot);
-  }
-
-  /**
-   * Internal relative addresses on read or in memory prior to commit
-   * are either record or block relative depending on whether this
-   * physical node is is_initial_pending() or just is_pending().
-   *
-   * User passes appropriate base depending on lifecycle and
-   * resolve_relative_addrs fixes up relative internal references
-   * based on base.
-   */
-  void resolve_relative_addrs(paddr_t base);
-  void node_resolve_vals(iterator from, iterator to) const final {
-    if (is_initial_pending()) {
-      for (auto i = from; i != to; ++i) {
-       if (i->get_val().is_relative()) {
-         assert(i->get_val().is_block_relative());
-         i->set_val(get_paddr().add_relative(i->get_val()));
-       }
-      }
-    }
-  }
-  void node_unresolve_vals(iterator from, iterator to) const final {
-    if (is_initial_pending()) {
-      for (auto i = from; i != to; ++i) {
-       if (i->get_val().is_relative()) {
-         assert(i->get_val().is_record_relative());
-         i->set_val(i->get_val() - get_paddr());
-       }
-      }
-    }
-  }
-
   extent_types_t get_type() const final {
     return TYPE;
   }
-
-  std::ostream &print_detail(std::ostream &out) const final;
-
-  ceph::bufferlist get_delta() final {
-    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
-    delta_buffer.copy_out(bptr.c_str(), bptr.length());
-    ceph::bufferlist bl;
-    bl.push_back(bptr);
-    return bl;
-  }
-
-  void apply_delta_and_adjust_crc(
-    paddr_t base, const ceph::bufferlist &_bl) final {
-    assert(_bl.length());
-    ceph::bufferlist bl = _bl;
-    bl.rebuild();
-    delta_buffer_t buffer;
-    buffer.copy_in(bl.front().c_str(), bl.front().length());
-    buffer.replay(*this);
-    set_last_committed_crc(get_crc32c());
-    resolve_relative_addrs(base);
-  }
-
-  constexpr static size_t get_min_capacity() {
-    return (get_capacity() - 1) / 2;
-  }
-
-  bool at_max_capacity() const {
-    assert(get_size() <= get_capacity());
-    return get_size() == get_capacity();
-  }
-
-  bool at_min_capacity() const {
-    assert(get_size() >= (get_min_capacity() - 1));
-    return get_size() <= get_min_capacity();
-  }
-
-  bool below_min_capacity() const {
-    assert(get_size() >= (get_min_capacity() - 1));
-    return get_size() < get_min_capacity();
-  }
 };
 using LBAInternalNodeRef = LBAInternalNode::Ref;
 
@@ -378,188 +137,152 @@ struct lba_map_val_le_t {
 };
 
 struct LBALeafNode
-  : LBANode,
-    common::FixedKVNodeLayout<
+  : FixedKVLeafNode<
       LEAF_NODE_CAPACITY,
-      lba_node_meta_t, lba_node_meta_le_t,
       laddr_t, laddr_le_t,
-      lba_map_val_t, lba_map_val_le_t> {
+      lba_map_val_t, lba_map_val_le_t,
+      LBA_BLOCK_SIZE,
+      LBALeafNode,
+      true> {
   using Ref = TCachedExtentRef<LBALeafNode>;
-  using internal_iterator_t = const_iterator;
+  using parent_type_t = FixedKVLeafNode<
+                         LEAF_NODE_CAPACITY,
+                         laddr_t, laddr_le_t,
+                         lba_map_val_t, lba_map_val_le_t,
+                         LBA_BLOCK_SIZE,
+                         LBALeafNode,
+                         true>;
+  using internal_const_iterator_t =
+    typename parent_type_t::node_layout_t::const_iterator;
+  using internal_iterator_t =
+    typename parent_type_t::node_layout_t::iterator;
   template <typename... T>
   LBALeafNode(T&&... t) :
-    LBANode(std::forward<T>(t)...),
-    FixedKVNodeLayout(get_bptr().c_str()) {}
+    parent_type_t(std::forward<T>(t)...) {}
 
   static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF;
 
-  lba_node_meta_t get_node_meta() const { return get_meta(); }
-
-  CachedExtentRef duplicate_for_write() final {
-    assert(delta_buffer.empty());
-    return CachedExtentRef(new LBALeafNode(*this));
-  };
+  bool validate_stable_children() final {
+    LOG_PREFIX(LBALeafNode::validate_stable_children);
+    if (this->children.empty()) {
+      return false;
+    }
 
-  delta_buffer_t delta_buffer;
-  delta_buffer_t *maybe_get_delta_buffer() {
-    return is_mutation_pending() ? &delta_buffer : nullptr;
+    for (auto i : *this) {
+      auto child = (LogicalCachedExtent*)this->children[i.get_offset()];
+      if (is_valid_child_ptr(child) && child->get_laddr() != i.get_key()) {
+       SUBERROR(seastore_fixedkv_tree,
+         "stable child not valid: child {}, key {}",
+         *child,
+         i.get_key());
+       ceph_abort();
+       return false;
+      }
+    }
+    return true;
   }
 
   void update(
-    const_iterator iter,
-    lba_map_val_t val) {
-    val.paddr = maybe_generate_relative(val.paddr);
-    return journal_update(
+    internal_const_iterator_t iter,
+    lba_map_val_t val,
+    LogicalCachedExtent* nextent) final {
+    LOG_PREFIX(LBALeafNode::update);
+    if (nextent) {
+      SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, {}",
+       this->pending_for_transaction,
+       iter.get_offset(),
+       *nextent);
+      // child-ptr may already be correct, see LBAManager::update_mappings()
+      this->update_child_ptr(iter, nextent);
+    }
+    val.paddr = this->maybe_generate_relative(val.paddr);
+    return this->journal_update(
       iter,
       val,
-      maybe_get_delta_buffer());
+      this->maybe_get_delta_buffer());
   }
 
-  auto insert(
-    const_iterator iter,
+  internal_const_iterator_t insert(
+    internal_const_iterator_t iter,
     laddr_t addr,
-    lba_map_val_t val) {
-    val.paddr = maybe_generate_relative(val.paddr);
-    journal_insert(
+    lba_map_val_t val,
+    LogicalCachedExtent* nextent) final {
+    LOG_PREFIX(LBALeafNode::insert);
+    SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}, extent {}",
+      this->pending_for_transaction,
+      iter.get_offset(),
+      addr,
+      (void*)nextent);
+    this->insert_child_ptr(iter, nextent);
+    val.paddr = this->maybe_generate_relative(val.paddr);
+    this->journal_insert(
       iter,
       addr,
       val,
-      maybe_get_delta_buffer());
+      this->maybe_get_delta_buffer());
     return iter;
   }
 
-  void remove(const_iterator iter) {
-    return journal_remove(
+  void remove(internal_const_iterator_t iter) final {
+    LOG_PREFIX(LBALeafNode::remove);
+    SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}",
+      this->pending_for_transaction,
+      iter.get_offset(),
+      iter.get_key());
+    assert(iter != this->end());
+    this->remove_child_ptr(iter);
+    return this->journal_remove(
       iter,
-      maybe_get_delta_buffer());
-  }
-
-
-  std::tuple<Ref, Ref, laddr_t>
-  make_split_children(op_context_t c) {
-    auto left = c.cache.alloc_new_extent<LBALeafNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto right = c.cache.alloc_new_extent<LBALeafNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto pivot = split_into(*left, *right);
-    left->pin.set_range(left->get_meta());
-    right->pin.set_range(right->get_meta());
-    return std::make_tuple(
-      left,
-      right,
-      pivot);
-  }
-
-  Ref make_full_merge(
-    op_context_t c,
-    Ref &right) {
-    auto replacement = c.cache.alloc_new_extent<LBALeafNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    replacement->merge_from(*this, *right->cast<LBALeafNode>());
-    replacement->pin.set_range(replacement->get_meta());
-    return replacement;
-  }
-
-  std::tuple<Ref, Ref, laddr_t>
-  make_balanced(
-    op_context_t c,
-    Ref &_right,
-    bool prefer_left) {
-    ceph_assert(_right->get_type() == get_type());
-    auto &right = *_right->cast<LBALeafNode>();
-    auto replacement_left = c.cache.alloc_new_extent<LBALeafNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto replacement_right = c.cache.alloc_new_extent<LBALeafNode>(
-      c.trans, LBA_BLOCK_SIZE);
-
-    auto pivot = balance_into_new_nodes(
-      *this,
-      right,
-      prefer_left,
-      *replacement_left,
-      *replacement_right);
-
-    replacement_left->pin.set_range(replacement_left->get_meta());
-    replacement_right->pin.set_range(replacement_right->get_meta());
-    return std::make_tuple(
-      replacement_left,
-      replacement_right,
-      pivot);
+      this->maybe_get_delta_buffer());
   }
 
   // See LBAInternalNode, same concept
   void resolve_relative_addrs(paddr_t base);
-  void node_resolve_vals(iterator from, iterator to) const final {
-    if (is_initial_pending()) {
+  void node_resolve_vals(
+    internal_iterator_t from,
+    internal_iterator_t to) const final
+  {
+    if (this->is_initial_pending()) {
       for (auto i = from; i != to; ++i) {
        auto val = i->get_val();
        if (val.paddr.is_relative()) {
          assert(val.paddr.is_block_relative());
-         val.paddr = get_paddr().add_relative(val.paddr);
+         val.paddr = this->get_paddr().add_relative(val.paddr);
          i->set_val(val);
        }
       }
     }
   }
-  void node_unresolve_vals(iterator from, iterator to) const final {
-    if (is_initial_pending()) {
+  void node_unresolve_vals(
+    internal_iterator_t from,
+    internal_iterator_t to) const final
+  {
+    if (this->is_initial_pending()) {
       for (auto i = from; i != to; ++i) {
        auto val = i->get_val();
        if (val.paddr.is_relative()) {
          auto val = i->get_val();
          assert(val.paddr.is_record_relative());
-         val.paddr = val.paddr - get_paddr();
+         val.paddr = val.paddr.block_relative_to(this->get_paddr());
          i->set_val(val);
        }
       }
     }
   }
 
-  ceph::bufferlist get_delta() final {
-    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
-    delta_buffer.copy_out(bptr.c_str(), bptr.length());
-    ceph::bufferlist bl;
-    bl.push_back(bptr);
-    return bl;
-  }
-
-  void apply_delta_and_adjust_crc(
-    paddr_t base, const ceph::bufferlist &_bl) final {
-    assert(_bl.length());
-    ceph::bufferlist bl = _bl;
-    bl.rebuild();
-    delta_buffer_t buffer;
-    buffer.copy_in(bl.front().c_str(), bl.front().length());
-    buffer.replay(*this);
-    set_last_committed_crc(get_crc32c());
-    resolve_relative_addrs(base);
-  }
-
   extent_types_t get_type() const final {
     return TYPE;
   }
 
-  std::ostream &print_detail(std::ostream &out) const final;
-
-  constexpr static size_t get_min_capacity() {
-    return (get_capacity() - 1) / 2;
-  }
-
-  bool at_max_capacity() const {
-    assert(get_size() <= get_capacity());
-    return get_size() == get_capacity();
-  }
-
-  bool at_min_capacity() const {
-    assert(get_size() >= (get_min_capacity() - 1));
-    return get_size() <= get_min_capacity();
-  }
-
-  bool below_min_capacity() const {
-    assert(get_size() >= (get_min_capacity() - 1));
-    return get_size() < get_min_capacity();
-  }
+  std::ostream &_print_detail(std::ostream &out) const final;
 };
 using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
 
 }
+
+#if FMT_VERSION >= 90000
+template <> struct fmt::formatter<crimson::os::seastore::lba_manager::btree::lba_node_meta_t> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::lba_manager::btree::lba_map_val_t> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::lba_manager::btree::LBAInternalNode> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::lba_manager::btree::LBALeafNode> : fmt::ostream_formatter {};
+#endif