1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
12 #include "include/buffer.h"
14 #include "crimson/common/fixed_kv_node_layout.h"
15 #include "crimson/common/errorator.h"
16 #include "crimson/os/seastore/lba_manager.h"
17 #include "crimson/os/seastore/seastore_types.h"
18 #include "crimson/os/seastore/cache.h"
19 #include "crimson/os/seastore/cached_extent.h"
20 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
21 #include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
23 namespace crimson::os::seastore::lba_manager::btree
{
25 constexpr size_t LBA_BLOCK_SIZE
= 4096;
30 * On disk layout for lba_node_meta_t
32 struct lba_node_meta_le_t
{
33 laddr_le_t begin
= laddr_le_t(0);
34 laddr_le_t end
= laddr_le_t(0);
35 depth_le_t depth
= init_les32(0);
37 lba_node_meta_le_t() = default;
38 lba_node_meta_le_t(const lba_node_meta_le_t
&) = default;
39 explicit lba_node_meta_le_t(const lba_node_meta_t
&val
)
40 : begin(init_le64(val
.begin
)),
41 end(init_le64(val
.end
)),
42 depth(init_les32(val
.depth
)) {}
44 operator lba_node_meta_t() const {
45 return lba_node_meta_t
{ begin
, end
, depth
};
53 * Abstracts operations on and layout of internal nodes for the
57 * size : uint32_t[1] 4b
59 * meta : lba_node_meta_le_t[3] (1*24)b
60 * keys : laddr_t[255] (254*8)b
61 * values : paddr_t[255] (254*8)b
64 * TODO: make the above capacity calculation part of FixedKVNodeLayout
65 * TODO: the above alignment probably isn't portable without further work
67 constexpr size_t INTERNAL_NODE_CAPACITY
= 254;
68 struct LBAInternalNode
70 common::FixedKVNodeLayout
<
71 INTERNAL_NODE_CAPACITY
,
72 lba_node_meta_t
, lba_node_meta_le_t
,
74 paddr_t
, paddr_le_t
> {
75 using internal_iterator_t
= const_iterator
;
76 template <typename
... T
>
77 LBAInternalNode(T
&&... t
) :
78 LBANode(std::forward
<T
>(t
)...),
79 FixedKVNodeLayout(get_bptr().c_str()) {}
81 static constexpr extent_types_t type
= extent_types_t::LADDR_INTERNAL
;
83 lba_node_meta_t
get_node_meta() const final
{ return get_meta(); }
85 CachedExtentRef
duplicate_for_write() final
{
86 assert(delta_buffer
.empty());
87 return CachedExtentRef(new LBAInternalNode(*this));
90 delta_buffer_t delta_buffer
;
91 delta_buffer_t
*maybe_get_delta_buffer() {
92 return is_mutation_pending() ? &delta_buffer
: nullptr;
95 lookup_ret
lookup(op_context_t c
, laddr_t addr
, depth_t depth
) final
;
97 lookup_range_ret
lookup_range(
100 extent_len_t len
) final
;
105 lba_map_val_t val
) final
;
107 mutate_mapping_ret
mutate_mapping(
110 mutate_func_t
&&f
) final
;
111 mutate_mapping_ret
mutate_mapping_internal(
115 mutate_func_t
&&f
) final
;
117 mutate_internal_address_ret
mutate_internal_address(
121 paddr_t paddr
) final
;
123 find_hole_ret
find_hole(
127 extent_len_t len
) final
;
129 scan_mappings_ret
scan_mappings(
133 scan_mappings_func_t
&f
) final
;
135 scan_mapped_space_ret
scan_mapped_space(
137 scan_mapped_space_func_t
&f
) final
;
139 std::tuple
<LBANodeRef
, LBANodeRef
, laddr_t
>
140 make_split_children(op_context_t c
) final
{
141 auto left
= c
.cache
.alloc_new_extent
<LBAInternalNode
>(
142 c
.trans
, LBA_BLOCK_SIZE
);
143 auto right
= c
.cache
.alloc_new_extent
<LBAInternalNode
>(
144 c
.trans
, LBA_BLOCK_SIZE
);
145 auto pivot
= split_into(*left
, *right
);
146 left
->pin
.set_range(left
->get_meta());
147 right
->pin
.set_range(right
->get_meta());
148 return std::make_tuple(
154 LBANodeRef
make_full_merge(
156 LBANodeRef
&right
) final
{
157 auto replacement
= c
.cache
.alloc_new_extent
<LBAInternalNode
>(
158 c
.trans
, LBA_BLOCK_SIZE
);
159 replacement
->merge_from(*this, *right
->cast
<LBAInternalNode
>());
160 replacement
->pin
.set_range(replacement
->get_meta());
164 std::tuple
<LBANodeRef
, LBANodeRef
, laddr_t
>
168 bool prefer_left
) final
{
169 ceph_assert(_right
->get_type() == type
);
170 auto &right
= *_right
->cast
<LBAInternalNode
>();
171 auto replacement_left
= c
.cache
.alloc_new_extent
<LBAInternalNode
>(
172 c
.trans
, LBA_BLOCK_SIZE
);
173 auto replacement_right
= c
.cache
.alloc_new_extent
<LBAInternalNode
>(
174 c
.trans
, LBA_BLOCK_SIZE
);
176 auto pivot
= balance_into_new_nodes(
183 replacement_left
->pin
.set_range(replacement_left
->get_meta());
184 replacement_right
->pin
.set_range(replacement_right
->get_meta());
185 return std::make_tuple(
192 * Internal relative addresses on read or in memory prior to commit
193 * are either record or block relative depending on whether this
194 * physical node is is_initial_pending() or just is_pending().
196 * User passes appropriate base depending on lifecycle and
197 * resolve_relative_addrs fixes up relative internal references
200 void resolve_relative_addrs(paddr_t base
) final
;
201 void node_resolve_vals(iterator from
, iterator to
) const final
{
202 if (is_initial_pending()) {
203 for (auto i
= from
; i
!= to
; ++i
) {
204 if (i
->get_val().is_relative()) {
205 assert(i
->get_val().is_block_relative());
206 i
->set_val(get_paddr().add_relative(i
->get_val()));
211 void node_unresolve_vals(iterator from
, iterator to
) const final
{
212 if (is_initial_pending()) {
213 for (auto i
= from
; i
!= to
; ++i
) {
214 if (i
->get_val().is_relative()) {
215 assert(i
->get_val().is_record_relative());
216 i
->set_val(i
->get_val() - get_paddr());
222 extent_types_t
get_type() const final
{
226 std::ostream
&print_detail(std::ostream
&out
) const final
;
228 ceph::bufferlist
get_delta() final
{
229 assert(!delta_buffer
.empty());
230 ceph::buffer::ptr
bptr(delta_buffer
.get_bytes());
231 delta_buffer
.copy_out(bptr
.c_str(), bptr
.length());
237 void apply_delta_and_adjust_crc(
238 paddr_t base
, const ceph::bufferlist
&_bl
) final
{
239 assert(_bl
.length());
240 ceph::bufferlist bl
= _bl
;
242 delta_buffer_t buffer
;
243 buffer
.copy_in(bl
.front().c_str(), bl
.front().length());
244 buffer
.replay(*this);
245 set_last_committed_crc(get_crc32c());
246 resolve_relative_addrs(base
);
249 bool at_max_capacity() const final
{
250 return get_size() == get_capacity();
253 bool at_min_capacity() const {
254 return get_size() == (get_capacity() / 2);
257 /// returns iterators containing [l, r)
258 std::pair
<internal_iterator_t
, internal_iterator_t
> bound(
259 laddr_t l
, laddr_t r
) {
262 for (; retl
!= end(); ++retl
) {
263 if (retl
->get_next_key_or_max() > l
)
267 for (; retr
!= end(); ++retr
) {
268 if (retr
->get_key() >= r
)
271 return std::make_pair(retl
, retr
);
274 using split_ertr
= crimson::errorator
<
275 crimson::ct_error::input_output_error
277 using split_ret
= split_ertr::future
<LBANodeRef
>;
278 split_ret
split_entry(
284 using merge_ertr
= crimson::errorator
<
285 crimson::ct_error::input_output_error
287 using merge_ret
= merge_ertr::future
<LBANodeRef
>;
288 merge_ret
merge_entry(
295 /// returns iterator for subtree containing laddr
296 internal_iterator_t
get_containing_child(laddr_t laddr
);
302 * Abstracts operations on and layout of leaf nodes for the
306 * size : uint32_t[1] 4b
308 * meta : lba_node_meta_le_t[3] (1*24)b
309 * keys : laddr_t[170] (145*8)b
310 * values : lba_map_val_t[170] (145*20)b
313 * TODO: update FixedKVNodeLayout to handle the above calculation
314 * TODO: the above alignment probably isn't portable without further work
316 constexpr size_t LEAF_NODE_CAPACITY
= 145;
321 * On disk layout for lba_map_val_t.
323 struct lba_map_val_le_t
{
324 extent_len_le_t len
= init_extent_len_le_t(0);
326 ceph_le32 refcount
= init_le32(0);
327 ceph_le32 checksum
= init_le32(0);
329 lba_map_val_le_t() = default;
330 lba_map_val_le_t(const lba_map_val_le_t
&) = default;
331 explicit lba_map_val_le_t(const lba_map_val_t
&val
)
332 : len(init_extent_len_le_t(val
.len
)),
333 paddr(paddr_le_t(val
.paddr
)),
334 refcount(init_le32(val
.refcount
)),
335 checksum(init_le32(val
.checksum
)) {}
337 operator lba_map_val_t() const {
338 return lba_map_val_t
{ len
, paddr
, refcount
, checksum
};
344 common::FixedKVNodeLayout
<
346 lba_node_meta_t
, lba_node_meta_le_t
,
348 lba_map_val_t
, lba_map_val_le_t
> {
349 using internal_iterator_t
= const_iterator
;
350 template <typename
... T
>
351 LBALeafNode(T
&&... t
) :
352 LBANode(std::forward
<T
>(t
)...),
353 FixedKVNodeLayout(get_bptr().c_str()) {}
355 static constexpr extent_types_t type
= extent_types_t::LADDR_LEAF
;
357 lba_node_meta_t
get_node_meta() const final
{ return get_meta(); }
359 CachedExtentRef
duplicate_for_write() final
{
360 assert(delta_buffer
.empty());
361 return CachedExtentRef(new LBALeafNode(*this));
364 delta_buffer_t delta_buffer
;
365 delta_buffer_t
*maybe_get_delta_buffer() {
366 return is_mutation_pending() ? &delta_buffer
: nullptr;
369 lookup_ret
lookup(op_context_t c
, laddr_t addr
, depth_t depth
) final
372 lookup_ertr::ready_future_marker
{},
376 lookup_range_ret
lookup_range(
379 extent_len_t len
) final
;
384 lba_map_val_t val
) final
;
386 mutate_mapping_ret
mutate_mapping(
389 mutate_func_t
&&f
) final
;
390 mutate_mapping_ret
mutate_mapping_internal(
394 mutate_func_t
&&f
) final
;
396 mutate_internal_address_ret
mutate_internal_address(
400 paddr_t paddr
) final
;
402 find_hole_ret
find_hole(
406 extent_len_t len
) final
;
408 scan_mappings_ret
scan_mappings(
412 scan_mappings_func_t
&f
) final
;
414 scan_mapped_space_ret
scan_mapped_space(
416 scan_mapped_space_func_t
&f
) final
;
418 std::tuple
<LBANodeRef
, LBANodeRef
, laddr_t
>
419 make_split_children(op_context_t c
) final
{
420 auto left
= c
.cache
.alloc_new_extent
<LBALeafNode
>(
421 c
.trans
, LBA_BLOCK_SIZE
);
422 auto right
= c
.cache
.alloc_new_extent
<LBALeafNode
>(
423 c
.trans
, LBA_BLOCK_SIZE
);
424 auto pivot
= split_into(*left
, *right
);
425 left
->pin
.set_range(left
->get_meta());
426 right
->pin
.set_range(right
->get_meta());
427 return std::make_tuple(
433 LBANodeRef
make_full_merge(
435 LBANodeRef
&right
) final
{
436 auto replacement
= c
.cache
.alloc_new_extent
<LBALeafNode
>(
437 c
.trans
, LBA_BLOCK_SIZE
);
438 replacement
->merge_from(*this, *right
->cast
<LBALeafNode
>());
439 replacement
->pin
.set_range(replacement
->get_meta());
443 std::tuple
<LBANodeRef
, LBANodeRef
, laddr_t
>
447 bool prefer_left
) final
{
448 ceph_assert(_right
->get_type() == type
);
449 auto &right
= *_right
->cast
<LBALeafNode
>();
450 auto replacement_left
= c
.cache
.alloc_new_extent
<LBALeafNode
>(
451 c
.trans
, LBA_BLOCK_SIZE
);
452 auto replacement_right
= c
.cache
.alloc_new_extent
<LBALeafNode
>(
453 c
.trans
, LBA_BLOCK_SIZE
);
455 auto pivot
= balance_into_new_nodes(
462 replacement_left
->pin
.set_range(replacement_left
->get_meta());
463 replacement_right
->pin
.set_range(replacement_right
->get_meta());
464 return std::make_tuple(
470 // See LBAInternalNode, same concept
471 void resolve_relative_addrs(paddr_t base
) final
;
472 void node_resolve_vals(iterator from
, iterator to
) const final
{
473 if (is_initial_pending()) {
474 for (auto i
= from
; i
!= to
; ++i
) {
475 auto val
= i
->get_val();
476 if (val
.paddr
.is_relative()) {
477 assert(val
.paddr
.is_block_relative());
478 val
.paddr
= get_paddr().add_relative(val
.paddr
);
484 void node_unresolve_vals(iterator from
, iterator to
) const final
{
485 if (is_initial_pending()) {
486 for (auto i
= from
; i
!= to
; ++i
) {
487 auto val
= i
->get_val();
488 if (val
.paddr
.is_relative()) {
489 auto val
= i
->get_val();
490 assert(val
.paddr
.is_record_relative());
491 val
.paddr
= val
.paddr
- get_paddr();
498 ceph::bufferlist
get_delta() final
{
499 assert(!delta_buffer
.empty());
500 ceph::buffer::ptr
bptr(delta_buffer
.get_bytes());
501 delta_buffer
.copy_out(bptr
.c_str(), bptr
.length());
507 void apply_delta_and_adjust_crc(
508 paddr_t base
, const ceph::bufferlist
&_bl
) final
{
509 assert(_bl
.length());
510 ceph::bufferlist bl
= _bl
;
512 delta_buffer_t buffer
;
513 buffer
.copy_in(bl
.front().c_str(), bl
.front().length());
514 buffer
.replay(*this);
515 set_last_committed_crc(get_crc32c());
516 resolve_relative_addrs(base
);
519 extent_types_t
get_type() const final
{
523 std::ostream
&print_detail(std::ostream
&out
) const final
;
525 bool at_max_capacity() const final
{
526 return get_size() == get_capacity();
529 bool at_min_capacity() const final
{
530 return get_size() == (get_capacity() / 2);
533 /// returns iterators <lb, ub> containing addresses [l, r)
534 std::pair
<internal_iterator_t
, internal_iterator_t
> bound(
535 laddr_t l
, laddr_t r
) {
538 for (; retl
!= end(); ++retl
) {
539 if (retl
->get_key() >= l
|| (retl
->get_key() + retl
->get_val().len
) > l
)
543 for (; retr
!= end(); ++retr
) {
544 if (retr
->get_key() >= r
)
547 return std::make_pair(retl
, retr
);
550 std::pair
<internal_iterator_t
, internal_iterator_t
>
551 get_leaf_entries(laddr_t addr
, extent_len_t len
);
553 using LBALeafNodeRef
= TCachedExtentRef
<LBALeafNode
>;