1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
7 #include <seastar/core/metrics.hh>
9 #include "include/buffer.h"
10 #include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
11 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
12 #include "crimson/os/seastore/logging.h"
14 SET_SUBSYS(seastore_lba
);
/*
 * - DEBUG: modification operations
 * - TRACE: read operations, DEBUG details
 */
22 namespace crimson::os::seastore
{
25 Transaction::tree_stats_t
& get_tree_stats(Transaction
&t
)
27 return t
.get_lba_tree_stats();
30 template Transaction::tree_stats_t
&
32 crimson::os::seastore::lba_manager::btree::LBABtree
>(
36 phy_tree_root_t
& get_phy_tree_root(root_t
&r
)
41 template phy_tree_root_t
&
43 crimson::os::seastore::lba_manager::btree::LBABtree
>(root_t
&r
);
46 const get_phy_tree_root_node_ret get_phy_tree_root_node
<
47 crimson::os::seastore::lba_manager::btree::LBABtree
>(
48 const RootBlockRef
&root_block
, op_context_t
<laddr_t
> c
)
50 auto lba_root
= root_block
->lba_root_node
;
52 ceph_assert(lba_root
->is_initial_pending()
53 == root_block
->is_pending());
55 trans_intr::make_interruptible(
56 c
.cache
.get_extent_viewable_by_trans(c
.trans
, lba_root
))};
57 } else if (root_block
->is_pending()) {
58 auto &prior
= static_cast<RootBlock
&>(*root_block
->get_prior_instance());
59 lba_root
= prior
.lba_root_node
;
62 trans_intr::make_interruptible(
63 c
.cache
.get_extent_viewable_by_trans(c
.trans
, lba_root
))};
66 trans_intr::make_interruptible(
67 seastar::make_ready_future
<
68 CachedExtentRef
>(CachedExtentRef()))};
72 trans_intr::make_interruptible(
73 seastar::make_ready_future
<
74 CachedExtentRef
>(CachedExtentRef()))};
78 template <typename ROOT
>
79 void link_phy_tree_root_node(RootBlockRef
&root_block
, ROOT
* lba_root
) {
80 root_block
->lba_root_node
= lba_root
;
81 ceph_assert(lba_root
!= nullptr);
82 lba_root
->root_block
= root_block
;
85 template void link_phy_tree_root_node(
86 RootBlockRef
&root_block
, lba_manager::btree::LBAInternalNode
* lba_root
);
87 template void link_phy_tree_root_node(
88 RootBlockRef
&root_block
, lba_manager::btree::LBALeafNode
* lba_root
);
89 template void link_phy_tree_root_node(
90 RootBlockRef
&root_block
, lba_manager::btree::LBANode
* lba_root
);
93 void unlink_phy_tree_root_node
<laddr_t
>(RootBlockRef
&root_block
) {
94 root_block
->lba_root_node
= nullptr;
99 namespace crimson::os::seastore::lba_manager::btree
{
101 BtreeLBAManager::mkfs_ret
102 BtreeLBAManager::mkfs(
105 LOG_PREFIX(BtreeLBAManager::mkfs
);
107 return cache
.get_root(t
).si_then([this, &t
](auto croot
) {
108 assert(croot
->is_mutation_pending());
109 croot
->get_root().lba_root
= LBABtree::mkfs(croot
, get_context(t
));
110 return mkfs_iertr::now();
111 }).handle_error_interruptible(
112 mkfs_iertr::pass_further
{},
113 crimson::ct_error::assert_all
{
114 "Invalid error in BtreeLBAManager::mkfs"
119 BtreeLBAManager::get_mappings_ret
120 BtreeLBAManager::get_mappings(
122 laddr_t offset
, extent_len_t length
)
124 LOG_PREFIX(BtreeLBAManager::get_mappings
);
125 TRACET("{}~{}", t
, offset
, length
);
126 auto c
= get_context(t
);
127 return with_btree_state
<LBABtree
, lba_pin_list_t
>(
130 [c
, offset
, length
, FNAME
](auto &btree
, auto &ret
) {
131 return LBABtree::iterate_repeat(
133 btree
.upper_bound_right(c
, offset
),
134 [&ret
, offset
, length
, c
, FNAME
](auto &pos
) {
135 if (pos
.is_end() || pos
.get_key() >= (offset
+ length
)) {
136 TRACET("{}~{} done with {} results",
137 c
.trans
, offset
, length
, ret
.size());
138 return typename
LBABtree::iterate_repeat_ret_inner(
139 interruptible::ready_future_marker
{},
140 seastar::stop_iteration::yes
);
142 TRACET("{}~{} got {}, {}, repeat ...",
143 c
.trans
, offset
, length
, pos
.get_key(), pos
.get_val());
144 ceph_assert((pos
.get_key() + pos
.get_val().len
) > offset
);
145 ret
.push_back(pos
.get_pin(c
));
146 return typename
LBABtree::iterate_repeat_ret_inner(
147 interruptible::ready_future_marker
{},
148 seastar::stop_iteration::no
);
153 BtreeLBAManager::get_mappings_ret
154 BtreeLBAManager::get_mappings(
158 LOG_PREFIX(BtreeLBAManager::get_mappings
);
159 TRACET("{}", t
, list
);
160 auto l
= std::make_unique
<laddr_list_t
>(std::move(list
));
161 auto retptr
= std::make_unique
<lba_pin_list_t
>();
163 return trans_intr::do_for_each(
166 [this, &t
, &ret
](const auto &p
) {
167 return this->get_mappings(t
, p
.first
, p
.second
).si_then(
169 ret
.splice(ret
.end(), res
, res
.begin(), res
.end());
170 return get_mappings_iertr::now();
172 }).si_then([l
=std::move(l
), retptr
=std::move(retptr
)]() mutable {
173 return std::move(*retptr
);
177 BtreeLBAManager::get_mapping_ret
178 BtreeLBAManager::get_mapping(
182 LOG_PREFIX(BtreeLBAManager::get_mapping
);
183 TRACET("{}", t
, offset
);
184 auto c
= get_context(t
);
185 return with_btree_ret
<LBABtree
, LBAMappingRef
>(
188 [FNAME
, c
, offset
](auto &btree
) {
189 return btree
.lower_bound(
191 ).si_then([FNAME
, offset
, c
](auto iter
) -> get_mapping_ret
{
192 if (iter
.is_end() || iter
.get_key() != offset
) {
193 ERRORT("laddr={} doesn't exist", c
.trans
, offset
);
194 return crimson::ct_error::enoent::make();
196 TRACET("{} got {}, {}",
197 c
.trans
, offset
, iter
.get_key(), iter
.get_val());
198 auto e
= iter
.get_pin(c
);
199 return get_mapping_ret(
200 interruptible::ready_future_marker
{},
207 BtreeLBAManager::alloc_extent_ret
208 BtreeLBAManager::alloc_extent(
213 LogicalCachedExtent
* nextent
)
218 std::optional
<typename
LBABtree::iterator
> insert_iter
;
219 std::optional
<typename
LBABtree::iterator
> ret
;
221 state_t(laddr_t hint
) : last_end(hint
) {}
224 LOG_PREFIX(BtreeLBAManager::alloc_extent
);
225 TRACET("{}~{}, hint={}", t
, addr
, len
, hint
);
226 auto c
= get_context(t
);
227 ++stats
.num_alloc_extents
;
228 auto lookup_attempts
= stats
.num_alloc_extents_iter_nexts
;
229 return crimson::os::seastore::with_btree_state
<LBABtree
, state_t
>(
233 [this, FNAME
, c
, hint
, len
, addr
, lookup_attempts
,
234 &t
, nextent
](auto &btree
, auto &state
) {
235 return LBABtree::iterate_repeat(
237 btree
.upper_bound_right(c
, hint
),
238 [this, &state
, len
, addr
, &t
, hint
, FNAME
, lookup_attempts
](auto &pos
) {
239 ++stats
.num_alloc_extents_iter_nexts
;
241 DEBUGT("{}~{}, hint={}, state: end, done with {} attempts, insert at {}",
243 stats
.num_alloc_extents_iter_nexts
- lookup_attempts
,
245 state
.insert_iter
= pos
;
246 return typename
LBABtree::iterate_repeat_ret_inner(
247 interruptible::ready_future_marker
{},
248 seastar::stop_iteration::yes
);
249 } else if (pos
.get_key() >= (state
.last_end
+ len
)) {
250 DEBUGT("{}~{}, hint={}, state: {}~{}, done with {} attempts, insert at {} -- {}",
252 pos
.get_key(), pos
.get_val().len
,
253 stats
.num_alloc_extents_iter_nexts
- lookup_attempts
,
256 state
.insert_iter
= pos
;
257 return typename
LBABtree::iterate_repeat_ret_inner(
258 interruptible::ready_future_marker
{},
259 seastar::stop_iteration::yes
);
261 state
.last_end
= pos
.get_key() + pos
.get_val().len
;
262 TRACET("{}~{}, hint={}, state: {}~{}, repeat ... -- {}",
264 pos
.get_key(), pos
.get_val().len
,
266 return typename
LBABtree::iterate_repeat_ret_inner(
267 interruptible::ready_future_marker
{},
268 seastar::stop_iteration::no
);
270 }).si_then([FNAME
, c
, addr
, len
, hint
, &btree
, &state
, nextent
] {
275 lba_map_val_t
{len
, addr
, 1, 0},
277 ).si_then([&state
, FNAME
, c
, addr
, len
, hint
, nextent
](auto &&p
) {
278 auto [iter
, inserted
] = std::move(p
);
279 TRACET("{}~{}, hint={}, inserted at {}",
280 c
.trans
, addr
, len
, hint
, state
.last_end
);
282 nextent
->set_laddr(iter
.get_key());
284 ceph_assert(inserted
);
288 }).si_then([c
](auto &&state
) {
289 return state
.ret
->get_pin(c
);
293 static bool is_lba_node(const CachedExtent
&e
)
295 return is_lba_node(e
.get_type());
298 BtreeLBAManager::base_iertr::template future
<>
300 op_context_t
<laddr_t
> c
,
301 const CachedExtentRef
&e
,
305 if (e
->is_logical()) {
306 auto logn
= e
->cast
<LogicalCachedExtent
>();
307 return btree
.lower_bound(
310 ).si_then([e
, c
, logn
, &ret
](auto iter
) {
311 LOG_PREFIX(BtreeLBAManager::init_cached_extent
);
312 if (!iter
.is_end() &&
313 iter
.get_key() == logn
->get_laddr() &&
314 iter
.get_val().paddr
== logn
->get_paddr()) {
315 assert(!iter
.get_leaf_node()->is_pending());
316 iter
.get_leaf_node()->link_child(logn
.get(), iter
.get_leaf_pos());
317 logn
->set_laddr(iter
.get_pin(c
)->get_key());
318 ceph_assert(iter
.get_val().len
== e
->get_length());
319 DEBUGT("logical extent {} live", c
.trans
, *logn
);
322 DEBUGT("logical extent {} not live", c
.trans
, *logn
);
327 return btree
.init_cached_extent(c
, e
328 ).si_then([&ret
](bool is_alive
) {
334 BtreeLBAManager::init_cached_extent_ret
335 BtreeLBAManager::init_cached_extent(
339 LOG_PREFIX(BtreeLBAManager::init_cached_extent
);
341 return seastar::do_with(bool(), [this, e
, &t
](bool &ret
) {
342 auto c
= get_context(t
);
343 return with_btree
<LBABtree
>(
345 [c
, e
, &ret
](auto &btree
) -> base_iertr::future
<> {
346 LOG_PREFIX(BtreeLBAManager::init_cached_extent
);
347 DEBUGT("extent {}", c
.trans
, *e
);
348 return _init_cached_extent(c
, e
, btree
, ret
);
350 ).si_then([&ret
] { return ret
; });
354 BtreeLBAManager::check_child_trackers_ret
355 BtreeLBAManager::check_child_trackers(
357 auto c
= get_context(t
);
358 return with_btree
<LBABtree
>(
361 return btree
.check_child_trackers(c
);
365 BtreeLBAManager::scan_mappings_ret
366 BtreeLBAManager::scan_mappings(
370 scan_mappings_func_t
&&f
)
372 LOG_PREFIX(BtreeLBAManager::scan_mappings
);
373 DEBUGT("begin: {}, end: {}", t
, begin
, end
);
375 auto c
= get_context(t
);
376 return with_btree
<LBABtree
>(
379 [c
, f
=std::move(f
), begin
, end
](auto &btree
) mutable {
380 return LBABtree::iterate_repeat(
382 btree
.upper_bound_right(c
, begin
),
383 [f
=std::move(f
), begin
, end
](auto &pos
) {
384 if (pos
.is_end() || pos
.get_key() >= end
) {
385 return typename
LBABtree::iterate_repeat_ret_inner(
386 interruptible::ready_future_marker
{},
387 seastar::stop_iteration::yes
);
389 ceph_assert((pos
.get_key() + pos
.get_val().len
) > begin
);
390 f(pos
.get_key(), pos
.get_val().paddr
, pos
.get_val().len
);
391 return typename
LBABtree::iterate_repeat_ret_inner(
392 interruptible::ready_future_marker
{},
393 seastar::stop_iteration::no
);
398 BtreeLBAManager::rewrite_extent_ret
399 BtreeLBAManager::rewrite_extent(
401 CachedExtentRef extent
)
403 LOG_PREFIX(BtreeLBAManager::rewrite_extent
);
404 if (extent
->has_been_invalidated()) {
405 ERRORT("extent has been invalidated -- {}", t
, *extent
);
408 assert(!extent
->is_logical());
410 if (is_lba_node(*extent
)) {
411 DEBUGT("rewriting lba extent -- {}", t
, *extent
);
412 auto c
= get_context(t
);
413 return with_btree
<LBABtree
>(
416 [c
, extent
](auto &btree
) mutable {
417 return btree
.rewrite_extent(c
, extent
);
420 DEBUGT("skip non lba extent -- {}", t
, *extent
);
421 return rewrite_extent_iertr::now();
425 BtreeLBAManager::update_mapping_ret
426 BtreeLBAManager::update_mapping(
431 LogicalCachedExtent
*nextent
)
433 LOG_PREFIX(BtreeLBAManager::update_mapping
);
434 TRACET("laddr={}, paddr {} => {}", t
, laddr
, prev_addr
, addr
);
435 return _update_mapping(
439 const lba_map_val_t
&in
) {
440 assert(!addr
.is_null());
441 lba_map_val_t ret
= in
;
442 ceph_assert(in
.paddr
== prev_addr
);
447 ).si_then([&t
, laddr
, prev_addr
, addr
, FNAME
](auto result
) {
448 DEBUGT("laddr={}, paddr {} => {} done -- {}",
449 t
, laddr
, prev_addr
, addr
, result
);
451 update_mapping_iertr::pass_further
{},
452 /* ENOENT in particular should be impossible */
453 crimson::ct_error::assert_all
{
454 "Invalid error in BtreeLBAManager::update_mapping"
459 BtreeLBAManager::get_physical_extent_if_live_ret
460 BtreeLBAManager::get_physical_extent_if_live(
467 LOG_PREFIX(BtreeLBAManager::get_physical_extent_if_live
);
468 DEBUGT("{}, laddr={}, paddr={}, length={}",
469 t
, type
, laddr
, addr
, len
);
470 ceph_assert(is_lba_node(type
));
471 auto c
= get_context(t
);
472 return with_btree_ret
<LBABtree
, CachedExtentRef
>(
475 [c
, type
, addr
, laddr
, len
](auto &btree
) {
476 if (type
== extent_types_t::LADDR_INTERNAL
) {
477 return btree
.get_internal_if_live(c
, addr
, laddr
, len
);
479 assert(type
== extent_types_t::LADDR_LEAF
||
480 type
== extent_types_t::DINK_LADDR_LEAF
);
481 return btree
.get_leaf_if_live(c
, addr
, laddr
, len
);
486 void BtreeLBAManager::register_metrics()
488 LOG_PREFIX(BtreeLBAManager::register_metrics
);
491 namespace sm
= seastar::metrics
;
497 stats
.num_alloc_extents
,
498 sm::description("total number of lba alloc_extent operations")
501 "alloc_extents_iter_nexts",
502 stats
.num_alloc_extents_iter_nexts
,
503 sm::description("total number of iterator next operations during extent allocation")
509 BtreeLBAManager::update_refcount_ret
510 BtreeLBAManager::update_refcount(
515 LOG_PREFIX(BtreeLBAManager::update_refcount
);
516 TRACET("laddr={}, delta={}", t
, addr
, delta
);
517 return _update_mapping(
520 [delta
](const lba_map_val_t
&in
) {
521 lba_map_val_t out
= in
;
522 ceph_assert((int)out
.refcount
+ delta
>= 0);
523 out
.refcount
+= delta
;
527 ).si_then([&t
, addr
, delta
, FNAME
](auto result
) {
528 DEBUGT("laddr={}, delta={} done -- {}", t
, addr
, delta
, result
);
529 return ref_update_result_t
{
537 BtreeLBAManager::_update_mapping_ret
538 BtreeLBAManager::_update_mapping(
542 LogicalCachedExtent
* nextent
)
544 auto c
= get_context(t
);
545 return with_btree_ret
<LBABtree
, lba_map_val_t
>(
548 [f
=std::move(f
), c
, addr
, nextent
](auto &btree
) mutable {
549 return btree
.lower_bound(
551 ).si_then([&btree
, f
=std::move(f
), c
, addr
, nextent
](auto iter
)
552 -> _update_mapping_ret
{
553 if (iter
.is_end() || iter
.get_key() != addr
) {
554 LOG_PREFIX(BtreeLBAManager::_update_mapping
);
555 ERRORT("laddr={} doesn't exist", c
.trans
, addr
);
556 return crimson::ct_error::enoent::make();
559 auto ret
= f(iter
.get_val());
560 if (ret
.refcount
== 0) {
573 ).si_then([ret
](auto) {