]> git.proxmox.com Git - ceph.git/blame - ceph/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / crimson / os / seastore / lba_manager / btree / btree_lba_manager.cc
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include <sys/mman.h>
5#include <string.h>
6
1e59de90 7#include <seastar/core/metrics.hh>
f67539c2
TL
8
9#include "include/buffer.h"
10#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
20effc67 11#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
1e59de90
TL
12#include "crimson/os/seastore/logging.h"
13
14SET_SUBSYS(seastore_lba);
15/*
16 * levels:
17 * - INFO: mkfs
18 * - DEBUG: modification operations
19 * - TRACE: read operations, DEBUG details
20 */
21
22namespace crimson::os::seastore {
f67539c2 23
1e59de90
TL
24template <typename T>
25Transaction::tree_stats_t& get_tree_stats(Transaction &t)
26{
27 return t.get_lba_tree_stats();
28}
29
30template Transaction::tree_stats_t&
31get_tree_stats<
32 crimson::os::seastore::lba_manager::btree::LBABtree>(
33 Transaction &t);
34
35template <typename T>
36phy_tree_root_t& get_phy_tree_root(root_t &r)
37{
38 return r.lba_root;
39}
f67539c2 40
1e59de90
TL
41template phy_tree_root_t&
42get_phy_tree_root<
43 crimson::os::seastore::lba_manager::btree::LBABtree>(root_t &r);
44
45template <>
46const get_phy_tree_root_node_ret get_phy_tree_root_node<
47 crimson::os::seastore::lba_manager::btree::LBABtree>(
48 const RootBlockRef &root_block, op_context_t<laddr_t> c)
49{
50 auto lba_root = root_block->lba_root_node;
51 if (lba_root) {
52 ceph_assert(lba_root->is_initial_pending()
53 == root_block->is_pending());
54 return {true,
55 trans_intr::make_interruptible(
56 c.cache.get_extent_viewable_by_trans(c.trans, lba_root))};
57 } else if (root_block->is_pending()) {
58 auto &prior = static_cast<RootBlock&>(*root_block->get_prior_instance());
59 lba_root = prior.lba_root_node;
60 if (lba_root) {
61 return {true,
62 trans_intr::make_interruptible(
63 c.cache.get_extent_viewable_by_trans(c.trans, lba_root))};
64 } else {
65 return {false,
66 trans_intr::make_interruptible(
aee94f69
TL
67 Cache::get_extent_ertr::make_ready_future<
68 CachedExtentRef>())};
1e59de90
TL
69 }
70 } else {
71 return {false,
72 trans_intr::make_interruptible(
aee94f69
TL
73 Cache::get_extent_ertr::make_ready_future<
74 CachedExtentRef>())};
f67539c2
TL
75 }
76}
77
1e59de90
TL
78template <typename ROOT>
79void link_phy_tree_root_node(RootBlockRef &root_block, ROOT* lba_root) {
80 root_block->lba_root_node = lba_root;
81 ceph_assert(lba_root != nullptr);
82 lba_root->root_block = root_block;
83}
84
85template void link_phy_tree_root_node(
86 RootBlockRef &root_block, lba_manager::btree::LBAInternalNode* lba_root);
87template void link_phy_tree_root_node(
88 RootBlockRef &root_block, lba_manager::btree::LBALeafNode* lba_root);
89template void link_phy_tree_root_node(
90 RootBlockRef &root_block, lba_manager::btree::LBANode* lba_root);
91
92template <>
93void unlink_phy_tree_root_node<laddr_t>(RootBlockRef &root_block) {
94 root_block->lba_root_node = nullptr;
95}
96
97}
20effc67 98
f67539c2
TL
99namespace crimson::os::seastore::lba_manager::btree {
100
1e59de90
TL
101BtreeLBAManager::mkfs_ret
102BtreeLBAManager::mkfs(
f67539c2
TL
103 Transaction &t)
104{
1e59de90
TL
105 LOG_PREFIX(BtreeLBAManager::mkfs);
106 INFOT("start", t);
20effc67 107 return cache.get_root(t).si_then([this, &t](auto croot) {
1e59de90
TL
108 assert(croot->is_mutation_pending());
109 croot->get_root().lba_root = LBABtree::mkfs(croot, get_context(t));
20effc67
TL
110 return mkfs_iertr::now();
111 }).handle_error_interruptible(
112 mkfs_iertr::pass_further{},
113 crimson::ct_error::assert_all{
114 "Invalid error in BtreeLBAManager::mkfs"
115 }
116 );
f67539c2
TL
117}
118
20effc67
TL
119BtreeLBAManager::get_mappings_ret
120BtreeLBAManager::get_mappings(
f67539c2
TL
121 Transaction &t,
122 laddr_t offset, extent_len_t length)
123{
20effc67 124 LOG_PREFIX(BtreeLBAManager::get_mappings);
1e59de90 125 TRACET("{}~{}", t, offset, length);
20effc67 126 auto c = get_context(t);
1e59de90
TL
127 return with_btree_state<LBABtree, lba_pin_list_t>(
128 cache,
20effc67 129 c,
aee94f69
TL
130 [c, offset, length, FNAME, this](auto &btree, auto &ret) {
131 return seastar::do_with(
132 std::list<BtreeLBAMappingRef>(),
133 [offset, length, c, FNAME, this, &ret, &btree](auto &pin_list) {
134 return LBABtree::iterate_repeat(
135 c,
136 btree.upper_bound_right(c, offset),
137 [&pin_list, offset, length, c, FNAME](auto &pos) {
138 if (pos.is_end() || pos.get_key() >= (offset + length)) {
139 TRACET("{}~{} done with {} results",
140 c.trans, offset, length, pin_list.size());
141 return LBABtree::iterate_repeat_ret_inner(
142 interruptible::ready_future_marker{},
143 seastar::stop_iteration::yes);
144 }
145 TRACET("{}~{} got {}, {}, repeat ...",
146 c.trans, offset, length, pos.get_key(), pos.get_val());
147 ceph_assert((pos.get_key() + pos.get_val().len) > offset);
148 pin_list.push_back(pos.get_pin(c));
149 return LBABtree::iterate_repeat_ret_inner(
20effc67 150 interruptible::ready_future_marker{},
aee94f69
TL
151 seastar::stop_iteration::no);
152 }).si_then([this, &ret, c, &pin_list] {
153 return _get_original_mappings(c, pin_list
154 ).si_then([&ret](auto _ret) {
155 ret = std::move(_ret);
156 });
157 });
20effc67 158 });
f67539c2
TL
159 });
160}
161
aee94f69
TL
162BtreeLBAManager::_get_original_mappings_ret
163BtreeLBAManager::_get_original_mappings(
164 op_context_t<laddr_t> c,
165 std::list<BtreeLBAMappingRef> &pin_list)
166{
167 return seastar::do_with(
168 lba_pin_list_t(),
169 [this, c, &pin_list](auto &ret) {
170 return trans_intr::do_for_each(
171 pin_list,
172 [this, c, &ret](auto &pin) {
173 LOG_PREFIX(BtreeLBAManager::get_mappings);
174 if (pin->get_raw_val().is_paddr()) {
175 ret.emplace_back(std::move(pin));
176 return get_mappings_iertr::now();
177 }
178 TRACET(
179 "getting original mapping for indirect mapping {}~{}",
180 c.trans, pin->get_key(), pin->get_length());
181 return this->get_mappings(
182 c.trans, pin->get_raw_val().get_laddr(), pin->get_length()
183 ).si_then([&pin, &ret, c](auto new_pin_list) {
184 LOG_PREFIX(BtreeLBAManager::get_mappings);
185 assert(new_pin_list.size() == 1);
186 auto &new_pin = new_pin_list.front();
187 auto intermediate_key = pin->get_raw_val().get_laddr();
188 assert(!new_pin->is_indirect());
189 assert(new_pin->get_key() <= intermediate_key);
190 assert(new_pin->get_key() + new_pin->get_length() >=
191 intermediate_key + pin->get_length());
192
193 TRACET("Got mapping {}~{} for indirect mapping {}~{}, "
194 "intermediate_key {}",
195 c.trans,
196 new_pin->get_key(), new_pin->get_length(),
197 pin->get_key(), pin->get_length(),
198 pin->get_raw_val().get_laddr());
199 auto &btree_new_pin = static_cast<BtreeLBAMapping&>(*new_pin);
200 btree_new_pin.set_key_for_indirect(
201 pin->get_key(),
202 pin->get_length(),
203 pin->get_raw_val().get_laddr());
204 ret.emplace_back(std::move(new_pin));
205 return seastar::now();
206 }).handle_error_interruptible(
207 crimson::ct_error::input_output_error::pass_further{},
208 crimson::ct_error::assert_all("unexpected enoent")
209 );
210 }
211 ).si_then([&ret] {
212 return std::move(ret);
213 });
214 });
215}
216
217
f67539c2
TL
218BtreeLBAManager::get_mappings_ret
219BtreeLBAManager::get_mappings(
220 Transaction &t,
221 laddr_list_t &&list)
222{
20effc67 223 LOG_PREFIX(BtreeLBAManager::get_mappings);
1e59de90 224 TRACET("{}", t, list);
f67539c2
TL
225 auto l = std::make_unique<laddr_list_t>(std::move(list));
226 auto retptr = std::make_unique<lba_pin_list_t>();
227 auto &ret = *retptr;
20effc67 228 return trans_intr::do_for_each(
f67539c2
TL
229 l->begin(),
230 l->end(),
231 [this, &t, &ret](const auto &p) {
1e59de90 232 return this->get_mappings(t, p.first, p.second).si_then(
f67539c2
TL
233 [&ret](auto res) {
234 ret.splice(ret.end(), res, res.begin(), res.end());
20effc67 235 return get_mappings_iertr::now();
f67539c2 236 });
20effc67 237 }).si_then([l=std::move(l), retptr=std::move(retptr)]() mutable {
f67539c2
TL
238 return std::move(*retptr);
239 });
240}
241
20effc67
TL
242BtreeLBAManager::get_mapping_ret
243BtreeLBAManager::get_mapping(
f67539c2 244 Transaction &t,
20effc67 245 laddr_t offset)
f67539c2 246{
20effc67 247 LOG_PREFIX(BtreeLBAManager::get_mapping);
1e59de90 248 TRACET("{}", t, offset);
aee94f69
TL
249 return _get_mapping(t, offset
250 ).si_then([](auto pin) {
251 return get_mapping_iertr::make_ready_future<LBAMappingRef>(std::move(pin));
252 });
253}
254
255BtreeLBAManager::_get_mapping_ret
256BtreeLBAManager::_get_mapping(
257 Transaction &t,
258 laddr_t offset)
259{
260 LOG_PREFIX(BtreeLBAManager::_get_mapping);
261 TRACET("{}", t, offset);
20effc67 262 auto c = get_context(t);
aee94f69 263 return with_btree_ret<LBABtree, BtreeLBAMappingRef>(
1e59de90 264 cache,
20effc67 265 c,
aee94f69 266 [FNAME, c, offset, this](auto &btree) {
20effc67
TL
267 return btree.lower_bound(
268 c, offset
aee94f69 269 ).si_then([FNAME, offset, c](auto iter) -> _get_mapping_ret {
20effc67 270 if (iter.is_end() || iter.get_key() != offset) {
1e59de90 271 ERRORT("laddr={} doesn't exist", c.trans, offset);
20effc67
TL
272 return crimson::ct_error::enoent::make();
273 } else {
1e59de90
TL
274 TRACET("{} got {}, {}",
275 c.trans, offset, iter.get_key(), iter.get_val());
276 auto e = iter.get_pin(c);
aee94f69 277 return _get_mapping_ret(
20effc67
TL
278 interruptible::ready_future_marker{},
279 std::move(e));
280 }
aee94f69
TL
281 }).si_then([this, c](auto pin) -> _get_mapping_ret {
282 if (pin->get_raw_val().is_laddr()) {
283 return seastar::do_with(
284 std::move(pin),
285 [this, c](auto &pin) {
286 return _get_mapping(
287 c.trans, pin->get_raw_val().get_laddr()
288 ).si_then([&pin](auto new_pin) {
289 ceph_assert(pin->get_length() == new_pin->get_length());
290 new_pin->set_key_for_indirect(
291 pin->get_key(),
292 pin->get_length());
293 return new_pin;
294 });
295 });
296 } else {
297 return get_mapping_iertr::make_ready_future<BtreeLBAMappingRef>(std::move(pin));
298 }
f67539c2
TL
299 });
300 });
301}
302
20effc67 303BtreeLBAManager::alloc_extent_ret
aee94f69 304BtreeLBAManager::_alloc_extent(
f67539c2 305 Transaction &t,
20effc67
TL
306 laddr_t hint,
307 extent_len_t len,
aee94f69
TL
308 pladdr_t addr,
309 paddr_t actual_addr,
310 laddr_t intermediate_base,
1e59de90 311 LogicalCachedExtent* nextent)
f67539c2 312{
20effc67
TL
313 struct state_t {
314 laddr_t last_end;
315
1e59de90
TL
316 std::optional<typename LBABtree::iterator> insert_iter;
317 std::optional<typename LBABtree::iterator> ret;
20effc67
TL
318
319 state_t(laddr_t hint) : last_end(hint) {}
320 };
321
aee94f69 322 LOG_PREFIX(BtreeLBAManager::_alloc_extent);
1e59de90 323 TRACET("{}~{}, hint={}", t, addr, len, hint);
20effc67 324 auto c = get_context(t);
1e59de90
TL
325 ++stats.num_alloc_extents;
326 auto lookup_attempts = stats.num_alloc_extents_iter_nexts;
327 return crimson::os::seastore::with_btree_state<LBABtree, state_t>(
328 cache,
20effc67
TL
329 c,
330 hint,
1e59de90
TL
331 [this, FNAME, c, hint, len, addr, lookup_attempts,
332 &t, nextent](auto &btree, auto &state) {
20effc67
TL
333 return LBABtree::iterate_repeat(
334 c,
335 btree.upper_bound_right(c, hint),
1e59de90
TL
336 [this, &state, len, addr, &t, hint, FNAME, lookup_attempts](auto &pos) {
337 ++stats.num_alloc_extents_iter_nexts;
338 if (pos.is_end()) {
339 DEBUGT("{}~{}, hint={}, state: end, done with {} attempts, insert at {}",
340 t, addr, len, hint,
341 stats.num_alloc_extents_iter_nexts - lookup_attempts,
342 state.last_end);
343 state.insert_iter = pos;
344 return typename LBABtree::iterate_repeat_ret_inner(
345 interruptible::ready_future_marker{},
346 seastar::stop_iteration::yes);
347 } else if (pos.get_key() >= (state.last_end + len)) {
348 DEBUGT("{}~{}, hint={}, state: {}~{}, done with {} attempts, insert at {} -- {}",
349 t, addr, len, hint,
350 pos.get_key(), pos.get_val().len,
351 stats.num_alloc_extents_iter_nexts - lookup_attempts,
20effc67 352 state.last_end,
1e59de90 353 pos.get_val());
20effc67 354 state.insert_iter = pos;
1e59de90 355 return typename LBABtree::iterate_repeat_ret_inner(
20effc67
TL
356 interruptible::ready_future_marker{},
357 seastar::stop_iteration::yes);
358 } else {
359 state.last_end = pos.get_key() + pos.get_val().len;
1e59de90
TL
360 TRACET("{}~{}, hint={}, state: {}~{}, repeat ... -- {}",
361 t, addr, len, hint,
362 pos.get_key(), pos.get_val().len,
363 pos.get_val());
364 return typename LBABtree::iterate_repeat_ret_inner(
20effc67
TL
365 interruptible::ready_future_marker{},
366 seastar::stop_iteration::no);
367 }
1e59de90 368 }).si_then([FNAME, c, addr, len, hint, &btree, &state, nextent] {
20effc67
TL
369 return btree.insert(
370 c,
371 *state.insert_iter,
372 state.last_end,
aee94f69 373 lba_map_val_t{len, pladdr_t(addr), 1, 0},
1e59de90
TL
374 nextent
375 ).si_then([&state, FNAME, c, addr, len, hint, nextent](auto &&p) {
20effc67 376 auto [iter, inserted] = std::move(p);
1e59de90
TL
377 TRACET("{}~{}, hint={}, inserted at {}",
378 c.trans, addr, len, hint, state.last_end);
379 if (nextent) {
aee94f69 380 ceph_assert(addr.is_paddr());
1e59de90
TL
381 nextent->set_laddr(iter.get_key());
382 }
20effc67
TL
383 ceph_assert(inserted);
384 state.ret = iter;
385 });
386 });
aee94f69
TL
387 }).si_then([c, actual_addr, addr, intermediate_base](auto &&state) {
388 auto ret_pin = state.ret->get_pin(c);
389 if (actual_addr != P_ADDR_NULL) {
390 ceph_assert(addr.is_laddr());
391 ret_pin->set_paddr(actual_addr);
392 ret_pin->set_intermediate_base(intermediate_base);
393 } else {
394 ceph_assert(addr.is_paddr());
395 }
396 return alloc_extent_iertr::make_ready_future<LBAMappingRef>(
397 std::move(ret_pin));
f67539c2
TL
398 });
399}
400
f67539c2
TL
401static bool is_lba_node(const CachedExtent &e)
402{
403 return is_lba_node(e.get_type());
404}
405
1e59de90
TL
406BtreeLBAManager::base_iertr::template future<>
407_init_cached_extent(
408 op_context_t<laddr_t> c,
409 const CachedExtentRef &e,
410 LBABtree &btree,
411 bool &ret)
f67539c2 412{
1e59de90
TL
413 if (e->is_logical()) {
414 auto logn = e->cast<LogicalCachedExtent>();
415 return btree.lower_bound(
416 c,
417 logn->get_laddr()
418 ).si_then([e, c, logn, &ret](auto iter) {
419 LOG_PREFIX(BtreeLBAManager::init_cached_extent);
420 if (!iter.is_end() &&
421 iter.get_key() == logn->get_laddr() &&
aee94f69
TL
422 iter.get_val().pladdr.is_paddr() &&
423 iter.get_val().pladdr.get_paddr() == logn->get_paddr()) {
1e59de90
TL
424 assert(!iter.get_leaf_node()->is_pending());
425 iter.get_leaf_node()->link_child(logn.get(), iter.get_leaf_pos());
426 logn->set_laddr(iter.get_pin(c)->get_key());
427 ceph_assert(iter.get_val().len == e->get_length());
428 DEBUGT("logical extent {} live", c.trans, *logn);
429 ret = true;
430 } else {
431 DEBUGT("logical extent {} not live", c.trans, *logn);
432 ret = false;
433 }
434 });
f67539c2 435 } else {
1e59de90
TL
436 return btree.init_cached_extent(c, e
437 ).si_then([&ret](bool is_alive) {
438 ret = is_alive;
439 });
f67539c2 440 }
f67539c2
TL
441}
442
1e59de90
TL
443BtreeLBAManager::init_cached_extent_ret
444BtreeLBAManager::init_cached_extent(
f67539c2
TL
445 Transaction &t,
446 CachedExtentRef e)
447{
20effc67 448 LOG_PREFIX(BtreeLBAManager::init_cached_extent);
1e59de90
TL
449 TRACET("{}", t, *e);
450 return seastar::do_with(bool(), [this, e, &t](bool &ret) {
451 auto c = get_context(t);
452 return with_btree<LBABtree>(
453 cache, c,
454 [c, e, &ret](auto &btree) -> base_iertr::future<> {
455 LOG_PREFIX(BtreeLBAManager::init_cached_extent);
456 DEBUGT("extent {}", c.trans, *e);
457 return _init_cached_extent(c, e, btree, ret);
458 }
459 ).si_then([&ret] { return ret; });
460 });
461}
462
463BtreeLBAManager::check_child_trackers_ret
464BtreeLBAManager::check_child_trackers(
465 Transaction &t) {
20effc67 466 auto c = get_context(t);
1e59de90
TL
467 return with_btree<LBABtree>(
468 cache, c,
469 [c](auto &btree) {
470 return btree.check_child_trackers(c);
471 });
f67539c2
TL
472}
473
1e59de90
TL
474BtreeLBAManager::scan_mappings_ret
475BtreeLBAManager::scan_mappings(
f67539c2
TL
476 Transaction &t,
477 laddr_t begin,
478 laddr_t end,
479 scan_mappings_func_t &&f)
480{
20effc67
TL
481 LOG_PREFIX(BtreeLBAManager::scan_mappings);
482 DEBUGT("begin: {}, end: {}", t, begin, end);
483
484 auto c = get_context(t);
1e59de90
TL
485 return with_btree<LBABtree>(
486 cache,
20effc67
TL
487 c,
488 [c, f=std::move(f), begin, end](auto &btree) mutable {
489 return LBABtree::iterate_repeat(
490 c,
491 btree.upper_bound_right(c, begin),
20effc67
TL
492 [f=std::move(f), begin, end](auto &pos) {
493 if (pos.is_end() || pos.get_key() >= end) {
1e59de90 494 return typename LBABtree::iterate_repeat_ret_inner(
20effc67
TL
495 interruptible::ready_future_marker{},
496 seastar::stop_iteration::yes);
497 }
498 ceph_assert((pos.get_key() + pos.get_val().len) > begin);
aee94f69
TL
499 f(pos.get_key(), pos.get_val().pladdr.get_paddr(), pos.get_val().len);
500 return LBABtree::iterate_repeat_ret_inner(
20effc67
TL
501 interruptible::ready_future_marker{},
502 seastar::stop_iteration::no);
f67539c2
TL
503 });
504 });
505}
506
1e59de90
TL
507BtreeLBAManager::rewrite_extent_ret
508BtreeLBAManager::rewrite_extent(
f67539c2
TL
509 Transaction &t,
510 CachedExtentRef extent)
511{
20effc67
TL
512 LOG_PREFIX(BtreeLBAManager::rewrite_extent);
513 if (extent->has_been_invalidated()) {
1e59de90
TL
514 ERRORT("extent has been invalidated -- {}", t, *extent);
515 ceph_abort();
20effc67 516 }
20effc67
TL
517 assert(!extent->is_logical());
518
20effc67 519 if (is_lba_node(*extent)) {
1e59de90 520 DEBUGT("rewriting lba extent -- {}", t, *extent);
20effc67 521 auto c = get_context(t);
1e59de90
TL
522 return with_btree<LBABtree>(
523 cache,
20effc67
TL
524 c,
525 [c, extent](auto &btree) mutable {
1e59de90 526 return btree.rewrite_extent(c, extent);
20effc67 527 });
f67539c2 528 } else {
1e59de90 529 DEBUGT("skip non lba extent -- {}", t, *extent);
20effc67 530 return rewrite_extent_iertr::now();
f67539c2
TL
531 }
532}
533
1e59de90 534BtreeLBAManager::update_mapping_ret
20effc67
TL
535BtreeLBAManager::update_mapping(
536 Transaction& t,
537 laddr_t laddr,
538 paddr_t prev_addr,
1e59de90
TL
539 paddr_t addr,
540 LogicalCachedExtent *nextent)
20effc67 541{
1e59de90
TL
542 LOG_PREFIX(BtreeLBAManager::update_mapping);
543 TRACET("laddr={}, paddr {} => {}", t, laddr, prev_addr, addr);
544 return _update_mapping(
20effc67
TL
545 t,
546 laddr,
547 [prev_addr, addr](
548 const lba_map_val_t &in) {
549 assert(!addr.is_null());
550 lba_map_val_t ret = in;
aee94f69
TL
551 ceph_assert(in.pladdr.is_paddr());
552 ceph_assert(in.pladdr.get_paddr() == prev_addr);
553 ret.pladdr = addr;
20effc67 554 return ret;
1e59de90
TL
555 },
556 nextent
557 ).si_then([&t, laddr, prev_addr, addr, FNAME](auto result) {
558 DEBUGT("laddr={}, paddr {} => {} done -- {}",
559 t, laddr, prev_addr, addr, result);
560 },
561 update_mapping_iertr::pass_further{},
562 /* ENOENT in particular should be impossible */
563 crimson::ct_error::assert_all{
564 "Invalid error in BtreeLBAManager::update_mapping"
565 }
566 );
20effc67
TL
567}
568
f67539c2
TL
569BtreeLBAManager::get_physical_extent_if_live_ret
570BtreeLBAManager::get_physical_extent_if_live(
571 Transaction &t,
572 extent_types_t type,
573 paddr_t addr,
574 laddr_t laddr,
1e59de90 575 extent_len_t len)
f67539c2 576{
1e59de90
TL
577 LOG_PREFIX(BtreeLBAManager::get_physical_extent_if_live);
578 DEBUGT("{}, laddr={}, paddr={}, length={}",
579 t, type, laddr, addr, len);
f67539c2 580 ceph_assert(is_lba_node(type));
20effc67 581 auto c = get_context(t);
1e59de90
TL
582 return with_btree_ret<LBABtree, CachedExtentRef>(
583 cache,
20effc67
TL
584 c,
585 [c, type, addr, laddr, len](auto &btree) {
586 if (type == extent_types_t::LADDR_INTERNAL) {
587 return btree.get_internal_if_live(c, addr, laddr, len);
588 } else {
1e59de90
TL
589 assert(type == extent_types_t::LADDR_LEAF ||
590 type == extent_types_t::DINK_LADDR_LEAF);
20effc67
TL
591 return btree.get_leaf_if_live(c, addr, laddr, len);
592 }
f67539c2 593 });
f67539c2
TL
594}
595
20effc67 596void BtreeLBAManager::register_metrics()
f67539c2 597{
1e59de90
TL
598 LOG_PREFIX(BtreeLBAManager::register_metrics);
599 DEBUG("start");
600 stats = {};
20effc67
TL
601 namespace sm = seastar::metrics;
602 metrics.add_group(
603 "LBA",
604 {
605 sm::make_counter(
606 "alloc_extents",
1e59de90 607 stats.num_alloc_extents,
20effc67
TL
608 sm::description("total number of lba alloc_extent operations")
609 ),
610 sm::make_counter(
611 "alloc_extents_iter_nexts",
1e59de90 612 stats.num_alloc_extents_iter_nexts,
20effc67
TL
613 sm::description("total number of iterator next operations during extent allocation")
614 ),
615 }
616 );
f67539c2
TL
617}
618
aee94f69
TL
619BtreeLBAManager::ref_iertr::future<std::optional<std::pair<paddr_t, extent_len_t>>>
620BtreeLBAManager::_decref_intermediate(
621 Transaction &t,
622 laddr_t addr,
623 extent_len_t len)
624{
625 auto c = get_context(t);
626 return with_btree<LBABtree>(
627 cache,
628 c,
629 [c, addr, len](auto &btree) mutable {
630 return btree.upper_bound_right(
631 c, addr
632 ).si_then([&btree, addr, len, c](auto iter) {
633 return seastar::do_with(
634 std::move(iter),
635 [&btree, addr, len, c](auto &iter) {
636 ceph_assert(!iter.is_end());
637 ceph_assert(iter.get_key() <= addr);
638 auto val = iter.get_val();
639 ceph_assert(iter.get_key() + val.len >= addr + len);
640 ceph_assert(val.pladdr.is_paddr());
641 ceph_assert(val.refcount >= 1);
642 val.refcount -= 1;
643
644 LOG_PREFIX(BtreeLBAManager::_decref_intermediate);
645 TRACET("decreased refcount of intermediate key {} -- {}",
646 c.trans,
647 iter.get_key(),
648 val);
649
650 if (!val.refcount) {
651 return btree.remove(c, iter
652 ).si_then([val] {
653 return std::make_optional<
654 std::pair<paddr_t, extent_len_t>>(
655 val.pladdr.get_paddr(), val.len);
656 });
657 } else {
658 return btree.update(c, iter, val, nullptr
659 ).si_then([](auto) {
660 return seastar::make_ready_future<
661 std::optional<std::pair<paddr_t, extent_len_t>>>(std::nullopt);
662 });
663 }
664 });
665 });
666 });
667}
668
1e59de90
TL
669BtreeLBAManager::update_refcount_ret
670BtreeLBAManager::update_refcount(
f67539c2
TL
671 Transaction &t,
672 laddr_t addr,
aee94f69
TL
673 int delta,
674 bool cascade_remove)
f67539c2 675{
20effc67 676 LOG_PREFIX(BtreeLBAManager::update_refcount);
1e59de90
TL
677 TRACET("laddr={}, delta={}", t, addr, delta);
678 return _update_mapping(
f67539c2
TL
679 t,
680 addr,
681 [delta](const lba_map_val_t &in) {
682 lba_map_val_t out = in;
683 ceph_assert((int)out.refcount + delta >= 0);
684 out.refcount += delta;
685 return out;
1e59de90
TL
686 },
687 nullptr
aee94f69 688 ).si_then([&t, addr, delta, FNAME, this, cascade_remove](auto result) {
1e59de90 689 DEBUGT("laddr={}, delta={} done -- {}", t, addr, delta, result);
aee94f69
TL
690 auto fut = ref_iertr::make_ready_future<
691 std::optional<std::pair<paddr_t, extent_len_t>>>();
692 if (!result.refcount && result.pladdr.is_laddr() && cascade_remove) {
693 fut = _decref_intermediate(
694 t,
695 result.pladdr.get_laddr(),
696 result.len
697 );
698 }
699 return fut.si_then([result](auto removed) {
700 if (result.pladdr.is_laddr()
701 && removed) {
702 return ref_update_result_t{
703 result.refcount,
704 removed->first,
705 removed->second};
706 } else {
707 return ref_update_result_t{
708 result.refcount,
709 result.pladdr,
710 result.len
711 };
712 }
713 });
1e59de90 714 });
f67539c2
TL
715}
716
1e59de90
TL
717BtreeLBAManager::_update_mapping_ret
718BtreeLBAManager::_update_mapping(
f67539c2
TL
719 Transaction &t,
720 laddr_t addr,
1e59de90
TL
721 update_func_t &&f,
722 LogicalCachedExtent* nextent)
f67539c2 723{
20effc67 724 auto c = get_context(t);
1e59de90
TL
725 return with_btree_ret<LBABtree, lba_map_val_t>(
726 cache,
20effc67 727 c,
1e59de90 728 [f=std::move(f), c, addr, nextent](auto &btree) mutable {
20effc67
TL
729 return btree.lower_bound(
730 c, addr
1e59de90
TL
731 ).si_then([&btree, f=std::move(f), c, addr, nextent](auto iter)
732 -> _update_mapping_ret {
20effc67 733 if (iter.is_end() || iter.get_key() != addr) {
1e59de90
TL
734 LOG_PREFIX(BtreeLBAManager::_update_mapping);
735 ERRORT("laddr={} doesn't exist", c.trans, addr);
20effc67
TL
736 return crimson::ct_error::enoent::make();
737 }
738
739 auto ret = f(iter.get_val());
740 if (ret.refcount == 0) {
741 return btree.remove(
742 c,
743 iter
744 ).si_then([ret] {
745 return ret;
746 });
747 } else {
748 return btree.update(
749 c,
750 iter,
1e59de90
TL
751 ret,
752 nextent
20effc67
TL
753 ).si_then([ret](auto) {
754 return ret;
755 });
756 }
757 });
758 });
f67539c2
TL
759}
760
f67539c2 761}