]> git.proxmox.com Git - ceph.git/blame - ceph/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / crimson / os / seastore / lba_manager / btree / btree_lba_manager.cc
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include <sys/mman.h>
5#include <string.h>
6
1e59de90 7#include <seastar/core/metrics.hh>
f67539c2
TL
8
9#include "include/buffer.h"
10#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
20effc67 11#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
1e59de90
TL
12#include "crimson/os/seastore/logging.h"
13
14SET_SUBSYS(seastore_lba);
15/*
16 * levels:
17 * - INFO: mkfs
18 * - DEBUG: modification operations
19 * - TRACE: read operations, DEBUG details
20 */
21
22namespace crimson::os::seastore {
f67539c2 23
1e59de90
TL
24template <typename T>
25Transaction::tree_stats_t& get_tree_stats(Transaction &t)
26{
27 return t.get_lba_tree_stats();
28}
29
30template Transaction::tree_stats_t&
31get_tree_stats<
32 crimson::os::seastore::lba_manager::btree::LBABtree>(
33 Transaction &t);
34
35template <typename T>
36phy_tree_root_t& get_phy_tree_root(root_t &r)
37{
38 return r.lba_root;
39}
f67539c2 40
1e59de90
TL
41template phy_tree_root_t&
42get_phy_tree_root<
43 crimson::os::seastore::lba_manager::btree::LBABtree>(root_t &r);
44
45template <>
46const get_phy_tree_root_node_ret get_phy_tree_root_node<
47 crimson::os::seastore::lba_manager::btree::LBABtree>(
48 const RootBlockRef &root_block, op_context_t<laddr_t> c)
49{
50 auto lba_root = root_block->lba_root_node;
51 if (lba_root) {
52 ceph_assert(lba_root->is_initial_pending()
53 == root_block->is_pending());
54 return {true,
55 trans_intr::make_interruptible(
56 c.cache.get_extent_viewable_by_trans(c.trans, lba_root))};
57 } else if (root_block->is_pending()) {
58 auto &prior = static_cast<RootBlock&>(*root_block->get_prior_instance());
59 lba_root = prior.lba_root_node;
60 if (lba_root) {
61 return {true,
62 trans_intr::make_interruptible(
63 c.cache.get_extent_viewable_by_trans(c.trans, lba_root))};
64 } else {
65 return {false,
66 trans_intr::make_interruptible(
67 seastar::make_ready_future<
68 CachedExtentRef>(CachedExtentRef()))};
69 }
70 } else {
71 return {false,
72 trans_intr::make_interruptible(
73 seastar::make_ready_future<
74 CachedExtentRef>(CachedExtentRef()))};
f67539c2
TL
75 }
76}
77
1e59de90
TL
78template <typename ROOT>
79void link_phy_tree_root_node(RootBlockRef &root_block, ROOT* lba_root) {
80 root_block->lba_root_node = lba_root;
81 ceph_assert(lba_root != nullptr);
82 lba_root->root_block = root_block;
83}
84
85template void link_phy_tree_root_node(
86 RootBlockRef &root_block, lba_manager::btree::LBAInternalNode* lba_root);
87template void link_phy_tree_root_node(
88 RootBlockRef &root_block, lba_manager::btree::LBALeafNode* lba_root);
89template void link_phy_tree_root_node(
90 RootBlockRef &root_block, lba_manager::btree::LBANode* lba_root);
91
92template <>
93void unlink_phy_tree_root_node<laddr_t>(RootBlockRef &root_block) {
94 root_block->lba_root_node = nullptr;
95}
96
97}
20effc67 98
f67539c2
TL
99namespace crimson::os::seastore::lba_manager::btree {
100
1e59de90
TL
101BtreeLBAManager::mkfs_ret
102BtreeLBAManager::mkfs(
f67539c2
TL
103 Transaction &t)
104{
1e59de90
TL
105 LOG_PREFIX(BtreeLBAManager::mkfs);
106 INFOT("start", t);
20effc67 107 return cache.get_root(t).si_then([this, &t](auto croot) {
1e59de90
TL
108 assert(croot->is_mutation_pending());
109 croot->get_root().lba_root = LBABtree::mkfs(croot, get_context(t));
20effc67
TL
110 return mkfs_iertr::now();
111 }).handle_error_interruptible(
112 mkfs_iertr::pass_further{},
113 crimson::ct_error::assert_all{
114 "Invalid error in BtreeLBAManager::mkfs"
115 }
116 );
f67539c2
TL
117}
118
20effc67
TL
119BtreeLBAManager::get_mappings_ret
120BtreeLBAManager::get_mappings(
f67539c2
TL
121 Transaction &t,
122 laddr_t offset, extent_len_t length)
123{
20effc67 124 LOG_PREFIX(BtreeLBAManager::get_mappings);
1e59de90 125 TRACET("{}~{}", t, offset, length);
20effc67 126 auto c = get_context(t);
1e59de90
TL
127 return with_btree_state<LBABtree, lba_pin_list_t>(
128 cache,
20effc67 129 c,
1e59de90 130 [c, offset, length, FNAME](auto &btree, auto &ret) {
20effc67
TL
131 return LBABtree::iterate_repeat(
132 c,
133 btree.upper_bound_right(c, offset),
1e59de90 134 [&ret, offset, length, c, FNAME](auto &pos) {
20effc67 135 if (pos.is_end() || pos.get_key() >= (offset + length)) {
1e59de90
TL
136 TRACET("{}~{} done with {} results",
137 c.trans, offset, length, ret.size());
138 return typename LBABtree::iterate_repeat_ret_inner(
20effc67
TL
139 interruptible::ready_future_marker{},
140 seastar::stop_iteration::yes);
141 }
1e59de90
TL
142 TRACET("{}~{} got {}, {}, repeat ...",
143 c.trans, offset, length, pos.get_key(), pos.get_val());
20effc67 144 ceph_assert((pos.get_key() + pos.get_val().len) > offset);
1e59de90
TL
145 ret.push_back(pos.get_pin(c));
146 return typename LBABtree::iterate_repeat_ret_inner(
20effc67
TL
147 interruptible::ready_future_marker{},
148 seastar::stop_iteration::no);
149 });
f67539c2
TL
150 });
151}
152
f67539c2
TL
153BtreeLBAManager::get_mappings_ret
154BtreeLBAManager::get_mappings(
155 Transaction &t,
156 laddr_list_t &&list)
157{
20effc67 158 LOG_PREFIX(BtreeLBAManager::get_mappings);
1e59de90 159 TRACET("{}", t, list);
f67539c2
TL
160 auto l = std::make_unique<laddr_list_t>(std::move(list));
161 auto retptr = std::make_unique<lba_pin_list_t>();
162 auto &ret = *retptr;
20effc67 163 return trans_intr::do_for_each(
f67539c2
TL
164 l->begin(),
165 l->end(),
166 [this, &t, &ret](const auto &p) {
1e59de90 167 return this->get_mappings(t, p.first, p.second).si_then(
f67539c2
TL
168 [&ret](auto res) {
169 ret.splice(ret.end(), res, res.begin(), res.end());
20effc67 170 return get_mappings_iertr::now();
f67539c2 171 });
20effc67 172 }).si_then([l=std::move(l), retptr=std::move(retptr)]() mutable {
f67539c2
TL
173 return std::move(*retptr);
174 });
175}
176
20effc67
TL
177BtreeLBAManager::get_mapping_ret
178BtreeLBAManager::get_mapping(
f67539c2 179 Transaction &t,
20effc67 180 laddr_t offset)
f67539c2 181{
20effc67 182 LOG_PREFIX(BtreeLBAManager::get_mapping);
1e59de90 183 TRACET("{}", t, offset);
20effc67 184 auto c = get_context(t);
1e59de90
TL
185 return with_btree_ret<LBABtree, LBAMappingRef>(
186 cache,
20effc67
TL
187 c,
188 [FNAME, c, offset](auto &btree) {
189 return btree.lower_bound(
190 c, offset
191 ).si_then([FNAME, offset, c](auto iter) -> get_mapping_ret {
192 if (iter.is_end() || iter.get_key() != offset) {
1e59de90 193 ERRORT("laddr={} doesn't exist", c.trans, offset);
20effc67
TL
194 return crimson::ct_error::enoent::make();
195 } else {
1e59de90
TL
196 TRACET("{} got {}, {}",
197 c.trans, offset, iter.get_key(), iter.get_val());
198 auto e = iter.get_pin(c);
20effc67
TL
199 return get_mapping_ret(
200 interruptible::ready_future_marker{},
201 std::move(e));
202 }
f67539c2
TL
203 });
204 });
205}
206
20effc67
TL
207BtreeLBAManager::alloc_extent_ret
208BtreeLBAManager::alloc_extent(
f67539c2 209 Transaction &t,
20effc67
TL
210 laddr_t hint,
211 extent_len_t len,
1e59de90
TL
212 paddr_t addr,
213 LogicalCachedExtent* nextent)
f67539c2 214{
20effc67
TL
215 struct state_t {
216 laddr_t last_end;
217
1e59de90
TL
218 std::optional<typename LBABtree::iterator> insert_iter;
219 std::optional<typename LBABtree::iterator> ret;
20effc67
TL
220
221 state_t(laddr_t hint) : last_end(hint) {}
222 };
223
224 LOG_PREFIX(BtreeLBAManager::alloc_extent);
1e59de90 225 TRACET("{}~{}, hint={}", t, addr, len, hint);
20effc67 226 auto c = get_context(t);
1e59de90
TL
227 ++stats.num_alloc_extents;
228 auto lookup_attempts = stats.num_alloc_extents_iter_nexts;
229 return crimson::os::seastore::with_btree_state<LBABtree, state_t>(
230 cache,
20effc67
TL
231 c,
232 hint,
1e59de90
TL
233 [this, FNAME, c, hint, len, addr, lookup_attempts,
234 &t, nextent](auto &btree, auto &state) {
20effc67
TL
235 return LBABtree::iterate_repeat(
236 c,
237 btree.upper_bound_right(c, hint),
1e59de90
TL
238 [this, &state, len, addr, &t, hint, FNAME, lookup_attempts](auto &pos) {
239 ++stats.num_alloc_extents_iter_nexts;
240 if (pos.is_end()) {
241 DEBUGT("{}~{}, hint={}, state: end, done with {} attempts, insert at {}",
242 t, addr, len, hint,
243 stats.num_alloc_extents_iter_nexts - lookup_attempts,
244 state.last_end);
245 state.insert_iter = pos;
246 return typename LBABtree::iterate_repeat_ret_inner(
247 interruptible::ready_future_marker{},
248 seastar::stop_iteration::yes);
249 } else if (pos.get_key() >= (state.last_end + len)) {
250 DEBUGT("{}~{}, hint={}, state: {}~{}, done with {} attempts, insert at {} -- {}",
251 t, addr, len, hint,
252 pos.get_key(), pos.get_val().len,
253 stats.num_alloc_extents_iter_nexts - lookup_attempts,
20effc67 254 state.last_end,
1e59de90 255 pos.get_val());
20effc67 256 state.insert_iter = pos;
1e59de90 257 return typename LBABtree::iterate_repeat_ret_inner(
20effc67
TL
258 interruptible::ready_future_marker{},
259 seastar::stop_iteration::yes);
260 } else {
261 state.last_end = pos.get_key() + pos.get_val().len;
1e59de90
TL
262 TRACET("{}~{}, hint={}, state: {}~{}, repeat ... -- {}",
263 t, addr, len, hint,
264 pos.get_key(), pos.get_val().len,
265 pos.get_val());
266 return typename LBABtree::iterate_repeat_ret_inner(
20effc67
TL
267 interruptible::ready_future_marker{},
268 seastar::stop_iteration::no);
269 }
1e59de90 270 }).si_then([FNAME, c, addr, len, hint, &btree, &state, nextent] {
20effc67
TL
271 return btree.insert(
272 c,
273 *state.insert_iter,
274 state.last_end,
1e59de90
TL
275 lba_map_val_t{len, addr, 1, 0},
276 nextent
277 ).si_then([&state, FNAME, c, addr, len, hint, nextent](auto &&p) {
20effc67 278 auto [iter, inserted] = std::move(p);
1e59de90
TL
279 TRACET("{}~{}, hint={}, inserted at {}",
280 c.trans, addr, len, hint, state.last_end);
281 if (nextent) {
282 nextent->set_laddr(iter.get_key());
283 }
20effc67
TL
284 ceph_assert(inserted);
285 state.ret = iter;
286 });
287 });
1e59de90
TL
288 }).si_then([c](auto &&state) {
289 return state.ret->get_pin(c);
f67539c2
TL
290 });
291}
292
f67539c2
TL
293static bool is_lba_node(const CachedExtent &e)
294{
295 return is_lba_node(e.get_type());
296}
297
1e59de90
TL
298BtreeLBAManager::base_iertr::template future<>
299_init_cached_extent(
300 op_context_t<laddr_t> c,
301 const CachedExtentRef &e,
302 LBABtree &btree,
303 bool &ret)
f67539c2 304{
1e59de90
TL
305 if (e->is_logical()) {
306 auto logn = e->cast<LogicalCachedExtent>();
307 return btree.lower_bound(
308 c,
309 logn->get_laddr()
310 ).si_then([e, c, logn, &ret](auto iter) {
311 LOG_PREFIX(BtreeLBAManager::init_cached_extent);
312 if (!iter.is_end() &&
313 iter.get_key() == logn->get_laddr() &&
314 iter.get_val().paddr == logn->get_paddr()) {
315 assert(!iter.get_leaf_node()->is_pending());
316 iter.get_leaf_node()->link_child(logn.get(), iter.get_leaf_pos());
317 logn->set_laddr(iter.get_pin(c)->get_key());
318 ceph_assert(iter.get_val().len == e->get_length());
319 DEBUGT("logical extent {} live", c.trans, *logn);
320 ret = true;
321 } else {
322 DEBUGT("logical extent {} not live", c.trans, *logn);
323 ret = false;
324 }
325 });
f67539c2 326 } else {
1e59de90
TL
327 return btree.init_cached_extent(c, e
328 ).si_then([&ret](bool is_alive) {
329 ret = is_alive;
330 });
f67539c2 331 }
f67539c2
TL
332}
333
1e59de90
TL
334BtreeLBAManager::init_cached_extent_ret
335BtreeLBAManager::init_cached_extent(
f67539c2
TL
336 Transaction &t,
337 CachedExtentRef e)
338{
20effc67 339 LOG_PREFIX(BtreeLBAManager::init_cached_extent);
1e59de90
TL
340 TRACET("{}", t, *e);
341 return seastar::do_with(bool(), [this, e, &t](bool &ret) {
342 auto c = get_context(t);
343 return with_btree<LBABtree>(
344 cache, c,
345 [c, e, &ret](auto &btree) -> base_iertr::future<> {
346 LOG_PREFIX(BtreeLBAManager::init_cached_extent);
347 DEBUGT("extent {}", c.trans, *e);
348 return _init_cached_extent(c, e, btree, ret);
349 }
350 ).si_then([&ret] { return ret; });
351 });
352}
353
354BtreeLBAManager::check_child_trackers_ret
355BtreeLBAManager::check_child_trackers(
356 Transaction &t) {
20effc67 357 auto c = get_context(t);
1e59de90
TL
358 return with_btree<LBABtree>(
359 cache, c,
360 [c](auto &btree) {
361 return btree.check_child_trackers(c);
362 });
f67539c2
TL
363}
364
1e59de90
TL
365BtreeLBAManager::scan_mappings_ret
366BtreeLBAManager::scan_mappings(
f67539c2
TL
367 Transaction &t,
368 laddr_t begin,
369 laddr_t end,
370 scan_mappings_func_t &&f)
371{
20effc67
TL
372 LOG_PREFIX(BtreeLBAManager::scan_mappings);
373 DEBUGT("begin: {}, end: {}", t, begin, end);
374
375 auto c = get_context(t);
1e59de90
TL
376 return with_btree<LBABtree>(
377 cache,
20effc67
TL
378 c,
379 [c, f=std::move(f), begin, end](auto &btree) mutable {
380 return LBABtree::iterate_repeat(
381 c,
382 btree.upper_bound_right(c, begin),
20effc67
TL
383 [f=std::move(f), begin, end](auto &pos) {
384 if (pos.is_end() || pos.get_key() >= end) {
1e59de90 385 return typename LBABtree::iterate_repeat_ret_inner(
20effc67
TL
386 interruptible::ready_future_marker{},
387 seastar::stop_iteration::yes);
388 }
389 ceph_assert((pos.get_key() + pos.get_val().len) > begin);
390 f(pos.get_key(), pos.get_val().paddr, pos.get_val().len);
1e59de90 391 return typename LBABtree::iterate_repeat_ret_inner(
20effc67
TL
392 interruptible::ready_future_marker{},
393 seastar::stop_iteration::no);
f67539c2
TL
394 });
395 });
396}
397
1e59de90
TL
398BtreeLBAManager::rewrite_extent_ret
399BtreeLBAManager::rewrite_extent(
f67539c2
TL
400 Transaction &t,
401 CachedExtentRef extent)
402{
20effc67
TL
403 LOG_PREFIX(BtreeLBAManager::rewrite_extent);
404 if (extent->has_been_invalidated()) {
1e59de90
TL
405 ERRORT("extent has been invalidated -- {}", t, *extent);
406 ceph_abort();
20effc67 407 }
20effc67
TL
408 assert(!extent->is_logical());
409
20effc67 410 if (is_lba_node(*extent)) {
1e59de90 411 DEBUGT("rewriting lba extent -- {}", t, *extent);
20effc67 412 auto c = get_context(t);
1e59de90
TL
413 return with_btree<LBABtree>(
414 cache,
20effc67
TL
415 c,
416 [c, extent](auto &btree) mutable {
1e59de90 417 return btree.rewrite_extent(c, extent);
20effc67 418 });
f67539c2 419 } else {
1e59de90 420 DEBUGT("skip non lba extent -- {}", t, *extent);
20effc67 421 return rewrite_extent_iertr::now();
f67539c2
TL
422 }
423}
424
1e59de90 425BtreeLBAManager::update_mapping_ret
20effc67
TL
426BtreeLBAManager::update_mapping(
427 Transaction& t,
428 laddr_t laddr,
429 paddr_t prev_addr,
1e59de90
TL
430 paddr_t addr,
431 LogicalCachedExtent *nextent)
20effc67 432{
1e59de90
TL
433 LOG_PREFIX(BtreeLBAManager::update_mapping);
434 TRACET("laddr={}, paddr {} => {}", t, laddr, prev_addr, addr);
435 return _update_mapping(
20effc67
TL
436 t,
437 laddr,
438 [prev_addr, addr](
439 const lba_map_val_t &in) {
440 assert(!addr.is_null());
441 lba_map_val_t ret = in;
442 ceph_assert(in.paddr == prev_addr);
443 ret.paddr = addr;
444 return ret;
1e59de90
TL
445 },
446 nextent
447 ).si_then([&t, laddr, prev_addr, addr, FNAME](auto result) {
448 DEBUGT("laddr={}, paddr {} => {} done -- {}",
449 t, laddr, prev_addr, addr, result);
450 },
451 update_mapping_iertr::pass_further{},
452 /* ENOENT in particular should be impossible */
453 crimson::ct_error::assert_all{
454 "Invalid error in BtreeLBAManager::update_mapping"
455 }
456 );
20effc67
TL
457}
458
f67539c2
TL
459BtreeLBAManager::get_physical_extent_if_live_ret
460BtreeLBAManager::get_physical_extent_if_live(
461 Transaction &t,
462 extent_types_t type,
463 paddr_t addr,
464 laddr_t laddr,
1e59de90 465 extent_len_t len)
f67539c2 466{
1e59de90
TL
467 LOG_PREFIX(BtreeLBAManager::get_physical_extent_if_live);
468 DEBUGT("{}, laddr={}, paddr={}, length={}",
469 t, type, laddr, addr, len);
f67539c2 470 ceph_assert(is_lba_node(type));
20effc67 471 auto c = get_context(t);
1e59de90
TL
472 return with_btree_ret<LBABtree, CachedExtentRef>(
473 cache,
20effc67
TL
474 c,
475 [c, type, addr, laddr, len](auto &btree) {
476 if (type == extent_types_t::LADDR_INTERNAL) {
477 return btree.get_internal_if_live(c, addr, laddr, len);
478 } else {
1e59de90
TL
479 assert(type == extent_types_t::LADDR_LEAF ||
480 type == extent_types_t::DINK_LADDR_LEAF);
20effc67
TL
481 return btree.get_leaf_if_live(c, addr, laddr, len);
482 }
f67539c2 483 });
f67539c2
TL
484}
485
20effc67 486void BtreeLBAManager::register_metrics()
f67539c2 487{
1e59de90
TL
488 LOG_PREFIX(BtreeLBAManager::register_metrics);
489 DEBUG("start");
490 stats = {};
20effc67
TL
491 namespace sm = seastar::metrics;
492 metrics.add_group(
493 "LBA",
494 {
495 sm::make_counter(
496 "alloc_extents",
1e59de90 497 stats.num_alloc_extents,
20effc67
TL
498 sm::description("total number of lba alloc_extent operations")
499 ),
500 sm::make_counter(
501 "alloc_extents_iter_nexts",
1e59de90 502 stats.num_alloc_extents_iter_nexts,
20effc67
TL
503 sm::description("total number of iterator next operations during extent allocation")
504 ),
505 }
506 );
f67539c2
TL
507}
508
1e59de90
TL
509BtreeLBAManager::update_refcount_ret
510BtreeLBAManager::update_refcount(
f67539c2
TL
511 Transaction &t,
512 laddr_t addr,
513 int delta)
514{
20effc67 515 LOG_PREFIX(BtreeLBAManager::update_refcount);
1e59de90
TL
516 TRACET("laddr={}, delta={}", t, addr, delta);
517 return _update_mapping(
f67539c2
TL
518 t,
519 addr,
520 [delta](const lba_map_val_t &in) {
521 lba_map_val_t out = in;
522 ceph_assert((int)out.refcount + delta >= 0);
523 out.refcount += delta;
524 return out;
1e59de90
TL
525 },
526 nullptr
527 ).si_then([&t, addr, delta, FNAME](auto result) {
528 DEBUGT("laddr={}, delta={} done -- {}", t, addr, delta, result);
529 return ref_update_result_t{
530 result.refcount,
531 result.paddr,
532 result.len
533 };
534 });
f67539c2
TL
535}
536
1e59de90
TL
537BtreeLBAManager::_update_mapping_ret
538BtreeLBAManager::_update_mapping(
f67539c2
TL
539 Transaction &t,
540 laddr_t addr,
1e59de90
TL
541 update_func_t &&f,
542 LogicalCachedExtent* nextent)
f67539c2 543{
20effc67 544 auto c = get_context(t);
1e59de90
TL
545 return with_btree_ret<LBABtree, lba_map_val_t>(
546 cache,
20effc67 547 c,
1e59de90 548 [f=std::move(f), c, addr, nextent](auto &btree) mutable {
20effc67
TL
549 return btree.lower_bound(
550 c, addr
1e59de90
TL
551 ).si_then([&btree, f=std::move(f), c, addr, nextent](auto iter)
552 -> _update_mapping_ret {
20effc67 553 if (iter.is_end() || iter.get_key() != addr) {
1e59de90
TL
554 LOG_PREFIX(BtreeLBAManager::_update_mapping);
555 ERRORT("laddr={} doesn't exist", c.trans, addr);
20effc67
TL
556 return crimson::ct_error::enoent::make();
557 }
558
559 auto ret = f(iter.get_val());
560 if (ret.refcount == 0) {
561 return btree.remove(
562 c,
563 iter
564 ).si_then([ret] {
565 return ret;
566 });
567 } else {
568 return btree.update(
569 c,
570 iter,
1e59de90
TL
571 ret,
572 nextent
20effc67
TL
573 ).si_then([ret](auto) {
574 return ret;
575 });
576 }
577 });
578 });
f67539c2
TL
579}
580
f67539c2 581}