git.proxmox.com Git - ceph.git/blob - ceph/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / crimson / os / seastore / lba_manager / btree / btree_lba_manager.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include <sys/mman.h>
5 #include <string.h>
6
7 #include <seastar/core/metrics.hh>
8
9 #include "include/buffer.h"
10 #include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
11 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
12 #include "crimson/os/seastore/logging.h"
13
14 SET_SUBSYS(seastore_lba);
15 /*
16 * levels:
17 * - INFO: mkfs
18 * - DEBUG: modification operations
19 * - TRACE: read operations, DEBUG details
20 */
21
22 namespace crimson::os::seastore {
23
24 template <typename T>
25 Transaction::tree_stats_t& get_tree_stats(Transaction &t)
26 {
27 return t.get_lba_tree_stats();
28 }
29
30 template Transaction::tree_stats_t&
31 get_tree_stats<
32 crimson::os::seastore::lba_manager::btree::LBABtree>(
33 Transaction &t);
34
35 template <typename T>
36 phy_tree_root_t& get_phy_tree_root(root_t &r)
37 {
38 return r.lba_root;
39 }
40
41 template phy_tree_root_t&
42 get_phy_tree_root<
43 crimson::os::seastore::lba_manager::btree::LBABtree>(root_t &r);
44
45 template <>
46 const get_phy_tree_root_node_ret get_phy_tree_root_node<
47 crimson::os::seastore::lba_manager::btree::LBABtree>(
48 const RootBlockRef &root_block, op_context_t<laddr_t> c)
49 {
50 auto lba_root = root_block->lba_root_node;
51 if (lba_root) {
52 ceph_assert(lba_root->is_initial_pending()
53 == root_block->is_pending());
54 return {true,
55 trans_intr::make_interruptible(
56 c.cache.get_extent_viewable_by_trans(c.trans, lba_root))};
57 } else if (root_block->is_pending()) {
58 auto &prior = static_cast<RootBlock&>(*root_block->get_prior_instance());
59 lba_root = prior.lba_root_node;
60 if (lba_root) {
61 return {true,
62 trans_intr::make_interruptible(
63 c.cache.get_extent_viewable_by_trans(c.trans, lba_root))};
64 } else {
65 return {false,
66 trans_intr::make_interruptible(
67 seastar::make_ready_future<
68 CachedExtentRef>(CachedExtentRef()))};
69 }
70 } else {
71 return {false,
72 trans_intr::make_interruptible(
73 seastar::make_ready_future<
74 CachedExtentRef>(CachedExtentRef()))};
75 }
76 }
77
78 template <typename ROOT>
79 void link_phy_tree_root_node(RootBlockRef &root_block, ROOT* lba_root) {
80 root_block->lba_root_node = lba_root;
81 ceph_assert(lba_root != nullptr);
82 lba_root->root_block = root_block;
83 }
84
85 template void link_phy_tree_root_node(
86 RootBlockRef &root_block, lba_manager::btree::LBAInternalNode* lba_root);
87 template void link_phy_tree_root_node(
88 RootBlockRef &root_block, lba_manager::btree::LBALeafNode* lba_root);
89 template void link_phy_tree_root_node(
90 RootBlockRef &root_block, lba_manager::btree::LBANode* lba_root);
91
92 template <>
93 void unlink_phy_tree_root_node<laddr_t>(RootBlockRef &root_block) {
94 root_block->lba_root_node = nullptr;
95 }
96
97 }
98
99 namespace crimson::os::seastore::lba_manager::btree {
100
101 BtreeLBAManager::mkfs_ret
102 BtreeLBAManager::mkfs(
103 Transaction &t)
104 {
105 LOG_PREFIX(BtreeLBAManager::mkfs);
106 INFOT("start", t);
107 return cache.get_root(t).si_then([this, &t](auto croot) {
108 assert(croot->is_mutation_pending());
109 croot->get_root().lba_root = LBABtree::mkfs(croot, get_context(t));
110 return mkfs_iertr::now();
111 }).handle_error_interruptible(
112 mkfs_iertr::pass_further{},
113 crimson::ct_error::assert_all{
114 "Invalid error in BtreeLBAManager::mkfs"
115 }
116 );
117 }
118
119 BtreeLBAManager::get_mappings_ret
120 BtreeLBAManager::get_mappings(
121 Transaction &t,
122 laddr_t offset, extent_len_t length)
123 {
124 LOG_PREFIX(BtreeLBAManager::get_mappings);
125 TRACET("{}~{}", t, offset, length);
126 auto c = get_context(t);
127 return with_btree_state<LBABtree, lba_pin_list_t>(
128 cache,
129 c,
130 [c, offset, length, FNAME](auto &btree, auto &ret) {
131 return LBABtree::iterate_repeat(
132 c,
133 btree.upper_bound_right(c, offset),
134 [&ret, offset, length, c, FNAME](auto &pos) {
135 if (pos.is_end() || pos.get_key() >= (offset + length)) {
136 TRACET("{}~{} done with {} results",
137 c.trans, offset, length, ret.size());
138 return typename LBABtree::iterate_repeat_ret_inner(
139 interruptible::ready_future_marker{},
140 seastar::stop_iteration::yes);
141 }
142 TRACET("{}~{} got {}, {}, repeat ...",
143 c.trans, offset, length, pos.get_key(), pos.get_val());
144 ceph_assert((pos.get_key() + pos.get_val().len) > offset);
145 ret.push_back(pos.get_pin(c));
146 return typename LBABtree::iterate_repeat_ret_inner(
147 interruptible::ready_future_marker{},
148 seastar::stop_iteration::no);
149 });
150 });
151 }
152
153 BtreeLBAManager::get_mappings_ret
154 BtreeLBAManager::get_mappings(
155 Transaction &t,
156 laddr_list_t &&list)
157 {
158 LOG_PREFIX(BtreeLBAManager::get_mappings);
159 TRACET("{}", t, list);
160 auto l = std::make_unique<laddr_list_t>(std::move(list));
161 auto retptr = std::make_unique<lba_pin_list_t>();
162 auto &ret = *retptr;
163 return trans_intr::do_for_each(
164 l->begin(),
165 l->end(),
166 [this, &t, &ret](const auto &p) {
167 return this->get_mappings(t, p.first, p.second).si_then(
168 [&ret](auto res) {
169 ret.splice(ret.end(), res, res.begin(), res.end());
170 return get_mappings_iertr::now();
171 });
172 }).si_then([l=std::move(l), retptr=std::move(retptr)]() mutable {
173 return std::move(*retptr);
174 });
175 }
176
177 BtreeLBAManager::get_mapping_ret
178 BtreeLBAManager::get_mapping(
179 Transaction &t,
180 laddr_t offset)
181 {
182 LOG_PREFIX(BtreeLBAManager::get_mapping);
183 TRACET("{}", t, offset);
184 auto c = get_context(t);
185 return with_btree_ret<LBABtree, LBAMappingRef>(
186 cache,
187 c,
188 [FNAME, c, offset](auto &btree) {
189 return btree.lower_bound(
190 c, offset
191 ).si_then([FNAME, offset, c](auto iter) -> get_mapping_ret {
192 if (iter.is_end() || iter.get_key() != offset) {
193 ERRORT("laddr={} doesn't exist", c.trans, offset);
194 return crimson::ct_error::enoent::make();
195 } else {
196 TRACET("{} got {}, {}",
197 c.trans, offset, iter.get_key(), iter.get_val());
198 auto e = iter.get_pin(c);
199 return get_mapping_ret(
200 interruptible::ready_future_marker{},
201 std::move(e));
202 }
203 });
204 });
205 }
206
207 BtreeLBAManager::alloc_extent_ret
208 BtreeLBAManager::alloc_extent(
209 Transaction &t,
210 laddr_t hint,
211 extent_len_t len,
212 paddr_t addr,
213 LogicalCachedExtent* nextent)
214 {
215 struct state_t {
216 laddr_t last_end;
217
218 std::optional<typename LBABtree::iterator> insert_iter;
219 std::optional<typename LBABtree::iterator> ret;
220
221 state_t(laddr_t hint) : last_end(hint) {}
222 };
223
224 LOG_PREFIX(BtreeLBAManager::alloc_extent);
225 TRACET("{}~{}, hint={}", t, addr, len, hint);
226 auto c = get_context(t);
227 ++stats.num_alloc_extents;
228 auto lookup_attempts = stats.num_alloc_extents_iter_nexts;
229 return crimson::os::seastore::with_btree_state<LBABtree, state_t>(
230 cache,
231 c,
232 hint,
233 [this, FNAME, c, hint, len, addr, lookup_attempts,
234 &t, nextent](auto &btree, auto &state) {
235 return LBABtree::iterate_repeat(
236 c,
237 btree.upper_bound_right(c, hint),
238 [this, &state, len, addr, &t, hint, FNAME, lookup_attempts](auto &pos) {
239 ++stats.num_alloc_extents_iter_nexts;
240 if (pos.is_end()) {
241 DEBUGT("{}~{}, hint={}, state: end, done with {} attempts, insert at {}",
242 t, addr, len, hint,
243 stats.num_alloc_extents_iter_nexts - lookup_attempts,
244 state.last_end);
245 state.insert_iter = pos;
246 return typename LBABtree::iterate_repeat_ret_inner(
247 interruptible::ready_future_marker{},
248 seastar::stop_iteration::yes);
249 } else if (pos.get_key() >= (state.last_end + len)) {
250 DEBUGT("{}~{}, hint={}, state: {}~{}, done with {} attempts, insert at {} -- {}",
251 t, addr, len, hint,
252 pos.get_key(), pos.get_val().len,
253 stats.num_alloc_extents_iter_nexts - lookup_attempts,
254 state.last_end,
255 pos.get_val());
256 state.insert_iter = pos;
257 return typename LBABtree::iterate_repeat_ret_inner(
258 interruptible::ready_future_marker{},
259 seastar::stop_iteration::yes);
260 } else {
261 state.last_end = pos.get_key() + pos.get_val().len;
262 TRACET("{}~{}, hint={}, state: {}~{}, repeat ... -- {}",
263 t, addr, len, hint,
264 pos.get_key(), pos.get_val().len,
265 pos.get_val());
266 return typename LBABtree::iterate_repeat_ret_inner(
267 interruptible::ready_future_marker{},
268 seastar::stop_iteration::no);
269 }
270 }).si_then([FNAME, c, addr, len, hint, &btree, &state, nextent] {
271 return btree.insert(
272 c,
273 *state.insert_iter,
274 state.last_end,
275 lba_map_val_t{len, addr, 1, 0},
276 nextent
277 ).si_then([&state, FNAME, c, addr, len, hint, nextent](auto &&p) {
278 auto [iter, inserted] = std::move(p);
279 TRACET("{}~{}, hint={}, inserted at {}",
280 c.trans, addr, len, hint, state.last_end);
281 if (nextent) {
282 nextent->set_laddr(iter.get_key());
283 }
284 ceph_assert(inserted);
285 state.ret = iter;
286 });
287 });
288 }).si_then([c](auto &&state) {
289 return state.ret->get_pin(c);
290 });
291 }
292
293 static bool is_lba_node(const CachedExtent &e)
294 {
295 return is_lba_node(e.get_type());
296 }
297
298 BtreeLBAManager::base_iertr::template future<>
299 _init_cached_extent(
300 op_context_t<laddr_t> c,
301 const CachedExtentRef &e,
302 LBABtree &btree,
303 bool &ret)
304 {
305 if (e->is_logical()) {
306 auto logn = e->cast<LogicalCachedExtent>();
307 return btree.lower_bound(
308 c,
309 logn->get_laddr()
310 ).si_then([e, c, logn, &ret](auto iter) {
311 LOG_PREFIX(BtreeLBAManager::init_cached_extent);
312 if (!iter.is_end() &&
313 iter.get_key() == logn->get_laddr() &&
314 iter.get_val().paddr == logn->get_paddr()) {
315 assert(!iter.get_leaf_node()->is_pending());
316 iter.get_leaf_node()->link_child(logn.get(), iter.get_leaf_pos());
317 logn->set_laddr(iter.get_pin(c)->get_key());
318 ceph_assert(iter.get_val().len == e->get_length());
319 DEBUGT("logical extent {} live", c.trans, *logn);
320 ret = true;
321 } else {
322 DEBUGT("logical extent {} not live", c.trans, *logn);
323 ret = false;
324 }
325 });
326 } else {
327 return btree.init_cached_extent(c, e
328 ).si_then([&ret](bool is_alive) {
329 ret = is_alive;
330 });
331 }
332 }
333
334 BtreeLBAManager::init_cached_extent_ret
335 BtreeLBAManager::init_cached_extent(
336 Transaction &t,
337 CachedExtentRef e)
338 {
339 LOG_PREFIX(BtreeLBAManager::init_cached_extent);
340 TRACET("{}", t, *e);
341 return seastar::do_with(bool(), [this, e, &t](bool &ret) {
342 auto c = get_context(t);
343 return with_btree<LBABtree>(
344 cache, c,
345 [c, e, &ret](auto &btree) -> base_iertr::future<> {
346 LOG_PREFIX(BtreeLBAManager::init_cached_extent);
347 DEBUGT("extent {}", c.trans, *e);
348 return _init_cached_extent(c, e, btree, ret);
349 }
350 ).si_then([&ret] { return ret; });
351 });
352 }
353
354 BtreeLBAManager::check_child_trackers_ret
355 BtreeLBAManager::check_child_trackers(
356 Transaction &t) {
357 auto c = get_context(t);
358 return with_btree<LBABtree>(
359 cache, c,
360 [c](auto &btree) {
361 return btree.check_child_trackers(c);
362 });
363 }
364
365 BtreeLBAManager::scan_mappings_ret
366 BtreeLBAManager::scan_mappings(
367 Transaction &t,
368 laddr_t begin,
369 laddr_t end,
370 scan_mappings_func_t &&f)
371 {
372 LOG_PREFIX(BtreeLBAManager::scan_mappings);
373 DEBUGT("begin: {}, end: {}", t, begin, end);
374
375 auto c = get_context(t);
376 return with_btree<LBABtree>(
377 cache,
378 c,
379 [c, f=std::move(f), begin, end](auto &btree) mutable {
380 return LBABtree::iterate_repeat(
381 c,
382 btree.upper_bound_right(c, begin),
383 [f=std::move(f), begin, end](auto &pos) {
384 if (pos.is_end() || pos.get_key() >= end) {
385 return typename LBABtree::iterate_repeat_ret_inner(
386 interruptible::ready_future_marker{},
387 seastar::stop_iteration::yes);
388 }
389 ceph_assert((pos.get_key() + pos.get_val().len) > begin);
390 f(pos.get_key(), pos.get_val().paddr, pos.get_val().len);
391 return typename LBABtree::iterate_repeat_ret_inner(
392 interruptible::ready_future_marker{},
393 seastar::stop_iteration::no);
394 });
395 });
396 }
397
398 BtreeLBAManager::rewrite_extent_ret
399 BtreeLBAManager::rewrite_extent(
400 Transaction &t,
401 CachedExtentRef extent)
402 {
403 LOG_PREFIX(BtreeLBAManager::rewrite_extent);
404 if (extent->has_been_invalidated()) {
405 ERRORT("extent has been invalidated -- {}", t, *extent);
406 ceph_abort();
407 }
408 assert(!extent->is_logical());
409
410 if (is_lba_node(*extent)) {
411 DEBUGT("rewriting lba extent -- {}", t, *extent);
412 auto c = get_context(t);
413 return with_btree<LBABtree>(
414 cache,
415 c,
416 [c, extent](auto &btree) mutable {
417 return btree.rewrite_extent(c, extent);
418 });
419 } else {
420 DEBUGT("skip non lba extent -- {}", t, *extent);
421 return rewrite_extent_iertr::now();
422 }
423 }
424
425 BtreeLBAManager::update_mapping_ret
426 BtreeLBAManager::update_mapping(
427 Transaction& t,
428 laddr_t laddr,
429 paddr_t prev_addr,
430 paddr_t addr,
431 LogicalCachedExtent *nextent)
432 {
433 LOG_PREFIX(BtreeLBAManager::update_mapping);
434 TRACET("laddr={}, paddr {} => {}", t, laddr, prev_addr, addr);
435 return _update_mapping(
436 t,
437 laddr,
438 [prev_addr, addr](
439 const lba_map_val_t &in) {
440 assert(!addr.is_null());
441 lba_map_val_t ret = in;
442 ceph_assert(in.paddr == prev_addr);
443 ret.paddr = addr;
444 return ret;
445 },
446 nextent
447 ).si_then([&t, laddr, prev_addr, addr, FNAME](auto result) {
448 DEBUGT("laddr={}, paddr {} => {} done -- {}",
449 t, laddr, prev_addr, addr, result);
450 },
451 update_mapping_iertr::pass_further{},
452 /* ENOENT in particular should be impossible */
453 crimson::ct_error::assert_all{
454 "Invalid error in BtreeLBAManager::update_mapping"
455 }
456 );
457 }
458
459 BtreeLBAManager::get_physical_extent_if_live_ret
460 BtreeLBAManager::get_physical_extent_if_live(
461 Transaction &t,
462 extent_types_t type,
463 paddr_t addr,
464 laddr_t laddr,
465 extent_len_t len)
466 {
467 LOG_PREFIX(BtreeLBAManager::get_physical_extent_if_live);
468 DEBUGT("{}, laddr={}, paddr={}, length={}",
469 t, type, laddr, addr, len);
470 ceph_assert(is_lba_node(type));
471 auto c = get_context(t);
472 return with_btree_ret<LBABtree, CachedExtentRef>(
473 cache,
474 c,
475 [c, type, addr, laddr, len](auto &btree) {
476 if (type == extent_types_t::LADDR_INTERNAL) {
477 return btree.get_internal_if_live(c, addr, laddr, len);
478 } else {
479 assert(type == extent_types_t::LADDR_LEAF ||
480 type == extent_types_t::DINK_LADDR_LEAF);
481 return btree.get_leaf_if_live(c, addr, laddr, len);
482 }
483 });
484 }
485
486 void BtreeLBAManager::register_metrics()
487 {
488 LOG_PREFIX(BtreeLBAManager::register_metrics);
489 DEBUG("start");
490 stats = {};
491 namespace sm = seastar::metrics;
492 metrics.add_group(
493 "LBA",
494 {
495 sm::make_counter(
496 "alloc_extents",
497 stats.num_alloc_extents,
498 sm::description("total number of lba alloc_extent operations")
499 ),
500 sm::make_counter(
501 "alloc_extents_iter_nexts",
502 stats.num_alloc_extents_iter_nexts,
503 sm::description("total number of iterator next operations during extent allocation")
504 ),
505 }
506 );
507 }
508
509 BtreeLBAManager::update_refcount_ret
510 BtreeLBAManager::update_refcount(
511 Transaction &t,
512 laddr_t addr,
513 int delta)
514 {
515 LOG_PREFIX(BtreeLBAManager::update_refcount);
516 TRACET("laddr={}, delta={}", t, addr, delta);
517 return _update_mapping(
518 t,
519 addr,
520 [delta](const lba_map_val_t &in) {
521 lba_map_val_t out = in;
522 ceph_assert((int)out.refcount + delta >= 0);
523 out.refcount += delta;
524 return out;
525 },
526 nullptr
527 ).si_then([&t, addr, delta, FNAME](auto result) {
528 DEBUGT("laddr={}, delta={} done -- {}", t, addr, delta, result);
529 return ref_update_result_t{
530 result.refcount,
531 result.paddr,
532 result.len
533 };
534 });
535 }
536
537 BtreeLBAManager::_update_mapping_ret
538 BtreeLBAManager::_update_mapping(
539 Transaction &t,
540 laddr_t addr,
541 update_func_t &&f,
542 LogicalCachedExtent* nextent)
543 {
544 auto c = get_context(t);
545 return with_btree_ret<LBABtree, lba_map_val_t>(
546 cache,
547 c,
548 [f=std::move(f), c, addr, nextent](auto &btree) mutable {
549 return btree.lower_bound(
550 c, addr
551 ).si_then([&btree, f=std::move(f), c, addr, nextent](auto iter)
552 -> _update_mapping_ret {
553 if (iter.is_end() || iter.get_key() != addr) {
554 LOG_PREFIX(BtreeLBAManager::_update_mapping);
555 ERRORT("laddr={} doesn't exist", c.trans, addr);
556 return crimson::ct_error::enoent::make();
557 }
558
559 auto ret = f(iter.get_val());
560 if (ret.refcount == 0) {
561 return btree.remove(
562 c,
563 iter
564 ).si_then([ret] {
565 return ret;
566 });
567 } else {
568 return btree.update(
569 c,
570 iter,
571 ret,
572 nextent
573 ).si_then([ret](auto) {
574 return ret;
575 });
576 }
577 });
578 });
579 }
580
581 }