]> git.proxmox.com Git - ceph.git/blob - ceph/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc
buildsys: switch source download to quincy
[ceph.git] / ceph / src / crimson / os / seastore / lba_manager / btree / lba_btree_node_impl.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include <sys/mman.h>
5 #include <string.h>
6
7 #include <memory>
8 #include <string.h>
9
10 #include "include/buffer.h"
11 #include "include/byteorder.h"
12
13 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h"
14
namespace {
// File-local accessor for the seastar logger used throughout this file.
// NOTE(review): this is seastore code but it logs under
// ceph_subsys_filestore — presumably a placeholder subsystem from early
// crimson development; confirm before changing.
seastar::logger& logger() {
  return crimson::get_logger(ceph_subsys_filestore);
}
}
20
21 namespace crimson::os::seastore::lba_manager::btree {
22
23 std::ostream &LBAInternalNode::print_detail(std::ostream &out) const
24 {
25 return out << ", size=" << get_size()
26 << ", meta=" << get_meta();
27 }
28
// Descends from this internal node toward the node at the requested
// depth that covers addr, returning a reference to that node.
LBAInternalNode::lookup_ret LBAInternalNode::lookup(
  op_context_t c,
  laddr_t addr,
  depth_t depth)
{
  auto meta = get_meta();
  // Base case: this node already sits at the requested depth.
  if (depth == get_meta().depth) {
    return lookup_ret(
      lookup_ertr::ready_future_marker{},
      this);
  }
  // addr must fall inside this node's covered range.
  assert(meta.begin <= addr);
  assert(meta.end > addr);
  auto iter = lower_bound(addr);
  // Load the child covering addr and recurse one level down.  The
  // finally() capture keeps this node referenced until the recursion
  // resolves.
  return get_lba_btree_extent(
    c,
    meta.depth - 1,
    iter->get_val(),
    get_paddr()).safe_then([c, addr, depth](auto child) {
      return child->lookup(c, addr, depth);
    }).finally([ref=LBANodeRef(this)] {});
}
51
// Collects pins for all mappings intersecting [addr, addr + len) by
// fanning out over every child whose range overlaps the query.
LBAInternalNode::lookup_range_ret LBAInternalNode::lookup_range(
  op_context_t c,
  laddr_t addr,
  extent_len_t len)
{
  auto [begin, end] = bound(addr, addr + len);
  // The accumulator lives on the heap so the reference captured by the
  // per-child lambda stays valid across suspensions; the unique_ptr
  // itself is moved into the final continuation to pin its lifetime.
  auto result_up = std::make_unique<lba_pin_list_t>();
  auto &result = *result_up;
  return crimson::do_for_each(
    std::move(begin),
    std::move(end),
    [this, c, &result, addr, len](const auto &val) mutable {
      return get_lba_btree_extent(
        c,
        get_meta().depth - 1,
        val.get_val(),
        get_paddr()).safe_then(
          [c, &result, addr, len](auto extent) mutable {
            return extent->lookup_range(
              c,
              addr,
              len).safe_then(
                [&result](auto pin_list) mutable {
                  // Move the child's pins onto the aggregate list.
                  result.splice(result.end(), pin_list,
                                pin_list.begin(), pin_list.end());
                });
          });
    }).safe_then([result=std::move(result_up), ref=LBANodeRef(this)] {
      // ref keeps this node alive for the duration of the iteration.
      return lookup_range_ertr::make_ready_future<lba_pin_list_t>(
        std::move(*result));
    });
}
84
85 LBAInternalNode::insert_ret LBAInternalNode::insert(
86 op_context_t c,
87 laddr_t laddr,
88 lba_map_val_t val)
89 {
90 auto insertion_pt = get_containing_child(laddr);
91 return get_lba_btree_extent(
92 c,
93 get_meta().depth - 1,
94 insertion_pt->get_val(),
95 get_paddr()).safe_then(
96 [this, insertion_pt, c, laddr, val=std::move(val)](
97 auto extent) mutable {
98 return extent->at_max_capacity() ?
99 split_entry(c, laddr, insertion_pt, extent) :
100 insert_ertr::make_ready_future<LBANodeRef>(std::move(extent));
101 }).safe_then([c, laddr, val=std::move(val)](
102 LBANodeRef extent) mutable {
103 return extent->insert(c, laddr, val);
104 });
105 }
106
107 LBAInternalNode::mutate_mapping_ret LBAInternalNode::mutate_mapping(
108 op_context_t c,
109 laddr_t laddr,
110 mutate_func_t &&f)
111 {
112 return mutate_mapping_internal(c, laddr, true, std::move(f));
113 }
114
// Descends toward the leaf containing laddr, preemptively merging or
// rebalancing any child already at minimum capacity so a subsequent
// removal cannot underflow it.
LBAInternalNode::mutate_mapping_ret LBAInternalNode::mutate_mapping_internal(
  op_context_t c,
  laddr_t laddr,
  bool is_root,
  mutate_func_t &&f)
{
  auto mutation_pt = get_containing_child(laddr);
  if (mutation_pt == end()) {
    // get_containing_child() already asserts on failure, so this path
    // is belt-and-suspenders; report enoent if it is ever reached.
    assert(0 == "impossible");
    return crimson::ct_error::enoent::make();
  }
  return get_lba_btree_extent(
    c,
    get_meta().depth - 1,
    mutation_pt->get_val(),
    get_paddr()
  ).safe_then([=](LBANodeRef extent) {
    // Rebalance ahead of the mutation when the child could underflow;
    // get_size() > 1 guarantees a sibling donor exists.
    if (extent->at_min_capacity() && get_size() > 1) {
      return merge_entry(
        c,
        laddr,
        mutation_pt,
        extent,
        is_root);
    } else {
      return merge_ertr::make_ready_future<LBANodeRef>(
        std::move(extent));
    }
  }).safe_then([c, laddr, f=std::move(f)](LBANodeRef extent) mutable {
    // Continue the descent; nodes below this one are never the root.
    return extent->mutate_mapping_internal(c, laddr, false, std::move(f));
  });
}
147
// Updates the stored paddr of the btree node at (depth, laddr) and
// returns the previous paddr.  Recurses until this node is the direct
// parent of the target.
LBAInternalNode::mutate_internal_address_ret LBAInternalNode::mutate_internal_address(
  op_context_t c,
  depth_t depth,
  laddr_t laddr,
  paddr_t paddr)
{
  if (get_meta().depth == (depth + 1)) {
    // This node is the direct parent of the target entry.
    if (!is_pending()) {
      // Mutations must happen on a transaction-private copy.
      return c.cache.duplicate_for_write(c.trans, this)->cast<LBAInternalNode>(
      )->mutate_internal_address(
        c,
        depth,
        laddr,
        paddr);
    }
    auto iter = get_containing_child(laddr);
    if (iter->get_key() != laddr) {
      // The target must begin exactly at laddr; otherwise it isn't here.
      return crimson::ct_error::enoent::make();
    }

    auto old_paddr = iter->get_val();

    // Record the new (possibly relative) address via the delta journal.
    journal_update(
      iter,
      maybe_generate_relative(paddr),
      maybe_get_delta_buffer());

    return mutate_internal_address_ret(
      mutate_internal_address_ertr::ready_future_marker{},
      old_paddr
    );
  } else {
    // Not the parent yet: descend into the covering child.
    auto iter = get_containing_child(laddr);
    return get_lba_btree_extent(
      c,
      get_meta().depth - 1,
      iter->get_val(),
      get_paddr()
    ).safe_then([=](auto node) {
      return node->mutate_internal_address(
        c,
        depth,
        laddr,
        paddr);
    });
  }
}
195
// Searches children overlapping [min_addr, max_addr) in order for a
// free range of at least len, returning L_ADDR_NULL when exhausted.
LBAInternalNode::find_hole_ret LBAInternalNode::find_hole(
  op_context_t c,
  laddr_t min_addr,
  laddr_t max_addr,
  extent_len_t len)
{
  logger().debug(
    "LBAInternalNode::find_hole min={}, max={}, len={}, *this={}",
    min_addr, max_addr, len, *this);
  auto [begin, end] = bound(min_addr, max_addr);
  // repeat_until_value keeps the lambda (and thus the iterator i,
  // which the inner continuation captures by reference) alive across
  // iterations.
  return seastar::repeat_until_value(
    [i=begin, e=end, c, min_addr, len, this]() mutable {
      if (i == e) {
        // All children exhausted: report no hole.
        return seastar::make_ready_future<std::optional<laddr_t>>(
          std::make_optional<laddr_t>(L_ADDR_NULL));
      }
      return get_lba_btree_extent(c,
        get_meta().depth - 1,
        i->get_val(),
        get_paddr()).safe_then(
          [c, min_addr, len, i](auto extent) mutable {
            // Clamp the search window to the child's covered range.
            auto lb = std::max(min_addr, i->get_key());
            auto ub = i->get_next_key_or_max();
            logger().debug("LBAInternalNode::find_hole extent {} lb {} ub {}",
              *extent, lb, ub);
            return extent->find_hole(c, lb, ub, len);
          }).safe_then([&i](auto addr) mutable -> std::optional<laddr_t> {
            // NOTE(review): LBALeafNode::find_hole reports failure as
            // L_ADDR_MAX; this check presumes L_ADDR_NULL aliases it —
            // confirm in seastore_types.h.
            if (addr == L_ADDR_NULL) {
              // No hole in this child: advance and loop again.
              ++i;
              return {};
            } else {
              return addr;
            }
          },
          // TODO: GCC enters a dead loop if crimson::do_until() is used
          // or erroratorized future is returned
          crimson::ct_error::assert_all{ "fix me - APIv6" });
    });
}
235
// Applies f to every mapping in [begin, end) by delegating to each
// overlapping child in key order.
LBAInternalNode::scan_mappings_ret LBAInternalNode::scan_mappings(
  op_context_t c,
  laddr_t begin,
  laddr_t end,
  scan_mappings_func_t &f)
{
  auto [biter, eiter] = bound(begin, end);
  // The LBANodeRef captured at the tail keeps this node alive until
  // the whole iteration completes.
  return crimson::do_for_each(
    std::move(biter),
    std::move(eiter),
    [=, &f](auto &viter) {
      return get_lba_btree_extent(
        c,
        get_meta().depth - 1,
        viter->get_val(),
        get_paddr()).safe_then([=, &f](auto child) {
          return child->scan_mappings(c, begin, end, f);
        });
    }).safe_then([ref=LBANodeRef(this)]{});
}
256
// Reports this node's own extent, then recursively reports every
// extent referenced beneath it.
LBAInternalNode::scan_mapped_space_ret LBAInternalNode::scan_mapped_space(
  op_context_t c,
  scan_mapped_space_func_t &f)
{
  // This internal node's own on-disk block counts as mapped space.
  f(get_paddr(), get_length());
  return crimson::do_for_each(
    begin(), end(),
    [=, &f](auto &viter) {
      return get_lba_btree_extent(
        c,
        get_meta().depth - 1,
        viter->get_val(),
        get_paddr()).safe_then([=, &f](auto child) {
          return child->scan_mapped_space(c, f);
        });
    }).safe_then([ref=LBANodeRef(this)]{});
  // The captured ref keeps this node alive until the scan completes.
}
274
275
276 void LBAInternalNode::resolve_relative_addrs(paddr_t base)
277 {
278 for (auto i: *this) {
279 if (i->get_val().is_relative()) {
280 auto updated = base.add_relative(i->get_val());
281 logger().debug(
282 "LBAInternalNode::resolve_relative_addrs {} -> {}",
283 i->get_val(),
284 updated);
285 i->set_val(updated);
286 }
287 }
288 }
289
290
// Splits the child at iter (which covers addr) into two nodes, fixes
// up this node's entries accordingly, and returns whichever half now
// covers addr.
LBAInternalNode::split_ret
LBAInternalNode::split_entry(
  op_context_t c,
  laddr_t addr,
  internal_iterator_t iter, LBANodeRef entry)
{
  if (!is_pending()) {
    // Mutations require a transaction-private copy of this node;
    // re-resolve the iterator against the duplicate before retrying.
    auto mut = c.cache.duplicate_for_write(
      c.trans, this)->cast<LBAInternalNode>();
    auto mut_iter = mut->iter_idx(iter->get_offset());
    return mut->split_entry(c, addr, mut_iter, entry);
  }

  // The split adds one entry to this node, so there must be room.
  ceph_assert(!at_max_capacity());
  auto [left, right, pivot] = entry->make_split_children(c);

  // Point the existing entry at the left child and insert the right
  // child after it, keyed by the split pivot.
  journal_update(
    iter,
    maybe_generate_relative(left->get_paddr()),
    maybe_get_delta_buffer());
  journal_insert(
    iter + 1,
    pivot,
    maybe_generate_relative(right->get_paddr()),
    maybe_get_delta_buffer());

  // The pre-split child is no longer referenced by the tree.
  c.cache.retire_extent(c.trans, entry);

  logger().debug(
    "LBAInternalNode::split_entry *this {} entry {} into left {} right {}",
    *this,
    *entry,
    *left,
    *right);

  return split_ertr::make_ready_future<LBANodeRef>(
    pivot > addr ? left : right
  );
}
330
// Rebalances the at-min-capacity child at iter against a sibling:
// a full merge when the donor is also at minimum capacity, otherwise a
// balanced redistribution.  Returns the node that covers addr after
// the operation.
LBAInternalNode::merge_ret
LBAInternalNode::merge_entry(
  op_context_t c,
  laddr_t addr,
  internal_iterator_t iter,
  LBANodeRef entry,
  bool is_root)
{
  if (!is_pending()) {
    // Mutations require a transaction-private copy of this node;
    // re-resolve the iterator against the duplicate before retrying.
    auto mut = c.cache.duplicate_for_write(c.trans, this)->cast<LBAInternalNode>();
    auto mut_iter = mut->iter_idx(iter->get_offset());
    return mut->merge_entry(c, addr, mut_iter, entry, is_root);
  }

  logger().debug(
    "LBAInternalNode: merge_entry: {}, {}",
    *this,
    *entry);
  // Prefer the right sibling as donor; fall back to the left sibling
  // when iter is the last entry in this node.
  auto donor_is_left = (iter + 1) == end();
  auto donor_iter = donor_is_left ? iter - 1 : iter + 1;
  return get_lba_btree_extent(
    c,
    get_meta().depth - 1,
    donor_iter->get_val(),
    get_paddr()
  ).safe_then([=](auto donor) mutable {
    // Normalize to (left, right) order regardless of donor side.
    auto [l, r] = donor_is_left ?
      std::make_pair(donor, entry) : std::make_pair(entry, donor);
    auto [liter, riter] = donor_is_left ?
      std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);
    if (donor->at_min_capacity()) {
      // Donor cannot spare entries: merge both children into one node.
      auto replacement = l->make_full_merge(
        c,
        r);

      // The left entry now points at the merged node; the right entry
      // disappears.
      journal_update(
        liter,
        maybe_generate_relative(replacement->get_paddr()),
        maybe_get_delta_buffer());
      journal_remove(riter, maybe_get_delta_buffer());

      c.cache.retire_extent(c.trans, l);
      c.cache.retire_extent(c.trans, r);

      if (is_root && get_size() == 1) {
        // The merge left the root with a single child: collapse the
        // root by pointing the superblock at that child directly and
        // shrinking the recorded tree depth.
        return c.cache.get_root(c.trans).safe_then([=](RootBlockRef croot) {
          {
            auto mut_croot = c.cache.duplicate_for_write(c.trans, croot);
            croot = mut_croot->cast<RootBlock>();
          }
          croot->root.lba_root_addr = begin()->get_val();
          logger().debug(
            "LBAInternalNode::merge_entry: collapsing root {} to addr {}",
            *this,
            begin()->get_val());
          croot->root.lba_depth = get_meta().depth - 1;
          c.cache.retire_extent(c.trans, this);
          return merge_ertr::make_ready_future<LBANodeRef>(replacement);
        });
      } else {
        return merge_ertr::make_ready_future<LBANodeRef>(replacement);
      }
    } else {
      // Donor has spare entries: redistribute between the two children.
      logger().debug(
        "LBAInternalEntry::merge_entry balanced l {} r {}",
        *l,
        *r);
      auto [replacement_l, replacement_r, pivot] =
        l->make_balanced(
          c,
          r,
          !donor_is_left);

      // Rewrite both entries: left keeps its key with a new paddr,
      // right gets the new pivot key and paddr.
      journal_update(
        liter,
        maybe_generate_relative(replacement_l->get_paddr()),
        maybe_get_delta_buffer());
      journal_replace(
        riter,
        pivot,
        maybe_generate_relative(replacement_r->get_paddr()),
        maybe_get_delta_buffer());

      c.cache.retire_extent(c.trans, l);
      c.cache.retire_extent(c.trans, r);
      // Return whichever replacement now covers addr.
      return merge_ertr::make_ready_future<LBANodeRef>(
        addr >= pivot ? replacement_r : replacement_l
      );
    }
  });
}
422
423
424 LBAInternalNode::internal_iterator_t
425 LBAInternalNode::get_containing_child(laddr_t laddr)
426 {
427 // TODO: binary search
428 for (auto i = begin(); i != end(); ++i) {
429 if (i.contains(laddr))
430 return i;
431 }
432 ceph_assert(0 == "invalid");
433 return end();
434 }
435
436 std::ostream &LBALeafNode::print_detail(std::ostream &out) const
437 {
438 return out << ", size=" << get_size()
439 << ", meta=" << get_meta();
440 }
441
442 LBALeafNode::lookup_range_ret LBALeafNode::lookup_range(
443 op_context_t c,
444 laddr_t addr,
445 extent_len_t len)
446 {
447 logger().debug(
448 "LBALeafNode::lookup_range {}~{}",
449 addr,
450 len);
451 auto ret = lba_pin_list_t();
452 auto [i, end] = get_leaf_entries(addr, len);
453 for (; i != end; ++i) {
454 auto val = i->get_val();
455 auto begin = i->get_key();
456 ret.emplace_back(
457 std::make_unique<BtreeLBAPin>(
458 this,
459 val.paddr.maybe_relative_to(get_paddr()),
460 lba_node_meta_t{ begin, begin + val.len, 0}));
461 }
462 return lookup_range_ertr::make_ready_future<lba_pin_list_t>(
463 std::move(ret));
464 }
465
// Inserts the mapping laddr -> val into this leaf and returns a pin
// over the new mapping.  The caller must have ensured capacity.
LBALeafNode::insert_ret LBALeafNode::insert(
  op_context_t c,
  laddr_t laddr,
  lba_map_val_t val)
{
  ceph_assert(!at_max_capacity());

  if (!is_pending()) {
    // Mutations require a transaction-private copy of this node.
    return c.cache.duplicate_for_write(c.trans, this
    )->cast<LBALeafNode>()->insert(c, laddr, val);
  }

  // Store the paddr relative to this node when appropriate.
  val.paddr = maybe_generate_relative(val.paddr);
  logger().debug(
    "LBALeafNode::insert: inserting {}~{} -> {}",
    laddr,
    val.len,
    val.paddr);

  auto insert_pt = lower_bound(laddr);
  journal_insert(insert_pt, laddr, val, maybe_get_delta_buffer());

  logger().debug(
    "LBALeafNode::insert: inserted {}~{} -> {}",
    insert_pt.get_key(),
    insert_pt.get_val().len,
    insert_pt.get_val().paddr);
  auto begin = insert_pt.get_key();
  // The returned pin exposes an absolute paddr even if the stored
  // value is relative.
  return insert_ret(
    insert_ertr::ready_future_marker{},
    std::make_unique<BtreeLBAPin>(
      this,
      val.paddr.maybe_relative_to(get_paddr()),
      lba_node_meta_t{ begin, begin + val.len, 0}));
}
501
502 LBALeafNode::mutate_mapping_ret LBALeafNode::mutate_mapping(
503 op_context_t c,
504 laddr_t laddr,
505 mutate_func_t &&f)
506 {
507 return mutate_mapping_internal(c, laddr, true, std::move(f));
508 }
509
// Applies f to the mapping at laddr and persists the result; when the
// mutated refcount drops to zero the mapping is removed instead.
LBALeafNode::mutate_mapping_ret LBALeafNode::mutate_mapping_internal(
  op_context_t c,
  laddr_t laddr,
  bool is_root,
  mutate_func_t &&f)
{
  auto mutation_pt = find(laddr);
  if (mutation_pt == end()) {
    return crimson::ct_error::enoent::make();
  }

  if (!is_pending()) {
    // Mutations require a transaction-private copy of this node.
    return c.cache.duplicate_for_write(c.trans, this)->cast<LBALeafNode>(
    )->mutate_mapping_internal(
      c,
      laddr,
      is_root,
      std::move(f));
  }

  auto cur = mutation_pt.get_val();
  auto mutated = f(cur);

  // Re-relativize the (possibly changed) paddr before persisting.
  mutated.paddr = maybe_generate_relative(mutated.paddr);

  logger().debug(
    "{}: mutate addr {}: {} -> {}",
    __func__,
    laddr,
    cur.paddr,
    mutated.paddr);

  if (mutated.refcount > 0) {
    journal_update(mutation_pt, mutated, maybe_get_delta_buffer());
    return mutate_mapping_ret(
      mutate_mapping_ertr::ready_future_marker{},
      mutated);
  } else {
    // Refcount reached zero: drop the mapping, but still report the
    // mutated value so the caller can observe the final state.
    journal_remove(mutation_pt, maybe_get_delta_buffer());
    return mutate_mapping_ret(
      mutate_mapping_ertr::ready_future_marker{},
      mutated);
  }
}
554
// Leaves have no internal-node children, so the descent in
// LBAInternalNode::mutate_internal_address must stop one level above;
// reaching this method indicates a broken descent.
LBALeafNode::mutate_internal_address_ret LBALeafNode::mutate_internal_address(
  op_context_t c,
  depth_t depth,
  laddr_t laddr,
  paddr_t paddr)
{
  ceph_assert(0 == "Impossible");
  // Unreachable; returning paddr only satisfies the signature.
  return mutate_internal_address_ret(
    mutate_internal_address_ertr::ready_future_marker{},
    paddr);
}
566
567 LBALeafNode::find_hole_ret LBALeafNode::find_hole(
568 op_context_t c,
569 laddr_t min,
570 laddr_t max,
571 extent_len_t len)
572 {
573 logger().debug(
574 "LBALeafNode::find_hole min={} max={}, len={}, *this={}",
575 min, max, len, *this);
576 auto [liter, uiter] = bound(min, max);
577 for (auto i = liter; i != uiter; ++i) {
578 auto ub = i->get_key();
579 if (min + len <= ub) {
580 return find_hole_ret(
581 find_hole_ertr::ready_future_marker{},
582 min);
583 } else {
584 min = i->get_key() + i->get_val().len;
585 }
586 }
587 if (min + len <= max) {
588 return find_hole_ret(
589 find_hole_ertr::ready_future_marker{},
590 min);
591 } else {
592 return find_hole_ret(
593 find_hole_ertr::ready_future_marker{},
594 L_ADDR_MAX);
595 }
596 }
597
598 LBALeafNode::scan_mappings_ret LBALeafNode::scan_mappings(
599 op_context_t c,
600 laddr_t begin,
601 laddr_t end,
602 scan_mappings_func_t &f)
603 {
604 auto [biter, eiter] = bound(begin, end);
605 for (auto i = biter; i != eiter; ++i) {
606 auto val = i->get_val();
607 f(i->get_key(), val.paddr, val.len);
608 }
609 return scan_mappings_ertr::now();
610 }
611
612 LBALeafNode::scan_mapped_space_ret LBALeafNode::scan_mapped_space(
613 op_context_t c,
614 scan_mapped_space_func_t &f)
615 {
616 f(get_paddr(), get_length());
617 for (auto i = begin(); i != end(); ++i) {
618 auto val = i->get_val();
619 f(val.paddr, val.len);
620 }
621 return scan_mappings_ertr::now();
622 }
623
624
625 void LBALeafNode::resolve_relative_addrs(paddr_t base)
626 {
627 for (auto i: *this) {
628 if (i->get_val().paddr.is_relative()) {
629 auto val = i->get_val();
630 val.paddr = base.add_relative(val.paddr);
631 logger().debug(
632 "LBALeafNode::resolve_relative_addrs {} -> {}",
633 i->get_val().paddr,
634 val.paddr);
635 i->set_val(val);
636 }
637 }
638 }
639
640 std::pair<LBALeafNode::internal_iterator_t, LBALeafNode::internal_iterator_t>
641 LBALeafNode::get_leaf_entries(laddr_t addr, extent_len_t len)
642 {
643 return bound(addr, addr + len);
644 }
645
// Reads the btree node at offset (resolved against base when relative)
// from the cache: an LBAInternalNode when depth > 1, an LBALeafNode
// when depth == 1.  Also registers the node's range pin on first
// non-pending load.
Cache::get_extent_ertr::future<LBANodeRef> get_lba_btree_extent(
  op_context_t c,
  depth_t depth,
  paddr_t offset,
  paddr_t base)
{
  offset = offset.maybe_relative_to(base);
  ceph_assert(depth > 0);
  if (depth > 1) {
    logger().debug(
      "get_lba_btree_extent: reading internal at offset {}, depth {}",
      offset,
      depth);
    return c.cache.get_extent<LBAInternalNode>(
      c.trans,
      offset,
      LBA_BLOCK_SIZE).safe_then([c](auto ret) {
        // Sanity: all keys must fall within the node's recorded range.
        auto meta = ret->get_meta();
        if (ret->get_size()) {
          ceph_assert(meta.begin <= ret->begin()->get_key());
          ceph_assert(meta.end > (ret->end() - 1)->get_key());
        }
        // First clean load: register the pin over the node's range.
        if (!ret->is_pending() && !ret->pin.is_linked()) {
          ret->pin.set_range(meta);
          c.pins.add_pin(ret->pin);
        }
        // Hand the existing reference to the LBANodeRef without an
        // extra refcount bump.
        return LBANodeRef(ret.detach(), /* add_ref = */ false);
      });
  } else {
    logger().debug(
      "get_lba_btree_extent: reading leaf at offset {}, depth {}",
      offset,
      depth);
    return c.cache.get_extent<LBALeafNode>(
      c.trans,
      offset,
      LBA_BLOCK_SIZE).safe_then([offset, c](auto ret) {
        logger().debug(
          "get_lba_btree_extent: read leaf at offset {} {}",
          offset,
          *ret);
        // Sanity: all keys must fall within the node's recorded range.
        auto meta = ret->get_meta();
        if (ret->get_size()) {
          ceph_assert(meta.begin <= ret->begin()->get_key());
          ceph_assert(meta.end > (ret->end() - 1)->get_key());
        }
        // First clean load: register the pin over the node's range.
        if (!ret->is_pending() && !ret->pin.is_linked()) {
          ret->pin.set_range(meta);
          c.pins.add_pin(ret->pin);
        }
        // Hand the existing reference to the LBANodeRef without an
        // extra refcount bump.
        return LBANodeRef(ret.detach(), /* add_ref = */ false);
      });
  }
}
700
701 }