// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include <sys/mman.h>
#include <string.h>

#include "crimson/common/log.h"

#include "include/buffer.h"
#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
#include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h"


namespace {
  seastar::logger& logger() {
    return crimson::get_logger(ceph_subsys_filestore);
  }
}

namespace crimson::os::seastore::lba_manager::btree {

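/* mkfs: initialize an empty LBA tree.  Allocates a single empty leaf
 * covering [0, L_ADDR_MAX) at depth 1 and records it as the LBA root
 * in the cached root block. */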
BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs(
  Transaction &t)
{
  logger().debug("BtreeLBAManager::mkfs");
  return cache.get_root(t).safe_then([this, &t](auto croot) {
    auto root_leaf = cache.alloc_new_extent<LBALeafNode>(
      t,
      LBA_BLOCK_SIZE);
    root_leaf->set_size(0);
    lba_node_meta_t meta{0, L_ADDR_MAX, 1};
    root_leaf->set_meta(meta);
    root_leaf->pin.set_range(meta);
    croot->get_root() =
      root_t{
        1,
        0,
        root_leaf->get_paddr(),
        make_record_relative_paddr(0),
        L_ADDR_NULL};
    return mkfs_ertr::now();
  });
}

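/* get_root: load the LBA btree root node referenced by the cached
 * root block at its recorded depth and address. */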
BtreeLBAManager::get_root_ret
BtreeLBAManager::get_root(Transaction &t)
{
  return cache.get_root(t).safe_then([this, &t](auto croot) {
    logger().debug(
      "BtreeLBAManager::get_root: reading root at {} depth {}",
      paddr_t{croot->get_root().lba_root_addr},
      unsigned(croot->get_root().lba_depth));
    return get_lba_btree_extent(
      get_context(t),
      croot->get_root().lba_depth,
      croot->get_root().lba_root_addr,
      paddr_t());
  });
}

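/* get_mapping: return the pins covering [offset, offset + length)
 * via a range lookup from the btree root. */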
BtreeLBAManager::get_mapping_ret
BtreeLBAManager::get_mapping(
  Transaction &t,
  laddr_t offset, extent_len_t length)
{
  logger().debug("BtreeLBAManager::get_mapping: {}, {}", offset, length);
  return get_root(
    t).safe_then([this, &t, offset, length](auto extent) {
      return extent->lookup_range(
        get_context(t),
        offset, length
      ).safe_then([extent](auto ret) { return ret; });
    }).safe_then([](auto &&e) {
      logger().debug("BtreeLBAManager::get_mapping: got mapping {}", e);
      return get_mapping_ret(
        get_mapping_ertr::ready_future_marker{},
        std::move(e));
    });
}


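/* get_mappings: resolve each (laddr, length) pair in the list via
 * get_mapping and splice the results into a single lba_pin_list_t. */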
BtreeLBAManager::get_mappings_ret
BtreeLBAManager::get_mappings(
  Transaction &t,
  laddr_list_t &&list)
{
  logger().debug("BtreeLBAManager::get_mappings: {}", list);
  auto l = std::make_unique<laddr_list_t>(std::move(list));
  auto retptr = std::make_unique<lba_pin_list_t>();
  auto &ret = *retptr;
  return crimson::do_for_each(
    l->begin(),
    l->end(),
    [this, &t, &ret](const auto &p) {
      return get_mapping(t, p.first, p.second).safe_then(
        [&ret](auto res) {
          ret.splice(ret.end(), res, res.begin(), res.end());
        });
    }).safe_then([l=std::move(l), retptr=std::move(retptr)]() mutable {
      return std::move(*retptr);
    });
}

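/* alloc_extent: find a free laddr range of length len at or after hint
 * via find_hole, then insert a mapping to addr with an initial
 * refcount of 1. */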
BtreeLBAManager::alloc_extent_ret
BtreeLBAManager::alloc_extent(
  Transaction &t,
  laddr_t hint,
  extent_len_t len,
  paddr_t addr)
{
  // TODO: we can certainly combine the lookup and the insert.
  return get_root(
    t).safe_then([this, &t, hint, len](auto extent) {
      logger().debug(
        "BtreeLBAManager::alloc_extent: beginning search at {}",
        *extent);
      return extent->find_hole(
        get_context(t),
        hint,
        L_ADDR_MAX,
        len).safe_then([extent](auto ret) {
          return std::make_pair(ret, extent);
        });
    }).safe_then([this, &t, len, addr](auto allocation_pair) {
      auto &[laddr, extent] = allocation_pair;
      ceph_assert(laddr != L_ADDR_MAX);
      return insert_mapping(
        t,
        extent,
        laddr,
        { len, addr, 1, 0 }
      ).safe_then([laddr=laddr, addr, len](auto pin) {
        logger().debug(
          "BtreeLBAManager::alloc_extent: alloc {}~{} for {}",
          laddr,
          len,
          addr);
        return alloc_extent_ret(
          alloc_extent_ertr::ready_future_marker{},
          LBAPinRef(pin.release()));
      });
    });
}

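/* set_extent: insert a mapping at an explicitly chosen laddr rather
 * than searching for a hole. */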
BtreeLBAManager::set_extent_ret
BtreeLBAManager::set_extent(
  Transaction &t,
  laddr_t off, extent_len_t len, paddr_t addr)
{
  return get_root(
    t).safe_then([this, &t, off, len, addr](auto root) {
      return insert_mapping(
        t,
        root,
        off,
        { len, addr, 1, 0 });
    }).safe_then([](auto ret) {
      return set_extent_ret(
        set_extent_ertr::ready_future_marker{},
        LBAPinRef(ret.release()));
    });
}

static bool is_lba_node(extent_types_t type)
{
  return type == extent_types_t::LADDR_INTERNAL ||
    type == extent_types_t::LADDR_LEAF;
}

static bool is_lba_node(const CachedExtent &e)
{
  return is_lba_node(e.get_type());
}

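/* get_pin: return the range pin embedded in an LBA btree node, or the
 * pin held by a logical extent's BtreeLBAPin. */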
btree_range_pin_t &BtreeLBAManager::get_pin(CachedExtent &e)
{
  if (is_lba_node(e)) {
    return e.cast<LBANode>()->pin;
  } else if (e.is_logical()) {
    return static_cast<BtreeLBAPin &>(
      e.cast<LogicalCachedExtent>()->get_pin()).pin;
  } else {
    ceph_abort_msg("impossible");
  }
}

static depth_t get_depth(const CachedExtent &e)
{
  if (is_lba_node(e)) {
    return e.cast<LBANode>()->get_node_meta().depth;
  } else if (e.is_logical()) {
    return 0;
  } else {
    ceph_assert(0 == "currently impossible");
    return 0;
  }
}

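/* complete_transaction: bring the pin set in line with a committed
 * transaction: retire the pins of retired extents in leaf-to-root
 * order, add the pins of fresh extents in root-to-leaf order, then
 * re-check the parents of the retired pins. */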
BtreeLBAManager::complete_transaction_ret
BtreeLBAManager::complete_transaction(
  Transaction &t)
{
  std::vector<CachedExtentRef> to_clear;
  to_clear.reserve(t.get_retired_set().size());
  for (auto &e: t.get_retired_set()) {
    if (e->is_logical() || is_lba_node(*e))
      to_clear.push_back(e);
  }
  // need to call check_parent from leaf->parent
  std::sort(
    to_clear.begin(), to_clear.end(),
    [](auto &l, auto &r) { return get_depth(*l) < get_depth(*r); });

  for (auto &e: to_clear) {
    auto &pin = get_pin(*e);
    logger().debug("{}: retiring {}, {}", __func__, *e, pin);
    pin_set.retire(pin);
  }

  // ...but add_pin from parent->leaf
  std::vector<CachedExtentRef> to_link;
  to_link.reserve(t.get_fresh_block_list().size());
  for (auto &e: t.get_fresh_block_list()) {
    if (e->is_valid() && (is_lba_node(*e) || e->is_logical()))
      to_link.push_back(e);
  }
  std::sort(
    to_link.begin(), to_link.end(),
    [](auto &l, auto &r) -> bool { return get_depth(*l) > get_depth(*r); });

  for (auto &e : to_link) {
    logger().debug("{}: linking {}", __func__, *e);
    pin_set.add_pin(get_pin(*e));
  }

  for (auto &e: to_clear) {
    auto &pin = get_pin(*e);
    logger().debug("{}: checking {}, {}", __func__, *e, pin);
    pin_set.check_parent(pin);
  }
  return complete_transaction_ertr::now();
}

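/* init_cached_extent: determine whether a cached extent is still
 * referenced by the LBA tree.  LBA nodes are re-looked-up by their
 * (begin, depth), logical extents by (laddr, length).  If the lookup
 * no longer resolves to this extent's paddr, the extent is dropped
 * from the cache; a surviving logical extent additionally has its pin
 * wired into the pin set. */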
BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent(
  Transaction &t,
  CachedExtentRef e)
{
  logger().debug("{}: {}", __func__, *e);
  return get_root(t).safe_then(
    [this, &t, e=std::move(e)](LBANodeRef root) mutable {
      if (is_lba_node(*e)) {
        auto lban = e->cast<LBANode>();
        logger().debug("init_cached_extent: lba node, getting root");
        return root->lookup(
          op_context_t{cache, pin_set, t},
          lban->get_node_meta().begin,
          lban->get_node_meta().depth
        ).safe_then([this, e=std::move(e)](LBANodeRef c) {
          if (c->get_paddr() == e->get_paddr()) {
            assert(&*c == &*e);
            logger().debug("init_cached_extent: {} initialized", *e);
          } else {
            // e is obsolete
            logger().debug("init_cached_extent: {} obsolete", *e);
            cache.drop_from_cache(e);
          }
          return init_cached_extent_ertr::now();
        });
      } else if (e->is_logical()) {
        auto logn = e->cast<LogicalCachedExtent>();
        return root->lookup_range(
          op_context_t{cache, pin_set, t},
          logn->get_laddr(),
          logn->get_length()).safe_then(
            [this, logn=std::move(logn)](auto pins) {
              if (pins.size() == 1) {
                auto pin = std::move(pins.front());
                pins.pop_front();
                if (pin->get_paddr() == logn->get_paddr()) {
                  logn->set_pin(std::move(pin));
                  pin_set.add_pin(
                    static_cast<BtreeLBAPin&>(logn->get_pin()).pin);
                  logger().debug("init_cached_extent: {} initialized", *logn);
                } else {
                  // paddr doesn't match, remapped, obsolete
                  logger().debug("init_cached_extent: {} obsolete", *logn);
                  cache.drop_from_cache(logn);
                }
              } else {
                // set of extents changed, obsolete
                logger().debug("init_cached_extent: {} obsolete", *logn);
                cache.drop_from_cache(logn);
              }
              return init_cached_extent_ertr::now();
            });
      } else {
        logger().debug("init_cached_extent: {} skipped", *e);
        return init_cached_extent_ertr::now();
      }
    });
}

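/* scan_mappings: invoke f for every mapping in [begin, end), keeping
 * the root node alive in lbarootref for the duration of the scan. */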
BtreeLBAManager::scan_mappings_ret BtreeLBAManager::scan_mappings(
  Transaction &t,
  laddr_t begin,
  laddr_t end,
  scan_mappings_func_t &&f)
{
  return seastar::do_with(
    std::move(f),
    LBANodeRef(),
    [=, &t](auto &f, auto &lbarootref) {
      return get_root(t).safe_then(
        [=, &t, &f](LBANodeRef lbaroot) mutable {
          lbarootref = lbaroot;
          return lbaroot->scan_mappings(
            get_context(t),
            begin,
            end,
            f);
        });
    });
}

BtreeLBAManager::scan_mapped_space_ret BtreeLBAManager::scan_mapped_space(
  Transaction &t,
  scan_mapped_space_func_t &&f)
{
  return seastar::do_with(
    std::move(f),
    LBANodeRef(),
    [=, &t](auto &f, auto &lbarootref) {
      return get_root(t).safe_then(
        [=, &t, &f](LBANodeRef lbaroot) mutable {
          lbarootref = lbaroot;
          return lbaroot->scan_mapped_space(
            get_context(t),
            f);
        });
    });
}

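/* rewrite_extent: relocate a live extent by copying its contents into
 * a newly allocated extent and repointing the reference to it: via
 * update_mapping for logical extents, or update_internal_mapping for
 * LBA btree nodes. */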
BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent(
  Transaction &t,
  CachedExtentRef extent)
{
  if (extent->is_logical()) {
    auto lextent = extent->cast<LogicalCachedExtent>();
    cache.retire_extent(t, extent);
    auto nlextent = cache.alloc_new_extent_by_type(
      t,
      lextent->get_type(),
      lextent->get_length())->cast<LogicalCachedExtent>();
    lextent->get_bptr().copy_out(
      0,
      lextent->get_length(),
      nlextent->get_bptr().c_str());
    nlextent->set_laddr(lextent->get_laddr());
    nlextent->set_pin(lextent->get_pin().duplicate());

    logger().debug(
      "{}: rewriting {} into {}",
      __func__,
      *lextent,
      *nlextent);

    return update_mapping(
      t,
      lextent->get_laddr(),
      [prev_addr = lextent->get_paddr(), addr = nlextent->get_paddr()](
        const lba_map_val_t &in) {
        lba_map_val_t ret = in;
        ceph_assert(in.paddr == prev_addr);
        ret.paddr = addr;
        return ret;
      }).safe_then([nlextent](auto e) {}).handle_error(
        rewrite_extent_ertr::pass_further{},
        /* ENOENT in particular should be impossible */
        crimson::ct_error::assert_all{}
      );
  } else if (is_lba_node(*extent)) {
    auto lba_extent = extent->cast<LBANode>();
    cache.retire_extent(t, extent);
    auto nlba_extent = cache.alloc_new_extent_by_type(
      t,
      lba_extent->get_type(),
      lba_extent->get_length())->cast<LBANode>();
    lba_extent->get_bptr().copy_out(
      0,
      lba_extent->get_length(),
      nlba_extent->get_bptr().c_str());
    nlba_extent->pin.set_range(nlba_extent->get_node_meta());

    /* This is a bit underhanded.  Any relative addrs here must necessarily
     * be record relative as we are rewriting a dirty extent.  Thus, we
     * are using resolve_relative_addrs with a (likely negative) block
     * relative offset to correct them to block-relative offsets adjusted
     * for our new transaction location.
     *
     * Upon commit, these now block-relative addresses will be interpreted
     * against the real final address.
     */
    nlba_extent->resolve_relative_addrs(
      make_record_relative_paddr(0) - nlba_extent->get_paddr());

    return update_internal_mapping(
      t,
      nlba_extent->get_node_meta().depth,
      nlba_extent->get_node_meta().begin,
      nlba_extent->get_paddr()).safe_then(
        [](auto) {},
        rewrite_extent_ertr::pass_further {},
        crimson::ct_error::assert_all{});
  } else {
    return rewrite_extent_ertr::now();
  }
}

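/* get_physical_extent_if_live: read the LBA node at addr and check
 * whether the tree still references that physical location for the
 * node's (begin, depth).  Returns the node if it is live, otherwise
 * drops it from the cache and returns a null ref. */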
BtreeLBAManager::get_physical_extent_if_live_ret
BtreeLBAManager::get_physical_extent_if_live(
  Transaction &t,
  extent_types_t type,
  paddr_t addr,
  laddr_t laddr,
  segment_off_t len)
{
  ceph_assert(is_lba_node(type));
  return cache.get_extent_by_type(
    t,
    type,
    addr,
    laddr,
    len
  ).safe_then([=, &t](CachedExtentRef extent) {
    return get_root(t).safe_then([=, &t](LBANodeRef root) {
      auto lba_node = extent->cast<LBANode>();
      return root->lookup(
        op_context_t{cache, pin_set, t},
        lba_node->get_node_meta().begin,
        lba_node->get_node_meta().depth).safe_then([=](LBANodeRef c) {
          if (c->get_paddr() == lba_node->get_paddr()) {
            return get_physical_extent_if_live_ret(
              get_physical_extent_if_live_ertr::ready_future_marker{},
              lba_node);
          } else {
            cache.drop_from_cache(lba_node);
            return get_physical_extent_if_live_ret(
              get_physical_extent_if_live_ertr::ready_future_marker{},
              CachedExtentRef());
          }
        });
    });
  });
}

BtreeLBAManager::BtreeLBAManager(
  SegmentManager &segment_manager,
  Cache &cache)
  : segment_manager(segment_manager),
    cache(cache) {}

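/* insert_mapping: insert laddr -> val starting from root.  If root is
 * already at max capacity, allocate a new internal root one level
 * deeper, point the root block at it, split the old root, and insert
 * into the resulting child. */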
BtreeLBAManager::insert_mapping_ret BtreeLBAManager::insert_mapping(
  Transaction &t,
  LBANodeRef root,
  laddr_t laddr,
  lba_map_val_t val)
{
  auto split = insert_mapping_ertr::future<LBANodeRef>(
    insert_mapping_ertr::ready_future_marker{},
    root);
  if (root->at_max_capacity()) {
    split = cache.get_root(t).safe_then(
      [this, root, laddr, &t](RootBlockRef croot) {
        logger().debug(
          "BtreeLBAManager::insert_mapping: splitting root {}",
          *croot);
        {
          auto mut_croot = cache.duplicate_for_write(t, croot);
          croot = mut_croot->cast<RootBlock>();
        }
        auto nroot = cache.alloc_new_extent<LBAInternalNode>(t, LBA_BLOCK_SIZE);
        lba_node_meta_t meta{0, L_ADDR_MAX, root->get_node_meta().depth + 1};
        nroot->set_meta(meta);
        nroot->pin.set_range(meta);
        nroot->journal_insert(
          nroot->begin(),
          L_ADDR_MIN,
          root->get_paddr(),
          nullptr);
        croot->get_root().lba_root_addr = nroot->get_paddr();
        croot->get_root().lba_depth = root->get_node_meta().depth + 1;
        return nroot->split_entry(
          get_context(t),
          laddr, nroot->begin(), root);
      });
  }
  return split.safe_then([this, &t, laddr, val](LBANodeRef node) {
    return node->insert(
      get_context(t),
      laddr, val);
  });
}

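/* update_refcount: adjust the refcount of the mapping at addr by
 * delta and return the resulting refcount and paddr. */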
BtreeLBAManager::update_refcount_ret BtreeLBAManager::update_refcount(
  Transaction &t,
  laddr_t addr,
  int delta)
{
  return update_mapping(
    t,
    addr,
    [delta](const lba_map_val_t &in) {
      lba_map_val_t out = in;
      ceph_assert((int)out.refcount + delta >= 0);
      out.refcount += delta;
      return out;
    }).safe_then([](auto result) {
      return ref_update_result_t{result.refcount, result.paddr};
    });
}

BtreeLBAManager::update_mapping_ret BtreeLBAManager::update_mapping(
  Transaction &t,
  laddr_t addr,
  update_func_t &&f)
{
  return get_root(t
  ).safe_then([this, f=std::move(f), &t, addr](LBANodeRef root) mutable {
    return root->mutate_mapping(
      get_context(t),
      addr,
      std::move(f));
  });
}

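/* update_internal_mapping: repoint the reference to the LBA node at
 * (depth, laddr).  If depth equals the tree depth, the node is the
 * root itself, so the root block's lba_root_addr is updated directly;
 * otherwise the tree is descended from the root and the referring
 * entry is updated via mutate_internal_address. */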
BtreeLBAManager::update_internal_mapping_ret
BtreeLBAManager::update_internal_mapping(
  Transaction &t,
  depth_t depth,
  laddr_t laddr,
  paddr_t paddr)
{
  return cache.get_root(t).safe_then([=, &t](RootBlockRef croot) {
    if (depth == croot->get_root().lba_depth) {
      logger().debug(
        "update_internal_mapping: updating lba root to: {}->{}",
        laddr,
        paddr);
      {
        auto mut_croot = cache.duplicate_for_write(t, croot);
        croot = mut_croot->cast<RootBlock>();
      }
      ceph_assert(laddr == 0);
      auto old_paddr = croot->get_root().lba_root_addr;
      croot->get_root().lba_root_addr = paddr;
      return update_internal_mapping_ret(
        update_internal_mapping_ertr::ready_future_marker{},
        old_paddr);
    } else {
      logger().debug(
        "update_internal_mapping: updating lba node at depth {} to: {}->{}",
        depth,
        laddr,
        paddr);
      return get_lba_btree_extent(
        get_context(t),
        croot->get_root().lba_depth,
        croot->get_root().lba_root_addr,
        paddr_t()).safe_then([=, &t](LBANodeRef broot) {
          return broot->mutate_internal_address(
            get_context(t),
            depth,
            laddr,
            paddr);
        });
    }
  });
}

}