]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include <sys/mman.h> | |
5 | #include <string.h> | |
6 | ||
7 | #include "crimson/common/log.h" | |
8 | ||
9 | #include "include/buffer.h" | |
10 | #include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" | |
11 | #include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h" | |
12 | ||
13 | ||
namespace {
  // File-local logger accessor.
  // NOTE(review): this fetches the *filestore* subsystem logger even though
  // this file belongs to seastore — presumably a copy/paste leftover; confirm
  // whether a seastore subsystem exists and should be used here instead.
  seastar::logger& logger() {
    return crimson::get_logger(ceph_subsys_filestore);
  }
}
19 | ||
20 | namespace crimson::os::seastore::lba_manager::btree { | |
21 | ||
BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs(
  Transaction &t)
{
  logger().debug("BtreeLBAManager::mkfs");
  return cache.get_root(t).safe_then([this, &t](auto croot) {
    // Allocate an empty leaf node to serve as the initial LBA btree root.
    auto root_leaf = cache.alloc_new_extent<LBALeafNode>(
      t,
      LBA_BLOCK_SIZE);
    root_leaf->set_size(0);
    // Root covers the full logical address space at depth 1.
    lba_node_meta_t meta{0, L_ADDR_MAX, 1};
    root_leaf->set_meta(meta);
    root_leaf->pin.set_range(meta);
    // NOTE(review): croot is written here without duplicate_for_write(),
    // unlike insert_mapping()/update_internal_mapping() below — presumably
    // acceptable during mkfs because the root block is fresh in this
    // transaction; confirm.
    croot->get_root() =
      root_t{
        1,
        0,
        root_leaf->get_paddr(),
        make_record_relative_paddr(0),
        L_ADDR_NULL};
    return mkfs_ertr::now();
  });
}
44 | ||
45 | BtreeLBAManager::get_root_ret | |
46 | BtreeLBAManager::get_root(Transaction &t) | |
47 | { | |
48 | return cache.get_root(t).safe_then([this, &t](auto croot) { | |
49 | logger().debug( | |
50 | "BtreeLBAManager::get_root: reading root at {} depth {}", | |
51 | paddr_t{croot->get_root().lba_root_addr}, | |
52 | unsigned(croot->get_root().lba_depth)); | |
53 | return get_lba_btree_extent( | |
54 | get_context(t), | |
55 | croot->get_root().lba_depth, | |
56 | croot->get_root().lba_root_addr, | |
57 | paddr_t()); | |
58 | }); | |
59 | } | |
60 | ||
// Look up the pins covering [offset, offset + length) in the LBA btree.
BtreeLBAManager::get_mapping_ret
BtreeLBAManager::get_mapping(
  Transaction &t,
  laddr_t offset, extent_len_t length)
{
  logger().debug("BtreeLBAManager::get_mapping: {}, {}", offset, length);
  return get_root(
    t).safe_then([this, &t, offset, length](auto extent) {
    return extent->lookup_range(
      get_context(t),
      offset, length
      // capturing extent keeps the root node alive until lookup completes
    ).safe_then([extent](auto ret) { return ret; });
  }).safe_then([](auto &&e) {
    logger().debug("BtreeLBAManager::get_mapping: got mapping {}", e);
    return get_mapping_ret(
      get_mapping_ertr::ready_future_marker{},
      std::move(e));
  });
}
80 | ||
81 | ||
// Look up pins for each (offset, length) pair in list, concatenated in order.
BtreeLBAManager::get_mappings_ret
BtreeLBAManager::get_mappings(
  Transaction &t,
  laddr_list_t &&list)
{
  logger().debug("BtreeLBAManager::get_mappings: {}", list);
  // Heap-own the input list and the accumulated result so the references
  // captured by the continuations below stay valid across suspensions;
  // both are released by the terminal safe_then.
  auto l = std::make_unique<laddr_list_t>(std::move(list));
  auto retptr = std::make_unique<lba_pin_list_t>();
  auto &ret = *retptr;
  return crimson::do_for_each(
    l->begin(),
    l->end(),
    [this, &t, &ret](const auto &p) {
      return get_mapping(t, p.first, p.second).safe_then(
	[&ret](auto res) {
	  // Move the pins from this lookup onto the tail of the result.
	  ret.splice(ret.end(), res, res.begin(), res.end());
	});
    }).safe_then([l=std::move(l), retptr=std::move(retptr)]() mutable {
      return std::move(*retptr);
    });
}
103 | ||
// Find a free logical range of length len at or after hint, map it to addr,
// and return the resulting pin.
BtreeLBAManager::alloc_extent_ret
BtreeLBAManager::alloc_extent(
  Transaction &t,
  laddr_t hint,
  extent_len_t len,
  paddr_t addr)
{
  // TODO: we can certainly combine the lookup and the insert.
  return get_root(
    t).safe_then([this, &t, hint, len](auto extent) {
    logger().debug(
      "BtreeLBAManager::alloc_extent: beginning search at {}",
      *extent);
    return extent->find_hole(
      get_context(t),
      hint,
      L_ADDR_MAX,
      // pairing the root with the result keeps it alive into the next stage
      len).safe_then([extent](auto ret) {
	return std::make_pair(ret, extent);
      });
  }).safe_then([this, &t, len, addr](auto allocation_pair) {
    auto &[laddr, extent] = allocation_pair;
    // L_ADDR_MAX would indicate find_hole failed to locate a free range.
    ceph_assert(laddr != L_ADDR_MAX);
    return insert_mapping(
      t,
      extent,
      laddr,
      { len, addr, 1, 0 }  // presumably {len, paddr, refcount, checksum} — confirm against lba_map_val_t
    ).safe_then([laddr=laddr, addr, len](auto pin) {
      logger().debug(
	"BtreeLBAManager::alloc_extent: alloc {}~{} for {}",
	laddr,
	len,
	addr);
      return alloc_extent_ret(
	alloc_extent_ertr::ready_future_marker{},
	LBAPinRef(pin.release()));
    });
  });
}
144 | ||
145 | BtreeLBAManager::set_extent_ret | |
146 | BtreeLBAManager::set_extent( | |
147 | Transaction &t, | |
148 | laddr_t off, extent_len_t len, paddr_t addr) | |
149 | { | |
150 | return get_root( | |
151 | t).safe_then([this, &t, off, len, addr](auto root) { | |
152 | return insert_mapping( | |
153 | t, | |
154 | root, | |
155 | off, | |
156 | { len, addr, 1, 0 }); | |
157 | }).safe_then([](auto ret) { | |
158 | return set_extent_ret( | |
159 | set_extent_ertr::ready_future_marker{}, | |
160 | LBAPinRef(ret.release())); | |
161 | }); | |
162 | } | |
163 | ||
164 | static bool is_lba_node(extent_types_t type) | |
165 | { | |
166 | return type == extent_types_t::LADDR_INTERNAL || | |
167 | type == extent_types_t::LADDR_LEAF; | |
168 | } | |
169 | ||
// Convenience overload: classify an extent via its runtime type tag.
static bool is_lba_node(const CachedExtent &e)
{
  return is_lba_node(e.get_type());
}
174 | ||
// Return the btree range pin embedded in e: LBA nodes carry it directly,
// logical extents carry it inside their BtreeLBAPin.  Aborts for any other
// extent kind.
btree_range_pin_t &BtreeLBAManager::get_pin(CachedExtent &e)
{
  if (is_lba_node(e)) {
    return e.cast<LBANode>()->pin;
  } else if (e.is_logical()) {
    return static_cast<BtreeLBAPin &>(
      e.cast<LogicalCachedExtent>()->get_pin()).pin;
  } else {
    ceph_abort_msg("impossible");
  }
}
186 | ||
187 | static depth_t get_depth(const CachedExtent &e) | |
188 | { | |
189 | if (is_lba_node(e)) { | |
190 | return e.cast<LBANode>()->get_node_meta().depth; | |
191 | } else if (e.is_logical()) { | |
192 | return 0; | |
193 | } else { | |
194 | ceph_assert(0 == "currently impossible"); | |
195 | return 0; | |
196 | } | |
197 | } | |
198 | ||
// Tear down pins for extents retired by t and link pins for extents it
// created, maintaining the pin-set's parent/child invariants by processing
// in depth order.
BtreeLBAManager::complete_transaction_ret
BtreeLBAManager::complete_transaction(
  Transaction &t)
{
  // Collect the retired extents that participate in the pin set.
  std::vector<CachedExtentRef> to_clear;
  to_clear.reserve(t.get_retired_set().size());
  for (auto &e: t.get_retired_set()) {
    if (e->is_logical() || is_lba_node(*e))
      to_clear.push_back(e);
  }
  // need to call check_parent from leaf->parent, so ascending depth
  std::sort(
    to_clear.begin(), to_clear.end(),
    [](auto &l, auto &r) { return get_depth(*l) < get_depth(*r); });

  for (auto &e: to_clear) {
    auto &pin = get_pin(*e);
    logger().debug("{}: retiring {}, {}", __func__, *e, pin);
    pin_set.retire(pin);
  }

  // ...but add_pin from parent->leaf, so descending depth below
  std::vector<CachedExtentRef> to_link;
  to_link.reserve(t.get_fresh_block_list().size());
  for (auto &e: t.get_fresh_block_list()) {
    if (e->is_valid() && (is_lba_node(*e) || e->is_logical()))
      to_link.push_back(e);
  }
  std::sort(
    to_link.begin(), to_link.end(),
    [](auto &l, auto &r) -> bool { return get_depth(*l) > get_depth(*r); });

  for (auto &e : to_link) {
    logger().debug("{}: linking {}", __func__, *e);
    pin_set.add_pin(get_pin(*e));
  }

  // Re-validate parents of the retired pins now that fresh pins are linked.
  for (auto &e: to_clear) {
    auto &pin = get_pin(*e);
    logger().debug("{}: checking {}, {}", __func__, *e, pin);
    pin_set.check_parent(pin);
  }
  return complete_transaction_ertr::now();
}
243 | ||
// Re-validate a cached extent against the current btree: if e is still what
// the tree points at, wire up its pin; otherwise drop it from the cache as
// obsolete.
// NOTE(review): this builds op_context_t{cache, pin_set, t} inline where the
// rest of the file uses get_context(t) — presumably equivalent; confirm and
// unify.
BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent(
  Transaction &t,
  CachedExtentRef e)
{
  logger().debug("{}: {}", __func__, *e);
  return get_root(t).safe_then(
    [this, &t, e=std::move(e)](LBANodeRef root) mutable {
      if (is_lba_node(*e)) {
	// Btree node: walk down to the node covering the same range/depth
	// and compare physical addresses.
	auto lban = e->cast<LBANode>();
	logger().debug("init_cached_extent: lba node, getting root");
	return root->lookup(
	  op_context_t{cache, pin_set, t},
	  lban->get_node_meta().begin,
	  lban->get_node_meta().depth
	).safe_then([this, e=std::move(e)](LBANodeRef c) {
	  if (c->get_paddr() == e->get_paddr()) {
	    assert(&*c == &*e);
	    logger().debug("init_cached_extent: {} initialized", *e);
	  } else {
	    // e is obsolete
	    logger().debug("init_cached_extent: {} obsolete", *e);
	    cache.drop_from_cache(e);
	  }
	  return init_cached_extent_ertr::now();
	});
      } else if (e->is_logical()) {
	// Logical extent: its laddr range must map to exactly one pin whose
	// paddr matches; anything else means the mapping changed under it.
	auto logn = e->cast<LogicalCachedExtent>();
	return root->lookup_range(
	  op_context_t{cache, pin_set, t},
	  logn->get_laddr(),
	  logn->get_length()).safe_then(
	    [this, logn=std::move(logn)](auto pins) {
	      if (pins.size() == 1) {
		auto pin = std::move(pins.front());
		pins.pop_front();
		if (pin->get_paddr() == logn->get_paddr()) {
		  logn->set_pin(std::move(pin));
		  pin_set.add_pin(
		    static_cast<BtreeLBAPin&>(logn->get_pin()).pin);
		  logger().debug("init_cached_extent: {} initialized", *logn);
		} else {
		  // paddr doesn't match, remapped, obsolete
		  logger().debug("init_cached_extent: {} obsolete", *logn);
		  cache.drop_from_cache(logn);
		}
	      } else {
		// set of extents changed, obsolete
		logger().debug("init_cached_extent: {} obsolete", *logn);
		cache.drop_from_cache(logn);
	      }
	      return init_cached_extent_ertr::now();
	    });
      } else {
	// Not tracked by the LBA manager; nothing to do.
	logger().debug("init_cached_extent: {} skipped", *e);
	return init_cached_extent_ertr::now();
      }
    });
}
302 | ||
303 | BtreeLBAManager::scan_mappings_ret BtreeLBAManager::scan_mappings( | |
304 | Transaction &t, | |
305 | laddr_t begin, | |
306 | laddr_t end, | |
307 | scan_mappings_func_t &&f) | |
308 | { | |
309 | return seastar::do_with( | |
310 | std::move(f), | |
311 | LBANodeRef(), | |
312 | [=, &t](auto &f, auto &lbarootref) { | |
313 | return get_root(t).safe_then( | |
314 | [=, &t, &f](LBANodeRef lbaroot) mutable { | |
315 | lbarootref = lbaroot; | |
316 | return lbaroot->scan_mappings( | |
317 | get_context(t), | |
318 | begin, | |
319 | end, | |
320 | f); | |
321 | }); | |
322 | }); | |
323 | } | |
324 | ||
325 | BtreeLBAManager::scan_mapped_space_ret BtreeLBAManager::scan_mapped_space( | |
326 | Transaction &t, | |
327 | scan_mapped_space_func_t &&f) | |
328 | { | |
329 | return seastar::do_with( | |
330 | std::move(f), | |
331 | LBANodeRef(), | |
332 | [=, &t](auto &f, auto &lbarootref) { | |
333 | return get_root(t).safe_then( | |
334 | [=, &t, &f](LBANodeRef lbaroot) mutable { | |
335 | lbarootref = lbaroot; | |
336 | return lbaroot->scan_mapped_space( | |
337 | get_context(t), | |
338 | f); | |
339 | }); | |
340 | }); | |
341 | } | |
342 | ||
// Rewrite extent into a freshly allocated copy (e.g. for gc/relocation) and
// update whatever points at it: the leaf mapping for logical extents, the
// parent entry (or root pointer) for btree nodes.  Other extent types are
// ignored.
BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent(
  Transaction &t,
  CachedExtentRef extent)
{
  if (extent->is_logical()) {
    // Copy the payload into a new extent of the same type/length and point
    // the existing laddr mapping at the new paddr.
    auto lextent = extent->cast<LogicalCachedExtent>();
    cache.retire_extent(t, extent);
    auto nlextent = cache.alloc_new_extent_by_type(
      t,
      lextent->get_type(),
      lextent->get_length())->cast<LogicalCachedExtent>();
    lextent->get_bptr().copy_out(
      0,
      lextent->get_length(),
      nlextent->get_bptr().c_str());
    nlextent->set_laddr(lextent->get_laddr());
    nlextent->set_pin(lextent->get_pin().duplicate());

    logger().debug(
      "{}: rewriting {} into {}",
      __func__,
      *lextent,
      *nlextent);

    return update_mapping(
      t,
      lextent->get_laddr(),
      [prev_addr = lextent->get_paddr(), addr = nlextent->get_paddr()](
	const lba_map_val_t &in) {
	lba_map_val_t ret = in;
	// The mapping must still reference the extent we are replacing.
	ceph_assert(in.paddr == prev_addr);
	ret.paddr = addr;
	return ret;
	// empty continuation just keeps nlextent alive until the update lands
      }).safe_then([nlextent](auto e) {}).handle_error(
	rewrite_extent_ertr::pass_further{},
	/* ENOENT in particular should be impossible */
	crimson::ct_error::assert_all{}
      );
  } else if (is_lba_node(*extent)) {
    // Copy the node and fix up its parent's (or the root's) child pointer.
    auto lba_extent = extent->cast<LBANode>();
    cache.retire_extent(t, extent);
    auto nlba_extent = cache.alloc_new_extent_by_type(
      t,
      lba_extent->get_type(),
      lba_extent->get_length())->cast<LBANode>();
    lba_extent->get_bptr().copy_out(
      0,
      lba_extent->get_length(),
      nlba_extent->get_bptr().c_str());
    nlba_extent->pin.set_range(nlba_extent->get_node_meta());

    /* This is a bit underhanded.  Any relative addrs here must necessarily
     * be record relative as we are rewriting a dirty extent.  Thus, we
     * are using resolve_relative_addrs with a (likely negative) block
     * relative offset to correct them to block-relative offsets adjusted
     * for our new transaction location.
     *
     * Upon commit, these now block relative addresses will be interpretted
     * against the real final address.
     */
    nlba_extent->resolve_relative_addrs(
      make_record_relative_paddr(0) - nlba_extent->get_paddr());

    return update_internal_mapping(
      t,
      nlba_extent->get_node_meta().depth,
      nlba_extent->get_node_meta().begin,
      nlba_extent->get_paddr()).safe_then(
	[](auto) {},
	rewrite_extent_ertr::pass_further {},
	crimson::ct_error::assert_all{});
  } else {
    // Extent types the LBA manager does not track need no rewrite work here.
    return rewrite_extent_ertr::now();
  }
}
418 | ||
// Fetch the LBA node at addr and determine whether it is still the live node
// for its range/depth: returns the node if the tree still points at this
// paddr, otherwise drops it from the cache and returns a null ref.
BtreeLBAManager::get_physical_extent_if_live_ret
BtreeLBAManager::get_physical_extent_if_live(
  Transaction &t,
  extent_types_t type,
  paddr_t addr,
  laddr_t laddr,
  segment_off_t len)
{
  // Only btree-node extents are handled by this path.
  ceph_assert(is_lba_node(type));
  return cache.get_extent_by_type(
    t,
    type,
    addr,
    laddr,
    len
  ).safe_then([=, &t](CachedExtentRef extent) {
    return get_root(t).safe_then([=, &t](LBANodeRef root) {
      auto lba_node = extent->cast<LBANode>();
      // Walk to the node currently covering the same range/depth and
      // compare physical addresses to decide liveness.
      return root->lookup(
	op_context_t{cache, pin_set, t},
	lba_node->get_node_meta().begin,
	lba_node->get_node_meta().depth).safe_then([=](LBANodeRef c) {
	if (c->get_paddr() == lba_node->get_paddr()) {
	  return get_physical_extent_if_live_ret(
	    get_physical_extent_if_live_ertr::ready_future_marker{},
	    lba_node);
	} else {
	  // Superseded by a newer node; evict the stale copy.
	  cache.drop_from_cache(lba_node);
	  return get_physical_extent_if_live_ret(
	    get_physical_extent_if_live_ertr::ready_future_marker{},
	    CachedExtentRef());
	}
      });
    });
  });
}
455 | ||
// Bind the manager to its backing segment manager and extent cache.
BtreeLBAManager::BtreeLBAManager(
  SegmentManager &segment_manager,
  Cache &cache)
  : segment_manager(segment_manager),
    cache(cache) {}
461 | ||
// Insert laddr -> val under root, first splitting the root (growing the tree
// by one level) if it is already full.
BtreeLBAManager::insert_mapping_ret BtreeLBAManager::insert_mapping(
  Transaction &t,
  LBANodeRef root,
  laddr_t laddr,
  lba_map_val_t val)
{
  // By default the insertion target is the existing root.
  auto split = insert_mapping_ertr::future<LBANodeRef>(
    insert_mapping_ertr::ready_future_marker{},
    root);
  if (root->at_max_capacity()) {
    split = cache.get_root(t).safe_then(
      [this, root, laddr, &t](RootBlockRef croot) {
	logger().debug(
	  "BtreeLBAManager::insert_mapping: splitting root {}",
	  *croot);
	{
	  // Must duplicate the root block for write before mutating it.
	  auto mut_croot = cache.duplicate_for_write(t, croot);
	  croot = mut_croot->cast<RootBlock>();
	}
	// New internal root one level above the old root, covering the
	// whole address space.
	auto nroot = cache.alloc_new_extent<LBAInternalNode>(t, LBA_BLOCK_SIZE);
	lba_node_meta_t meta{0, L_ADDR_MAX, root->get_node_meta().depth + 1};
	nroot->set_meta(meta);
	nroot->pin.set_range(meta);
	// Seed the new root with a single entry pointing at the old root.
	nroot->journal_insert(
	  nroot->begin(),
	  L_ADDR_MIN,
	  root->get_paddr(),
	  nullptr);
	croot->get_root().lba_root_addr = nroot->get_paddr();
	croot->get_root().lba_depth = root->get_node_meta().depth + 1;
	// Split the (full) old root; yields the child covering laddr.
	return nroot->split_entry(
	  get_context(t),
	  laddr, nroot->begin(), root);
      });
  }
  return split.safe_then([this, &t, laddr, val](LBANodeRef node) {
    return node->insert(
      get_context(t),
      laddr, val);
  });
}
503 | ||
504 | BtreeLBAManager::update_refcount_ret BtreeLBAManager::update_refcount( | |
505 | Transaction &t, | |
506 | laddr_t addr, | |
507 | int delta) | |
508 | { | |
509 | return update_mapping( | |
510 | t, | |
511 | addr, | |
512 | [delta](const lba_map_val_t &in) { | |
513 | lba_map_val_t out = in; | |
514 | ceph_assert((int)out.refcount + delta >= 0); | |
515 | out.refcount += delta; | |
516 | return out; | |
517 | }).safe_then([](auto result) { | |
518 | return ref_update_result_t{result.refcount, result.paddr}; | |
519 | }); | |
520 | } | |
521 | ||
522 | BtreeLBAManager::update_mapping_ret BtreeLBAManager::update_mapping( | |
523 | Transaction &t, | |
524 | laddr_t addr, | |
525 | update_func_t &&f) | |
526 | { | |
527 | return get_root(t | |
528 | ).safe_then([this, f=std::move(f), &t, addr](LBANodeRef root) mutable { | |
529 | return root->mutate_mapping( | |
530 | get_context(t), | |
531 | addr, | |
532 | std::move(f)); | |
533 | }); | |
534 | } | |
535 | ||
// Repoint the internal-tree reference to the node at (depth, laddr) at a new
// paddr.  If depth equals the tree depth the reference lives in the root
// block itself; otherwise it lives in a parent node found by walking down.
// Returns the previous paddr.
BtreeLBAManager::update_internal_mapping_ret
BtreeLBAManager::update_internal_mapping(
  Transaction &t,
  depth_t depth,
  laddr_t laddr,
  paddr_t paddr)
{
  return cache.get_root(t).safe_then([=, &t](RootBlockRef croot) {
    if (depth == croot->get_root().lba_depth) {
      // The target is the root node: update the root block's pointer.
      logger().debug(
	"update_internal_mapping: updating lba root to: {}->{}",
	laddr,
	paddr);
      {
	// Must duplicate the root block for write before mutating it.
	auto mut_croot = cache.duplicate_for_write(t, croot);
	croot = mut_croot->cast<RootBlock>();
      }
      // The root always begins at laddr 0.
      ceph_assert(laddr == 0);
      auto old_paddr = croot->get_root().lba_root_addr;
      croot->get_root().lba_root_addr = paddr;
      return update_internal_mapping_ret(
	update_internal_mapping_ertr::ready_future_marker{},
	old_paddr);
    } else {
      // The target is below the root: delegate to the tree to find and
      // mutate the parent's child pointer.
      logger().debug(
	"update_internal_mapping: updating lba node at depth {} to: {}->{}",
	depth,
	laddr,
	paddr);
      return get_lba_btree_extent(
	get_context(t),
	croot->get_root().lba_depth,
	croot->get_root().lba_root_addr,
	paddr_t()).safe_then([=, &t](LBANodeRef broot) {
	return broot->mutate_internal_address(
	  get_context(t),
	  depth,
	  laddr,
	  paddr);
      });
    }
  });
}
579 | ||
580 | } |