]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include <sys/mman.h> | |
5 | #include <string.h> | |
6 | ||
7 | #include "crimson/common/log.h" | |
20effc67 | 8 | #include "crimson/os/seastore/logging.h" |
f67539c2 TL |
9 | |
10 | #include "include/buffer.h" | |
11 | #include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" | |
20effc67 TL |
12 | #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" |
13 | #include "crimson/os/seastore/lba_manager/btree/lba_btree.h" | |
f67539c2 TL |
14 | |
15 | ||
16 | namespace { | |
17 | seastar::logger& logger() { | |
20effc67 | 18 | return crimson::get_logger(ceph_subsys_seastore_lba); |
f67539c2 TL |
19 | } |
20 | } | |
21 | ||
20effc67 TL |
22 | SET_SUBSYS(seastore_lba); |
23 | ||
f67539c2 TL |
24 | namespace crimson::os::seastore::lba_manager::btree { |
25 | ||
26 | BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs( | |
27 | Transaction &t) | |
28 | { | |
20effc67 TL |
29 | return cache.get_root(t).si_then([this, &t](auto croot) { |
30 | croot->get_root().lba_root = LBABtree::mkfs(get_context(t)); | |
31 | return mkfs_iertr::now(); | |
32 | }).handle_error_interruptible( | |
33 | mkfs_iertr::pass_further{}, | |
34 | crimson::ct_error::assert_all{ | |
35 | "Invalid error in BtreeLBAManager::mkfs" | |
36 | } | |
37 | ); | |
f67539c2 TL |
38 | } |
39 | ||
20effc67 TL |
40 | BtreeLBAManager::get_mappings_ret |
41 | BtreeLBAManager::get_mappings( | |
f67539c2 TL |
42 | Transaction &t, |
43 | laddr_t offset, extent_len_t length) | |
44 | { | |
20effc67 TL |
45 | LOG_PREFIX(BtreeLBAManager::get_mappings); |
46 | DEBUGT("offset: {}, length{}", t, offset, length); | |
47 | auto c = get_context(t); | |
48 | return with_btree_state<lba_pin_list_t>( | |
49 | c, | |
50 | [c, offset, length](auto &btree, auto &ret) { | |
51 | return LBABtree::iterate_repeat( | |
52 | c, | |
53 | btree.upper_bound_right(c, offset), | |
54 | false, | |
55 | [&ret, offset, length](auto &pos) { | |
56 | if (pos.is_end() || pos.get_key() >= (offset + length)) { | |
57 | return LBABtree::iterate_repeat_ret_inner( | |
58 | interruptible::ready_future_marker{}, | |
59 | seastar::stop_iteration::yes); | |
60 | } | |
61 | ceph_assert((pos.get_key() + pos.get_val().len) > offset); | |
62 | ret.push_back(pos.get_pin()); | |
63 | return LBABtree::iterate_repeat_ret_inner( | |
64 | interruptible::ready_future_marker{}, | |
65 | seastar::stop_iteration::no); | |
66 | }); | |
f67539c2 TL |
67 | }); |
68 | } | |
69 | ||
70 | ||
71 | BtreeLBAManager::get_mappings_ret | |
72 | BtreeLBAManager::get_mappings( | |
73 | Transaction &t, | |
74 | laddr_list_t &&list) | |
75 | { | |
20effc67 TL |
76 | LOG_PREFIX(BtreeLBAManager::get_mappings); |
77 | DEBUGT("{}", t, list); | |
f67539c2 TL |
78 | auto l = std::make_unique<laddr_list_t>(std::move(list)); |
79 | auto retptr = std::make_unique<lba_pin_list_t>(); | |
80 | auto &ret = *retptr; | |
20effc67 | 81 | return trans_intr::do_for_each( |
f67539c2 TL |
82 | l->begin(), |
83 | l->end(), | |
84 | [this, &t, &ret](const auto &p) { | |
20effc67 | 85 | return get_mappings(t, p.first, p.second).si_then( |
f67539c2 TL |
86 | [&ret](auto res) { |
87 | ret.splice(ret.end(), res, res.begin(), res.end()); | |
20effc67 | 88 | return get_mappings_iertr::now(); |
f67539c2 | 89 | }); |
20effc67 | 90 | }).si_then([l=std::move(l), retptr=std::move(retptr)]() mutable { |
f67539c2 TL |
91 | return std::move(*retptr); |
92 | }); | |
93 | } | |
94 | ||
20effc67 TL |
95 | BtreeLBAManager::get_mapping_ret |
96 | BtreeLBAManager::get_mapping( | |
f67539c2 | 97 | Transaction &t, |
20effc67 | 98 | laddr_t offset) |
f67539c2 | 99 | { |
20effc67 TL |
100 | LOG_PREFIX(BtreeLBAManager::get_mapping); |
101 | DEBUGT("{}", t, offset); | |
102 | auto c = get_context(t); | |
103 | return with_btree_ret<LBAPinRef>( | |
104 | c, | |
105 | [FNAME, c, offset](auto &btree) { | |
106 | return btree.lower_bound( | |
107 | c, offset | |
108 | ).si_then([FNAME, offset, c](auto iter) -> get_mapping_ret { | |
109 | if (iter.is_end() || iter.get_key() != offset) { | |
110 | return crimson::ct_error::enoent::make(); | |
111 | } else { | |
112 | auto e = iter.get_pin(); | |
113 | DEBUGT("got mapping {}", c.trans, *e); | |
114 | return get_mapping_ret( | |
115 | interruptible::ready_future_marker{}, | |
116 | std::move(e)); | |
117 | } | |
f67539c2 TL |
118 | }); |
119 | }); | |
120 | } | |
121 | ||
20effc67 TL |
122 | BtreeLBAManager::alloc_extent_ret |
123 | BtreeLBAManager::alloc_extent( | |
f67539c2 | 124 | Transaction &t, |
20effc67 TL |
125 | laddr_t hint, |
126 | extent_len_t len, | |
127 | paddr_t addr) | |
f67539c2 | 128 | { |
20effc67 TL |
129 | struct state_t { |
130 | laddr_t last_end; | |
131 | ||
132 | std::optional<LBABtree::iterator> insert_iter; | |
133 | std::optional<LBABtree::iterator> ret; | |
134 | ||
135 | state_t(laddr_t hint) : last_end(hint) {} | |
136 | }; | |
137 | ||
138 | LOG_PREFIX(BtreeLBAManager::alloc_extent); | |
139 | DEBUGT("hint: {}, length: {}", t, hint, len); | |
140 | auto c = get_context(t); | |
141 | ++LBABtree::lba_tree_inner_stats.num_alloc_extents; | |
142 | return with_btree_state<state_t>( | |
143 | c, | |
144 | hint, | |
145 | [FNAME, c, hint, len, addr, &t](auto &btree, auto &state) { | |
146 | return LBABtree::iterate_repeat( | |
147 | c, | |
148 | btree.upper_bound_right(c, hint), | |
149 | true, | |
150 | [&state, len, &t, hint](auto &pos) { | |
151 | LOG_PREFIX(BtreeLBAManager::alloc_extent); | |
152 | if (!pos.is_end()) { | |
153 | DEBUGT("iterate_repeat: pos: {}~{}, state: {}~{}, hint: {}", | |
154 | t, | |
155 | pos.get_key(), | |
156 | pos.get_val().len, | |
157 | state.last_end, | |
158 | len, | |
159 | hint); | |
160 | } | |
161 | if (pos.is_end() || pos.get_key() >= (state.last_end + len)) { | |
162 | state.insert_iter = pos; | |
163 | return LBABtree::iterate_repeat_ret_inner( | |
164 | interruptible::ready_future_marker{}, | |
165 | seastar::stop_iteration::yes); | |
166 | } else { | |
167 | state.last_end = pos.get_key() + pos.get_val().len; | |
168 | return LBABtree::iterate_repeat_ret_inner( | |
169 | interruptible::ready_future_marker{}, | |
170 | seastar::stop_iteration::no); | |
171 | } | |
172 | }).si_then([FNAME, c, addr, len, &btree, &state] { | |
173 | DEBUGT("about to insert at addr {}~{}", c.trans, state.last_end, len); | |
174 | return btree.insert( | |
175 | c, | |
176 | *state.insert_iter, | |
177 | state.last_end, | |
178 | lba_map_val_t{len, addr, 1, 0} | |
179 | ).si_then([&state](auto &&p) { | |
180 | auto [iter, inserted] = std::move(p); | |
181 | ceph_assert(inserted); | |
182 | state.ret = iter; | |
183 | }); | |
184 | }); | |
185 | }).si_then([](auto &&state) { | |
186 | return state.ret->get_pin(); | |
f67539c2 TL |
187 | }); |
188 | } | |
189 | ||
f67539c2 TL |
190 | static bool is_lba_node(const CachedExtent &e) |
191 | { | |
192 | return is_lba_node(e.get_type()); | |
193 | } | |
194 | ||
195 | btree_range_pin_t &BtreeLBAManager::get_pin(CachedExtent &e) | |
196 | { | |
197 | if (is_lba_node(e)) { | |
198 | return e.cast<LBANode>()->pin; | |
199 | } else if (e.is_logical()) { | |
200 | return static_cast<BtreeLBAPin &>( | |
201 | e.cast<LogicalCachedExtent>()->get_pin()).pin; | |
202 | } else { | |
203 | ceph_abort_msg("impossible"); | |
204 | } | |
205 | } | |
206 | ||
207 | static depth_t get_depth(const CachedExtent &e) | |
208 | { | |
209 | if (is_lba_node(e)) { | |
210 | return e.cast<LBANode>()->get_node_meta().depth; | |
211 | } else if (e.is_logical()) { | |
212 | return 0; | |
213 | } else { | |
214 | ceph_assert(0 == "currently impossible"); | |
215 | return 0; | |
216 | } | |
217 | } | |
218 | ||
20effc67 | 219 | void BtreeLBAManager::complete_transaction( |
f67539c2 TL |
220 | Transaction &t) |
221 | { | |
222 | std::vector<CachedExtentRef> to_clear; | |
223 | to_clear.reserve(t.get_retired_set().size()); | |
224 | for (auto &e: t.get_retired_set()) { | |
225 | if (e->is_logical() || is_lba_node(*e)) | |
226 | to_clear.push_back(e); | |
227 | } | |
228 | // need to call check_parent from leaf->parent | |
229 | std::sort( | |
230 | to_clear.begin(), to_clear.end(), | |
231 | [](auto &l, auto &r) { return get_depth(*l) < get_depth(*r); }); | |
232 | ||
233 | for (auto &e: to_clear) { | |
234 | auto &pin = get_pin(*e); | |
235 | logger().debug("{}: retiring {}, {}", __func__, *e, pin); | |
236 | pin_set.retire(pin); | |
237 | } | |
238 | ||
239 | // ...but add_pin from parent->leaf | |
240 | std::vector<CachedExtentRef> to_link; | |
20effc67 TL |
241 | to_link.reserve(t.get_fresh_block_stats().num); |
242 | t.for_each_fresh_block([&](auto &e) { | |
f67539c2 TL |
243 | if (e->is_valid() && (is_lba_node(*e) || e->is_logical())) |
244 | to_link.push_back(e); | |
20effc67 TL |
245 | }); |
246 | ||
f67539c2 TL |
247 | std::sort( |
248 | to_link.begin(), to_link.end(), | |
249 | [](auto &l, auto &r) -> bool { return get_depth(*l) > get_depth(*r); }); | |
250 | ||
251 | for (auto &e : to_link) { | |
252 | logger().debug("{}: linking {}", __func__, *e); | |
253 | pin_set.add_pin(get_pin(*e)); | |
254 | } | |
255 | ||
256 | for (auto &e: to_clear) { | |
257 | auto &pin = get_pin(*e); | |
258 | logger().debug("{}: checking {}, {}", __func__, *e, pin); | |
259 | pin_set.check_parent(pin); | |
260 | } | |
f67539c2 TL |
261 | } |
262 | ||
263 | BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent( | |
264 | Transaction &t, | |
265 | CachedExtentRef e) | |
266 | { | |
20effc67 TL |
267 | LOG_PREFIX(BtreeLBAManager::init_cached_extent); |
268 | DEBUGT("extent {}", t, *e); | |
269 | auto c = get_context(t); | |
270 | return with_btree( | |
271 | c, | |
272 | [c, e](auto &btree) { | |
273 | return btree.init_cached_extent( | |
274 | c, e | |
275 | ).si_then([](auto) {}); | |
f67539c2 TL |
276 | }); |
277 | } | |
278 | ||
279 | BtreeLBAManager::scan_mappings_ret BtreeLBAManager::scan_mappings( | |
280 | Transaction &t, | |
281 | laddr_t begin, | |
282 | laddr_t end, | |
283 | scan_mappings_func_t &&f) | |
284 | { | |
20effc67 TL |
285 | LOG_PREFIX(BtreeLBAManager::scan_mappings); |
286 | DEBUGT("begin: {}, end: {}", t, begin, end); | |
287 | ||
288 | auto c = get_context(t); | |
289 | return with_btree( | |
290 | c, | |
291 | [c, f=std::move(f), begin, end](auto &btree) mutable { | |
292 | return LBABtree::iterate_repeat( | |
293 | c, | |
294 | btree.upper_bound_right(c, begin), | |
295 | false, | |
296 | [f=std::move(f), begin, end](auto &pos) { | |
297 | if (pos.is_end() || pos.get_key() >= end) { | |
298 | return LBABtree::iterate_repeat_ret_inner( | |
299 | interruptible::ready_future_marker{}, | |
300 | seastar::stop_iteration::yes); | |
301 | } | |
302 | ceph_assert((pos.get_key() + pos.get_val().len) > begin); | |
303 | f(pos.get_key(), pos.get_val().paddr, pos.get_val().len); | |
304 | return LBABtree::iterate_repeat_ret_inner( | |
305 | interruptible::ready_future_marker{}, | |
306 | seastar::stop_iteration::no); | |
f67539c2 TL |
307 | }); |
308 | }); | |
309 | } | |
310 | ||
311 | BtreeLBAManager::scan_mapped_space_ret BtreeLBAManager::scan_mapped_space( | |
312 | Transaction &t, | |
313 | scan_mapped_space_func_t &&f) | |
314 | { | |
20effc67 TL |
315 | LOG_PREFIX(BtreeLBAManager::scan_mapped_space); |
316 | DEBUGT("", t); | |
317 | auto c = get_context(t); | |
f67539c2 TL |
318 | return seastar::do_with( |
319 | std::move(f), | |
20effc67 TL |
320 | [this, c](auto &visitor) { |
321 | return with_btree( | |
322 | c, | |
323 | [c, &visitor](auto &btree) { | |
324 | return LBABtree::iterate_repeat( | |
325 | c, | |
326 | btree.lower_bound(c, 0, &visitor), | |
327 | false, | |
328 | [&visitor](auto &pos) { | |
329 | if (pos.is_end()) { | |
330 | return LBABtree::iterate_repeat_ret_inner( | |
331 | interruptible::ready_future_marker{}, | |
332 | seastar::stop_iteration::yes); | |
333 | } | |
334 | visitor(pos.get_val().paddr, pos.get_val().len); | |
335 | return LBABtree::iterate_repeat_ret_inner( | |
336 | interruptible::ready_future_marker{}, | |
337 | seastar::stop_iteration::no); | |
338 | }, | |
339 | &visitor); | |
f67539c2 TL |
340 | }); |
341 | }); | |
342 | } | |
343 | ||
344 | BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent( | |
345 | Transaction &t, | |
346 | CachedExtentRef extent) | |
347 | { | |
20effc67 TL |
348 | LOG_PREFIX(BtreeLBAManager::rewrite_extent); |
349 | if (extent->has_been_invalidated()) { | |
350 | ERRORT("{} has been invalidated", t, *extent); | |
351 | } | |
352 | assert(!extent->has_been_invalidated()); | |
353 | assert(!extent->is_logical()); | |
354 | ||
355 | logger().debug( | |
356 | "{}: rewriting {}", | |
357 | __func__, | |
358 | *extent); | |
359 | ||
360 | if (is_lba_node(*extent)) { | |
361 | auto c = get_context(t); | |
362 | return with_btree( | |
363 | c, | |
364 | [c, extent](auto &btree) mutable { | |
365 | return btree.rewrite_lba_extent(c, extent); | |
366 | }); | |
f67539c2 | 367 | } else { |
20effc67 | 368 | return rewrite_extent_iertr::now(); |
f67539c2 TL |
369 | } |
370 | } | |
371 | ||
20effc67 TL |
372 | BtreeLBAManager::update_le_mapping_ret |
373 | BtreeLBAManager::update_mapping( | |
374 | Transaction& t, | |
375 | laddr_t laddr, | |
376 | paddr_t prev_addr, | |
377 | paddr_t addr) | |
378 | { | |
379 | return update_mapping( | |
380 | t, | |
381 | laddr, | |
382 | [prev_addr, addr]( | |
383 | const lba_map_val_t &in) { | |
384 | assert(!addr.is_null()); | |
385 | lba_map_val_t ret = in; | |
386 | ceph_assert(in.paddr == prev_addr); | |
387 | ret.paddr = addr; | |
388 | return ret; | |
389 | }).si_then( | |
390 | [](auto) {}, | |
391 | update_le_mapping_iertr::pass_further{}, | |
392 | /* ENOENT in particular should be impossible */ | |
393 | crimson::ct_error::assert_all{ | |
394 | "Invalid error in BtreeLBAManager::rewrite_extent after update_mapping" | |
395 | } | |
396 | ); | |
397 | } | |
398 | ||
f67539c2 TL |
399 | BtreeLBAManager::get_physical_extent_if_live_ret |
400 | BtreeLBAManager::get_physical_extent_if_live( | |
401 | Transaction &t, | |
402 | extent_types_t type, | |
403 | paddr_t addr, | |
404 | laddr_t laddr, | |
405 | segment_off_t len) | |
406 | { | |
407 | ceph_assert(is_lba_node(type)); | |
20effc67 TL |
408 | auto c = get_context(t); |
409 | return with_btree_ret<CachedExtentRef>( | |
410 | c, | |
411 | [c, type, addr, laddr, len](auto &btree) { | |
412 | if (type == extent_types_t::LADDR_INTERNAL) { | |
413 | return btree.get_internal_if_live(c, addr, laddr, len); | |
414 | } else { | |
415 | assert(type == extent_types_t::LADDR_LEAF); | |
416 | return btree.get_leaf_if_live(c, addr, laddr, len); | |
417 | } | |
f67539c2 | 418 | }); |
f67539c2 TL |
419 | } |
420 | ||
421 | BtreeLBAManager::BtreeLBAManager( | |
422 | SegmentManager &segment_manager, | |
423 | Cache &cache) | |
424 | : segment_manager(segment_manager), | |
20effc67 TL |
425 | cache(cache) |
426 | { | |
427 | register_metrics(); | |
428 | } | |
f67539c2 | 429 | |
20effc67 TL |
430 | LBABtree::lba_tree_inner_stats_t LBABtree::lba_tree_inner_stats; |
431 | void BtreeLBAManager::register_metrics() | |
f67539c2 | 432 | { |
20effc67 TL |
433 | namespace sm = seastar::metrics; |
434 | metrics.add_group( | |
435 | "LBA", | |
436 | { | |
437 | sm::make_counter( | |
438 | "alloc_extents", | |
439 | LBABtree::lba_tree_inner_stats.num_alloc_extents, | |
440 | sm::description("total number of lba alloc_extent operations") | |
441 | ), | |
442 | sm::make_counter( | |
443 | "alloc_extents_iter_nexts", | |
444 | LBABtree::lba_tree_inner_stats.num_alloc_extents_iter_nexts, | |
445 | sm::description("total number of iterator next operations during extent allocation") | |
446 | ), | |
447 | } | |
448 | ); | |
f67539c2 TL |
449 | } |
450 | ||
451 | BtreeLBAManager::update_refcount_ret BtreeLBAManager::update_refcount( | |
452 | Transaction &t, | |
453 | laddr_t addr, | |
454 | int delta) | |
455 | { | |
20effc67 TL |
456 | LOG_PREFIX(BtreeLBAManager::update_refcount); |
457 | DEBUGT("addr {}, delta {}", t, addr, delta); | |
f67539c2 TL |
458 | return update_mapping( |
459 | t, | |
460 | addr, | |
461 | [delta](const lba_map_val_t &in) { | |
462 | lba_map_val_t out = in; | |
463 | ceph_assert((int)out.refcount + delta >= 0); | |
464 | out.refcount += delta; | |
465 | return out; | |
20effc67 TL |
466 | }).si_then([](auto result) { |
467 | return ref_update_result_t{ | |
468 | result.refcount, | |
469 | result.paddr, | |
470 | result.len | |
471 | }; | |
f67539c2 TL |
472 | }); |
473 | } | |
474 | ||
475 | BtreeLBAManager::update_mapping_ret BtreeLBAManager::update_mapping( | |
476 | Transaction &t, | |
477 | laddr_t addr, | |
478 | update_func_t &&f) | |
479 | { | |
20effc67 TL |
480 | LOG_PREFIX(BtreeLBAManager::update_mapping); |
481 | DEBUGT("addr {}", t, addr); | |
482 | auto c = get_context(t); | |
483 | return with_btree_ret<lba_map_val_t>( | |
484 | c, | |
485 | [f=std::move(f), c, addr](auto &btree) mutable { | |
486 | return btree.lower_bound( | |
487 | c, addr | |
488 | ).si_then([&btree, f=std::move(f), c, addr](auto iter) | |
489 | -> update_mapping_ret { | |
490 | if (iter.is_end() || iter.get_key() != addr) { | |
491 | return crimson::ct_error::enoent::make(); | |
492 | } | |
493 | ||
494 | auto ret = f(iter.get_val()); | |
495 | if (ret.refcount == 0) { | |
496 | return btree.remove( | |
497 | c, | |
498 | iter | |
499 | ).si_then([ret] { | |
500 | return ret; | |
501 | }); | |
502 | } else { | |
503 | return btree.update( | |
504 | c, | |
505 | iter, | |
506 | ret | |
507 | ).si_then([ret](auto) { | |
508 | return ret; | |
509 | }); | |
510 | } | |
511 | }); | |
512 | }); | |
f67539c2 TL |
513 | } |
514 | ||
20effc67 | 515 | BtreeLBAManager::~BtreeLBAManager() |
f67539c2 | 516 | { |
20effc67 TL |
517 | pin_set.scan([](auto &i) { |
518 | logger().error("Found {} {} has_ref={}", i, i.get_extent(), i.has_ref()); | |
f67539c2 TL |
519 | }); |
520 | } | |
521 | ||
522 | } |