]> git.proxmox.com Git - ceph.git/blame - ceph/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / crimson / os / seastore / lba_manager / btree / btree_lba_manager.cc
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include <sys/mman.h>
5#include <string.h>
6
7#include "crimson/common/log.h"
20effc67 8#include "crimson/os/seastore/logging.h"
f67539c2
TL
9
10#include "include/buffer.h"
11#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
20effc67
TL
12#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
13#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
f67539c2
TL
14
15
16namespace {
17 seastar::logger& logger() {
20effc67 18 return crimson::get_logger(ceph_subsys_seastore_lba);
f67539c2
TL
19 }
20}
21
20effc67
TL
22SET_SUBSYS(seastore_lba);
23
f67539c2
TL
24namespace crimson::os::seastore::lba_manager::btree {
25
26BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs(
27 Transaction &t)
28{
20effc67
TL
29 return cache.get_root(t).si_then([this, &t](auto croot) {
30 croot->get_root().lba_root = LBABtree::mkfs(get_context(t));
31 return mkfs_iertr::now();
32 }).handle_error_interruptible(
33 mkfs_iertr::pass_further{},
34 crimson::ct_error::assert_all{
35 "Invalid error in BtreeLBAManager::mkfs"
36 }
37 );
f67539c2
TL
38}
39
20effc67
TL
40BtreeLBAManager::get_mappings_ret
41BtreeLBAManager::get_mappings(
f67539c2
TL
42 Transaction &t,
43 laddr_t offset, extent_len_t length)
44{
20effc67
TL
45 LOG_PREFIX(BtreeLBAManager::get_mappings);
46 DEBUGT("offset: {}, length{}", t, offset, length);
47 auto c = get_context(t);
48 return with_btree_state<lba_pin_list_t>(
49 c,
50 [c, offset, length](auto &btree, auto &ret) {
51 return LBABtree::iterate_repeat(
52 c,
53 btree.upper_bound_right(c, offset),
54 false,
55 [&ret, offset, length](auto &pos) {
56 if (pos.is_end() || pos.get_key() >= (offset + length)) {
57 return LBABtree::iterate_repeat_ret_inner(
58 interruptible::ready_future_marker{},
59 seastar::stop_iteration::yes);
60 }
61 ceph_assert((pos.get_key() + pos.get_val().len) > offset);
62 ret.push_back(pos.get_pin());
63 return LBABtree::iterate_repeat_ret_inner(
64 interruptible::ready_future_marker{},
65 seastar::stop_iteration::no);
66 });
f67539c2
TL
67 });
68}
69
70
71BtreeLBAManager::get_mappings_ret
72BtreeLBAManager::get_mappings(
73 Transaction &t,
74 laddr_list_t &&list)
75{
20effc67
TL
76 LOG_PREFIX(BtreeLBAManager::get_mappings);
77 DEBUGT("{}", t, list);
f67539c2
TL
78 auto l = std::make_unique<laddr_list_t>(std::move(list));
79 auto retptr = std::make_unique<lba_pin_list_t>();
80 auto &ret = *retptr;
20effc67 81 return trans_intr::do_for_each(
f67539c2
TL
82 l->begin(),
83 l->end(),
84 [this, &t, &ret](const auto &p) {
20effc67 85 return get_mappings(t, p.first, p.second).si_then(
f67539c2
TL
86 [&ret](auto res) {
87 ret.splice(ret.end(), res, res.begin(), res.end());
20effc67 88 return get_mappings_iertr::now();
f67539c2 89 });
20effc67 90 }).si_then([l=std::move(l), retptr=std::move(retptr)]() mutable {
f67539c2
TL
91 return std::move(*retptr);
92 });
93}
94
20effc67
TL
95BtreeLBAManager::get_mapping_ret
96BtreeLBAManager::get_mapping(
f67539c2 97 Transaction &t,
20effc67 98 laddr_t offset)
f67539c2 99{
20effc67
TL
100 LOG_PREFIX(BtreeLBAManager::get_mapping);
101 DEBUGT("{}", t, offset);
102 auto c = get_context(t);
103 return with_btree_ret<LBAPinRef>(
104 c,
105 [FNAME, c, offset](auto &btree) {
106 return btree.lower_bound(
107 c, offset
108 ).si_then([FNAME, offset, c](auto iter) -> get_mapping_ret {
109 if (iter.is_end() || iter.get_key() != offset) {
110 return crimson::ct_error::enoent::make();
111 } else {
112 auto e = iter.get_pin();
113 DEBUGT("got mapping {}", c.trans, *e);
114 return get_mapping_ret(
115 interruptible::ready_future_marker{},
116 std::move(e));
117 }
f67539c2
TL
118 });
119 });
120}
121
20effc67
TL
122BtreeLBAManager::alloc_extent_ret
123BtreeLBAManager::alloc_extent(
f67539c2 124 Transaction &t,
20effc67
TL
125 laddr_t hint,
126 extent_len_t len,
127 paddr_t addr)
f67539c2 128{
20effc67
TL
129 struct state_t {
130 laddr_t last_end;
131
132 std::optional<LBABtree::iterator> insert_iter;
133 std::optional<LBABtree::iterator> ret;
134
135 state_t(laddr_t hint) : last_end(hint) {}
136 };
137
138 LOG_PREFIX(BtreeLBAManager::alloc_extent);
139 DEBUGT("hint: {}, length: {}", t, hint, len);
140 auto c = get_context(t);
141 ++LBABtree::lba_tree_inner_stats.num_alloc_extents;
142 return with_btree_state<state_t>(
143 c,
144 hint,
145 [FNAME, c, hint, len, addr, &t](auto &btree, auto &state) {
146 return LBABtree::iterate_repeat(
147 c,
148 btree.upper_bound_right(c, hint),
149 true,
150 [&state, len, &t, hint](auto &pos) {
151 LOG_PREFIX(BtreeLBAManager::alloc_extent);
152 if (!pos.is_end()) {
153 DEBUGT("iterate_repeat: pos: {}~{}, state: {}~{}, hint: {}",
154 t,
155 pos.get_key(),
156 pos.get_val().len,
157 state.last_end,
158 len,
159 hint);
160 }
161 if (pos.is_end() || pos.get_key() >= (state.last_end + len)) {
162 state.insert_iter = pos;
163 return LBABtree::iterate_repeat_ret_inner(
164 interruptible::ready_future_marker{},
165 seastar::stop_iteration::yes);
166 } else {
167 state.last_end = pos.get_key() + pos.get_val().len;
168 return LBABtree::iterate_repeat_ret_inner(
169 interruptible::ready_future_marker{},
170 seastar::stop_iteration::no);
171 }
172 }).si_then([FNAME, c, addr, len, &btree, &state] {
173 DEBUGT("about to insert at addr {}~{}", c.trans, state.last_end, len);
174 return btree.insert(
175 c,
176 *state.insert_iter,
177 state.last_end,
178 lba_map_val_t{len, addr, 1, 0}
179 ).si_then([&state](auto &&p) {
180 auto [iter, inserted] = std::move(p);
181 ceph_assert(inserted);
182 state.ret = iter;
183 });
184 });
185 }).si_then([](auto &&state) {
186 return state.ret->get_pin();
f67539c2
TL
187 });
188}
189
f67539c2
TL
190static bool is_lba_node(const CachedExtent &e)
191{
192 return is_lba_node(e.get_type());
193}
194
195btree_range_pin_t &BtreeLBAManager::get_pin(CachedExtent &e)
196{
197 if (is_lba_node(e)) {
198 return e.cast<LBANode>()->pin;
199 } else if (e.is_logical()) {
200 return static_cast<BtreeLBAPin &>(
201 e.cast<LogicalCachedExtent>()->get_pin()).pin;
202 } else {
203 ceph_abort_msg("impossible");
204 }
205}
206
207static depth_t get_depth(const CachedExtent &e)
208{
209 if (is_lba_node(e)) {
210 return e.cast<LBANode>()->get_node_meta().depth;
211 } else if (e.is_logical()) {
212 return 0;
213 } else {
214 ceph_assert(0 == "currently impossible");
215 return 0;
216 }
217}
218
20effc67 219void BtreeLBAManager::complete_transaction(
f67539c2
TL
220 Transaction &t)
221{
222 std::vector<CachedExtentRef> to_clear;
223 to_clear.reserve(t.get_retired_set().size());
224 for (auto &e: t.get_retired_set()) {
225 if (e->is_logical() || is_lba_node(*e))
226 to_clear.push_back(e);
227 }
228 // need to call check_parent from leaf->parent
229 std::sort(
230 to_clear.begin(), to_clear.end(),
231 [](auto &l, auto &r) { return get_depth(*l) < get_depth(*r); });
232
233 for (auto &e: to_clear) {
234 auto &pin = get_pin(*e);
235 logger().debug("{}: retiring {}, {}", __func__, *e, pin);
236 pin_set.retire(pin);
237 }
238
239 // ...but add_pin from parent->leaf
240 std::vector<CachedExtentRef> to_link;
20effc67
TL
241 to_link.reserve(t.get_fresh_block_stats().num);
242 t.for_each_fresh_block([&](auto &e) {
f67539c2
TL
243 if (e->is_valid() && (is_lba_node(*e) || e->is_logical()))
244 to_link.push_back(e);
20effc67
TL
245 });
246
f67539c2
TL
247 std::sort(
248 to_link.begin(), to_link.end(),
249 [](auto &l, auto &r) -> bool { return get_depth(*l) > get_depth(*r); });
250
251 for (auto &e : to_link) {
252 logger().debug("{}: linking {}", __func__, *e);
253 pin_set.add_pin(get_pin(*e));
254 }
255
256 for (auto &e: to_clear) {
257 auto &pin = get_pin(*e);
258 logger().debug("{}: checking {}, {}", __func__, *e, pin);
259 pin_set.check_parent(pin);
260 }
f67539c2
TL
261}
262
263BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent(
264 Transaction &t,
265 CachedExtentRef e)
266{
20effc67
TL
267 LOG_PREFIX(BtreeLBAManager::init_cached_extent);
268 DEBUGT("extent {}", t, *e);
269 auto c = get_context(t);
270 return with_btree(
271 c,
272 [c, e](auto &btree) {
273 return btree.init_cached_extent(
274 c, e
275 ).si_then([](auto) {});
f67539c2
TL
276 });
277}
278
279BtreeLBAManager::scan_mappings_ret BtreeLBAManager::scan_mappings(
280 Transaction &t,
281 laddr_t begin,
282 laddr_t end,
283 scan_mappings_func_t &&f)
284{
20effc67
TL
285 LOG_PREFIX(BtreeLBAManager::scan_mappings);
286 DEBUGT("begin: {}, end: {}", t, begin, end);
287
288 auto c = get_context(t);
289 return with_btree(
290 c,
291 [c, f=std::move(f), begin, end](auto &btree) mutable {
292 return LBABtree::iterate_repeat(
293 c,
294 btree.upper_bound_right(c, begin),
295 false,
296 [f=std::move(f), begin, end](auto &pos) {
297 if (pos.is_end() || pos.get_key() >= end) {
298 return LBABtree::iterate_repeat_ret_inner(
299 interruptible::ready_future_marker{},
300 seastar::stop_iteration::yes);
301 }
302 ceph_assert((pos.get_key() + pos.get_val().len) > begin);
303 f(pos.get_key(), pos.get_val().paddr, pos.get_val().len);
304 return LBABtree::iterate_repeat_ret_inner(
305 interruptible::ready_future_marker{},
306 seastar::stop_iteration::no);
f67539c2
TL
307 });
308 });
309}
310
311BtreeLBAManager::scan_mapped_space_ret BtreeLBAManager::scan_mapped_space(
312 Transaction &t,
313 scan_mapped_space_func_t &&f)
314{
20effc67
TL
315 LOG_PREFIX(BtreeLBAManager::scan_mapped_space);
316 DEBUGT("", t);
317 auto c = get_context(t);
f67539c2
TL
318 return seastar::do_with(
319 std::move(f),
20effc67
TL
320 [this, c](auto &visitor) {
321 return with_btree(
322 c,
323 [c, &visitor](auto &btree) {
324 return LBABtree::iterate_repeat(
325 c,
326 btree.lower_bound(c, 0, &visitor),
327 false,
328 [&visitor](auto &pos) {
329 if (pos.is_end()) {
330 return LBABtree::iterate_repeat_ret_inner(
331 interruptible::ready_future_marker{},
332 seastar::stop_iteration::yes);
333 }
334 visitor(pos.get_val().paddr, pos.get_val().len);
335 return LBABtree::iterate_repeat_ret_inner(
336 interruptible::ready_future_marker{},
337 seastar::stop_iteration::no);
338 },
339 &visitor);
f67539c2
TL
340 });
341 });
342}
343
344BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent(
345 Transaction &t,
346 CachedExtentRef extent)
347{
20effc67
TL
348 LOG_PREFIX(BtreeLBAManager::rewrite_extent);
349 if (extent->has_been_invalidated()) {
350 ERRORT("{} has been invalidated", t, *extent);
351 }
352 assert(!extent->has_been_invalidated());
353 assert(!extent->is_logical());
354
355 logger().debug(
356 "{}: rewriting {}",
357 __func__,
358 *extent);
359
360 if (is_lba_node(*extent)) {
361 auto c = get_context(t);
362 return with_btree(
363 c,
364 [c, extent](auto &btree) mutable {
365 return btree.rewrite_lba_extent(c, extent);
366 });
f67539c2 367 } else {
20effc67 368 return rewrite_extent_iertr::now();
f67539c2
TL
369 }
370}
371
20effc67
TL
372BtreeLBAManager::update_le_mapping_ret
373BtreeLBAManager::update_mapping(
374 Transaction& t,
375 laddr_t laddr,
376 paddr_t prev_addr,
377 paddr_t addr)
378{
379 return update_mapping(
380 t,
381 laddr,
382 [prev_addr, addr](
383 const lba_map_val_t &in) {
384 assert(!addr.is_null());
385 lba_map_val_t ret = in;
386 ceph_assert(in.paddr == prev_addr);
387 ret.paddr = addr;
388 return ret;
389 }).si_then(
390 [](auto) {},
391 update_le_mapping_iertr::pass_further{},
392 /* ENOENT in particular should be impossible */
393 crimson::ct_error::assert_all{
394 "Invalid error in BtreeLBAManager::rewrite_extent after update_mapping"
395 }
396 );
397}
398
f67539c2
TL
399BtreeLBAManager::get_physical_extent_if_live_ret
400BtreeLBAManager::get_physical_extent_if_live(
401 Transaction &t,
402 extent_types_t type,
403 paddr_t addr,
404 laddr_t laddr,
405 segment_off_t len)
406{
407 ceph_assert(is_lba_node(type));
20effc67
TL
408 auto c = get_context(t);
409 return with_btree_ret<CachedExtentRef>(
410 c,
411 [c, type, addr, laddr, len](auto &btree) {
412 if (type == extent_types_t::LADDR_INTERNAL) {
413 return btree.get_internal_if_live(c, addr, laddr, len);
414 } else {
415 assert(type == extent_types_t::LADDR_LEAF);
416 return btree.get_leaf_if_live(c, addr, laddr, len);
417 }
f67539c2 418 });
f67539c2
TL
419}
420
421BtreeLBAManager::BtreeLBAManager(
422 SegmentManager &segment_manager,
423 Cache &cache)
424 : segment_manager(segment_manager),
20effc67
TL
425 cache(cache)
426{
427 register_metrics();
428}
f67539c2 429
20effc67
TL
430LBABtree::lba_tree_inner_stats_t LBABtree::lba_tree_inner_stats;
431void BtreeLBAManager::register_metrics()
f67539c2 432{
20effc67
TL
433 namespace sm = seastar::metrics;
434 metrics.add_group(
435 "LBA",
436 {
437 sm::make_counter(
438 "alloc_extents",
439 LBABtree::lba_tree_inner_stats.num_alloc_extents,
440 sm::description("total number of lba alloc_extent operations")
441 ),
442 sm::make_counter(
443 "alloc_extents_iter_nexts",
444 LBABtree::lba_tree_inner_stats.num_alloc_extents_iter_nexts,
445 sm::description("total number of iterator next operations during extent allocation")
446 ),
447 }
448 );
f67539c2
TL
449}
450
451BtreeLBAManager::update_refcount_ret BtreeLBAManager::update_refcount(
452 Transaction &t,
453 laddr_t addr,
454 int delta)
455{
20effc67
TL
456 LOG_PREFIX(BtreeLBAManager::update_refcount);
457 DEBUGT("addr {}, delta {}", t, addr, delta);
f67539c2
TL
458 return update_mapping(
459 t,
460 addr,
461 [delta](const lba_map_val_t &in) {
462 lba_map_val_t out = in;
463 ceph_assert((int)out.refcount + delta >= 0);
464 out.refcount += delta;
465 return out;
20effc67
TL
466 }).si_then([](auto result) {
467 return ref_update_result_t{
468 result.refcount,
469 result.paddr,
470 result.len
471 };
f67539c2
TL
472 });
473}
474
475BtreeLBAManager::update_mapping_ret BtreeLBAManager::update_mapping(
476 Transaction &t,
477 laddr_t addr,
478 update_func_t &&f)
479{
20effc67
TL
480 LOG_PREFIX(BtreeLBAManager::update_mapping);
481 DEBUGT("addr {}", t, addr);
482 auto c = get_context(t);
483 return with_btree_ret<lba_map_val_t>(
484 c,
485 [f=std::move(f), c, addr](auto &btree) mutable {
486 return btree.lower_bound(
487 c, addr
488 ).si_then([&btree, f=std::move(f), c, addr](auto iter)
489 -> update_mapping_ret {
490 if (iter.is_end() || iter.get_key() != addr) {
491 return crimson::ct_error::enoent::make();
492 }
493
494 auto ret = f(iter.get_val());
495 if (ret.refcount == 0) {
496 return btree.remove(
497 c,
498 iter
499 ).si_then([ret] {
500 return ret;
501 });
502 } else {
503 return btree.update(
504 c,
505 iter,
506 ret
507 ).si_then([ret](auto) {
508 return ret;
509 });
510 }
511 });
512 });
f67539c2
TL
513}
514
20effc67 515BtreeLBAManager::~BtreeLBAManager()
f67539c2 516{
20effc67
TL
517 pin_set.scan([](auto &i) {
518 logger().error("Found {} {} has_ref={}", i, i.get_extent(), i.has_ref());
f67539c2
TL
519 });
520}
521
522}