]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include <sys/mman.h> | |
5 | #include <string.h> | |
6 | ||
1e59de90 | 7 | #include <seastar/core/metrics.hh> |
f67539c2 TL |
8 | |
9 | #include "include/buffer.h" | |
10 | #include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" | |
20effc67 | 11 | #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" |
1e59de90 TL |
12 | #include "crimson/os/seastore/logging.h" |
13 | ||
14 | SET_SUBSYS(seastore_lba); | |
15 | /* | |
16 | * levels: | |
17 | * - INFO: mkfs | |
18 | * - DEBUG: modification operations | |
19 | * - TRACE: read operations, DEBUG details | |
20 | */ | |
21 | ||
22 | namespace crimson::os::seastore { | |
f67539c2 | 23 | |
1e59de90 TL |
24 | template <typename T> |
25 | Transaction::tree_stats_t& get_tree_stats(Transaction &t) | |
26 | { | |
27 | return t.get_lba_tree_stats(); | |
28 | } | |
29 | ||
30 | template Transaction::tree_stats_t& | |
31 | get_tree_stats< | |
32 | crimson::os::seastore::lba_manager::btree::LBABtree>( | |
33 | Transaction &t); | |
34 | ||
35 | template <typename T> | |
36 | phy_tree_root_t& get_phy_tree_root(root_t &r) | |
37 | { | |
38 | return r.lba_root; | |
39 | } | |
f67539c2 | 40 | |
1e59de90 TL |
41 | template phy_tree_root_t& |
42 | get_phy_tree_root< | |
43 | crimson::os::seastore::lba_manager::btree::LBABtree>(root_t &r); | |
44 | ||
45 | template <> | |
46 | const get_phy_tree_root_node_ret get_phy_tree_root_node< | |
47 | crimson::os::seastore::lba_manager::btree::LBABtree>( | |
48 | const RootBlockRef &root_block, op_context_t<laddr_t> c) | |
49 | { | |
50 | auto lba_root = root_block->lba_root_node; | |
51 | if (lba_root) { | |
52 | ceph_assert(lba_root->is_initial_pending() | |
53 | == root_block->is_pending()); | |
54 | return {true, | |
55 | trans_intr::make_interruptible( | |
56 | c.cache.get_extent_viewable_by_trans(c.trans, lba_root))}; | |
57 | } else if (root_block->is_pending()) { | |
58 | auto &prior = static_cast<RootBlock&>(*root_block->get_prior_instance()); | |
59 | lba_root = prior.lba_root_node; | |
60 | if (lba_root) { | |
61 | return {true, | |
62 | trans_intr::make_interruptible( | |
63 | c.cache.get_extent_viewable_by_trans(c.trans, lba_root))}; | |
64 | } else { | |
65 | return {false, | |
66 | trans_intr::make_interruptible( | |
aee94f69 TL |
67 | Cache::get_extent_ertr::make_ready_future< |
68 | CachedExtentRef>())}; | |
1e59de90 TL |
69 | } |
70 | } else { | |
71 | return {false, | |
72 | trans_intr::make_interruptible( | |
aee94f69 TL |
73 | Cache::get_extent_ertr::make_ready_future< |
74 | CachedExtentRef>())}; | |
f67539c2 TL |
75 | } |
76 | } | |
77 | ||
1e59de90 TL |
78 | template <typename ROOT> |
79 | void link_phy_tree_root_node(RootBlockRef &root_block, ROOT* lba_root) { | |
80 | root_block->lba_root_node = lba_root; | |
81 | ceph_assert(lba_root != nullptr); | |
82 | lba_root->root_block = root_block; | |
83 | } | |
84 | ||
85 | template void link_phy_tree_root_node( | |
86 | RootBlockRef &root_block, lba_manager::btree::LBAInternalNode* lba_root); | |
87 | template void link_phy_tree_root_node( | |
88 | RootBlockRef &root_block, lba_manager::btree::LBALeafNode* lba_root); | |
89 | template void link_phy_tree_root_node( | |
90 | RootBlockRef &root_block, lba_manager::btree::LBANode* lba_root); | |
91 | ||
92 | template <> | |
93 | void unlink_phy_tree_root_node<laddr_t>(RootBlockRef &root_block) { | |
94 | root_block->lba_root_node = nullptr; | |
95 | } | |
96 | ||
97 | } | |
20effc67 | 98 | |
f67539c2 TL |
99 | namespace crimson::os::seastore::lba_manager::btree { |
100 | ||
1e59de90 TL |
101 | BtreeLBAManager::mkfs_ret |
102 | BtreeLBAManager::mkfs( | |
f67539c2 TL |
103 | Transaction &t) |
104 | { | |
1e59de90 TL |
105 | LOG_PREFIX(BtreeLBAManager::mkfs); |
106 | INFOT("start", t); | |
20effc67 | 107 | return cache.get_root(t).si_then([this, &t](auto croot) { |
1e59de90 TL |
108 | assert(croot->is_mutation_pending()); |
109 | croot->get_root().lba_root = LBABtree::mkfs(croot, get_context(t)); | |
20effc67 TL |
110 | return mkfs_iertr::now(); |
111 | }).handle_error_interruptible( | |
112 | mkfs_iertr::pass_further{}, | |
113 | crimson::ct_error::assert_all{ | |
114 | "Invalid error in BtreeLBAManager::mkfs" | |
115 | } | |
116 | ); | |
f67539c2 TL |
117 | } |
118 | ||
20effc67 TL |
119 | BtreeLBAManager::get_mappings_ret |
120 | BtreeLBAManager::get_mappings( | |
f67539c2 TL |
121 | Transaction &t, |
122 | laddr_t offset, extent_len_t length) | |
123 | { | |
20effc67 | 124 | LOG_PREFIX(BtreeLBAManager::get_mappings); |
1e59de90 | 125 | TRACET("{}~{}", t, offset, length); |
20effc67 | 126 | auto c = get_context(t); |
1e59de90 TL |
127 | return with_btree_state<LBABtree, lba_pin_list_t>( |
128 | cache, | |
20effc67 | 129 | c, |
aee94f69 TL |
130 | [c, offset, length, FNAME, this](auto &btree, auto &ret) { |
131 | return seastar::do_with( | |
132 | std::list<BtreeLBAMappingRef>(), | |
133 | [offset, length, c, FNAME, this, &ret, &btree](auto &pin_list) { | |
134 | return LBABtree::iterate_repeat( | |
135 | c, | |
136 | btree.upper_bound_right(c, offset), | |
137 | [&pin_list, offset, length, c, FNAME](auto &pos) { | |
138 | if (pos.is_end() || pos.get_key() >= (offset + length)) { | |
139 | TRACET("{}~{} done with {} results", | |
140 | c.trans, offset, length, pin_list.size()); | |
141 | return LBABtree::iterate_repeat_ret_inner( | |
142 | interruptible::ready_future_marker{}, | |
143 | seastar::stop_iteration::yes); | |
144 | } | |
145 | TRACET("{}~{} got {}, {}, repeat ...", | |
146 | c.trans, offset, length, pos.get_key(), pos.get_val()); | |
147 | ceph_assert((pos.get_key() + pos.get_val().len) > offset); | |
148 | pin_list.push_back(pos.get_pin(c)); | |
149 | return LBABtree::iterate_repeat_ret_inner( | |
20effc67 | 150 | interruptible::ready_future_marker{}, |
aee94f69 TL |
151 | seastar::stop_iteration::no); |
152 | }).si_then([this, &ret, c, &pin_list] { | |
153 | return _get_original_mappings(c, pin_list | |
154 | ).si_then([&ret](auto _ret) { | |
155 | ret = std::move(_ret); | |
156 | }); | |
157 | }); | |
20effc67 | 158 | }); |
f67539c2 TL |
159 | }); |
160 | } | |
161 | ||
aee94f69 TL |
162 | BtreeLBAManager::_get_original_mappings_ret |
163 | BtreeLBAManager::_get_original_mappings( | |
164 | op_context_t<laddr_t> c, | |
165 | std::list<BtreeLBAMappingRef> &pin_list) | |
166 | { | |
167 | return seastar::do_with( | |
168 | lba_pin_list_t(), | |
169 | [this, c, &pin_list](auto &ret) { | |
170 | return trans_intr::do_for_each( | |
171 | pin_list, | |
172 | [this, c, &ret](auto &pin) { | |
173 | LOG_PREFIX(BtreeLBAManager::get_mappings); | |
174 | if (pin->get_raw_val().is_paddr()) { | |
175 | ret.emplace_back(std::move(pin)); | |
176 | return get_mappings_iertr::now(); | |
177 | } | |
178 | TRACET( | |
179 | "getting original mapping for indirect mapping {}~{}", | |
180 | c.trans, pin->get_key(), pin->get_length()); | |
181 | return this->get_mappings( | |
182 | c.trans, pin->get_raw_val().get_laddr(), pin->get_length() | |
183 | ).si_then([&pin, &ret, c](auto new_pin_list) { | |
184 | LOG_PREFIX(BtreeLBAManager::get_mappings); | |
185 | assert(new_pin_list.size() == 1); | |
186 | auto &new_pin = new_pin_list.front(); | |
187 | auto intermediate_key = pin->get_raw_val().get_laddr(); | |
188 | assert(!new_pin->is_indirect()); | |
189 | assert(new_pin->get_key() <= intermediate_key); | |
190 | assert(new_pin->get_key() + new_pin->get_length() >= | |
191 | intermediate_key + pin->get_length()); | |
192 | ||
193 | TRACET("Got mapping {}~{} for indirect mapping {}~{}, " | |
194 | "intermediate_key {}", | |
195 | c.trans, | |
196 | new_pin->get_key(), new_pin->get_length(), | |
197 | pin->get_key(), pin->get_length(), | |
198 | pin->get_raw_val().get_laddr()); | |
199 | auto &btree_new_pin = static_cast<BtreeLBAMapping&>(*new_pin); | |
200 | btree_new_pin.set_key_for_indirect( | |
201 | pin->get_key(), | |
202 | pin->get_length(), | |
203 | pin->get_raw_val().get_laddr()); | |
204 | ret.emplace_back(std::move(new_pin)); | |
205 | return seastar::now(); | |
206 | }).handle_error_interruptible( | |
207 | crimson::ct_error::input_output_error::pass_further{}, | |
208 | crimson::ct_error::assert_all("unexpected enoent") | |
209 | ); | |
210 | } | |
211 | ).si_then([&ret] { | |
212 | return std::move(ret); | |
213 | }); | |
214 | }); | |
215 | } | |
216 | ||
217 | ||
f67539c2 TL |
218 | BtreeLBAManager::get_mappings_ret |
219 | BtreeLBAManager::get_mappings( | |
220 | Transaction &t, | |
221 | laddr_list_t &&list) | |
222 | { | |
20effc67 | 223 | LOG_PREFIX(BtreeLBAManager::get_mappings); |
1e59de90 | 224 | TRACET("{}", t, list); |
f67539c2 TL |
225 | auto l = std::make_unique<laddr_list_t>(std::move(list)); |
226 | auto retptr = std::make_unique<lba_pin_list_t>(); | |
227 | auto &ret = *retptr; | |
20effc67 | 228 | return trans_intr::do_for_each( |
f67539c2 TL |
229 | l->begin(), |
230 | l->end(), | |
231 | [this, &t, &ret](const auto &p) { | |
1e59de90 | 232 | return this->get_mappings(t, p.first, p.second).si_then( |
f67539c2 TL |
233 | [&ret](auto res) { |
234 | ret.splice(ret.end(), res, res.begin(), res.end()); | |
20effc67 | 235 | return get_mappings_iertr::now(); |
f67539c2 | 236 | }); |
20effc67 | 237 | }).si_then([l=std::move(l), retptr=std::move(retptr)]() mutable { |
f67539c2 TL |
238 | return std::move(*retptr); |
239 | }); | |
240 | } | |
241 | ||
20effc67 TL |
242 | BtreeLBAManager::get_mapping_ret |
243 | BtreeLBAManager::get_mapping( | |
f67539c2 | 244 | Transaction &t, |
20effc67 | 245 | laddr_t offset) |
f67539c2 | 246 | { |
20effc67 | 247 | LOG_PREFIX(BtreeLBAManager::get_mapping); |
1e59de90 | 248 | TRACET("{}", t, offset); |
aee94f69 TL |
249 | return _get_mapping(t, offset |
250 | ).si_then([](auto pin) { | |
251 | return get_mapping_iertr::make_ready_future<LBAMappingRef>(std::move(pin)); | |
252 | }); | |
253 | } | |
254 | ||
255 | BtreeLBAManager::_get_mapping_ret | |
256 | BtreeLBAManager::_get_mapping( | |
257 | Transaction &t, | |
258 | laddr_t offset) | |
259 | { | |
260 | LOG_PREFIX(BtreeLBAManager::_get_mapping); | |
261 | TRACET("{}", t, offset); | |
20effc67 | 262 | auto c = get_context(t); |
aee94f69 | 263 | return with_btree_ret<LBABtree, BtreeLBAMappingRef>( |
1e59de90 | 264 | cache, |
20effc67 | 265 | c, |
aee94f69 | 266 | [FNAME, c, offset, this](auto &btree) { |
20effc67 TL |
267 | return btree.lower_bound( |
268 | c, offset | |
aee94f69 | 269 | ).si_then([FNAME, offset, c](auto iter) -> _get_mapping_ret { |
20effc67 | 270 | if (iter.is_end() || iter.get_key() != offset) { |
1e59de90 | 271 | ERRORT("laddr={} doesn't exist", c.trans, offset); |
20effc67 TL |
272 | return crimson::ct_error::enoent::make(); |
273 | } else { | |
1e59de90 TL |
274 | TRACET("{} got {}, {}", |
275 | c.trans, offset, iter.get_key(), iter.get_val()); | |
276 | auto e = iter.get_pin(c); | |
aee94f69 | 277 | return _get_mapping_ret( |
20effc67 TL |
278 | interruptible::ready_future_marker{}, |
279 | std::move(e)); | |
280 | } | |
aee94f69 TL |
281 | }).si_then([this, c](auto pin) -> _get_mapping_ret { |
282 | if (pin->get_raw_val().is_laddr()) { | |
283 | return seastar::do_with( | |
284 | std::move(pin), | |
285 | [this, c](auto &pin) { | |
286 | return _get_mapping( | |
287 | c.trans, pin->get_raw_val().get_laddr() | |
288 | ).si_then([&pin](auto new_pin) { | |
289 | ceph_assert(pin->get_length() == new_pin->get_length()); | |
290 | new_pin->set_key_for_indirect( | |
291 | pin->get_key(), | |
292 | pin->get_length()); | |
293 | return new_pin; | |
294 | }); | |
295 | }); | |
296 | } else { | |
297 | return get_mapping_iertr::make_ready_future<BtreeLBAMappingRef>(std::move(pin)); | |
298 | } | |
f67539c2 TL |
299 | }); |
300 | }); | |
301 | } | |
302 | ||
20effc67 | 303 | BtreeLBAManager::alloc_extent_ret |
aee94f69 | 304 | BtreeLBAManager::_alloc_extent( |
f67539c2 | 305 | Transaction &t, |
20effc67 TL |
306 | laddr_t hint, |
307 | extent_len_t len, | |
aee94f69 TL |
308 | pladdr_t addr, |
309 | paddr_t actual_addr, | |
310 | laddr_t intermediate_base, | |
1e59de90 | 311 | LogicalCachedExtent* nextent) |
f67539c2 | 312 | { |
20effc67 TL |
313 | struct state_t { |
314 | laddr_t last_end; | |
315 | ||
1e59de90 TL |
316 | std::optional<typename LBABtree::iterator> insert_iter; |
317 | std::optional<typename LBABtree::iterator> ret; | |
20effc67 TL |
318 | |
319 | state_t(laddr_t hint) : last_end(hint) {} | |
320 | }; | |
321 | ||
aee94f69 | 322 | LOG_PREFIX(BtreeLBAManager::_alloc_extent); |
1e59de90 | 323 | TRACET("{}~{}, hint={}", t, addr, len, hint); |
20effc67 | 324 | auto c = get_context(t); |
1e59de90 TL |
325 | ++stats.num_alloc_extents; |
326 | auto lookup_attempts = stats.num_alloc_extents_iter_nexts; | |
327 | return crimson::os::seastore::with_btree_state<LBABtree, state_t>( | |
328 | cache, | |
20effc67 TL |
329 | c, |
330 | hint, | |
1e59de90 TL |
331 | [this, FNAME, c, hint, len, addr, lookup_attempts, |
332 | &t, nextent](auto &btree, auto &state) { | |
20effc67 TL |
333 | return LBABtree::iterate_repeat( |
334 | c, | |
335 | btree.upper_bound_right(c, hint), | |
1e59de90 TL |
336 | [this, &state, len, addr, &t, hint, FNAME, lookup_attempts](auto &pos) { |
337 | ++stats.num_alloc_extents_iter_nexts; | |
338 | if (pos.is_end()) { | |
339 | DEBUGT("{}~{}, hint={}, state: end, done with {} attempts, insert at {}", | |
340 | t, addr, len, hint, | |
341 | stats.num_alloc_extents_iter_nexts - lookup_attempts, | |
342 | state.last_end); | |
343 | state.insert_iter = pos; | |
344 | return typename LBABtree::iterate_repeat_ret_inner( | |
345 | interruptible::ready_future_marker{}, | |
346 | seastar::stop_iteration::yes); | |
347 | } else if (pos.get_key() >= (state.last_end + len)) { | |
348 | DEBUGT("{}~{}, hint={}, state: {}~{}, done with {} attempts, insert at {} -- {}", | |
349 | t, addr, len, hint, | |
350 | pos.get_key(), pos.get_val().len, | |
351 | stats.num_alloc_extents_iter_nexts - lookup_attempts, | |
20effc67 | 352 | state.last_end, |
1e59de90 | 353 | pos.get_val()); |
20effc67 | 354 | state.insert_iter = pos; |
1e59de90 | 355 | return typename LBABtree::iterate_repeat_ret_inner( |
20effc67 TL |
356 | interruptible::ready_future_marker{}, |
357 | seastar::stop_iteration::yes); | |
358 | } else { | |
359 | state.last_end = pos.get_key() + pos.get_val().len; | |
1e59de90 TL |
360 | TRACET("{}~{}, hint={}, state: {}~{}, repeat ... -- {}", |
361 | t, addr, len, hint, | |
362 | pos.get_key(), pos.get_val().len, | |
363 | pos.get_val()); | |
364 | return typename LBABtree::iterate_repeat_ret_inner( | |
20effc67 TL |
365 | interruptible::ready_future_marker{}, |
366 | seastar::stop_iteration::no); | |
367 | } | |
1e59de90 | 368 | }).si_then([FNAME, c, addr, len, hint, &btree, &state, nextent] { |
20effc67 TL |
369 | return btree.insert( |
370 | c, | |
371 | *state.insert_iter, | |
372 | state.last_end, | |
aee94f69 | 373 | lba_map_val_t{len, pladdr_t(addr), 1, 0}, |
1e59de90 TL |
374 | nextent |
375 | ).si_then([&state, FNAME, c, addr, len, hint, nextent](auto &&p) { | |
20effc67 | 376 | auto [iter, inserted] = std::move(p); |
1e59de90 TL |
377 | TRACET("{}~{}, hint={}, inserted at {}", |
378 | c.trans, addr, len, hint, state.last_end); | |
379 | if (nextent) { | |
aee94f69 | 380 | ceph_assert(addr.is_paddr()); |
1e59de90 TL |
381 | nextent->set_laddr(iter.get_key()); |
382 | } | |
20effc67 TL |
383 | ceph_assert(inserted); |
384 | state.ret = iter; | |
385 | }); | |
386 | }); | |
aee94f69 TL |
387 | }).si_then([c, actual_addr, addr, intermediate_base](auto &&state) { |
388 | auto ret_pin = state.ret->get_pin(c); | |
389 | if (actual_addr != P_ADDR_NULL) { | |
390 | ceph_assert(addr.is_laddr()); | |
391 | ret_pin->set_paddr(actual_addr); | |
392 | ret_pin->set_intermediate_base(intermediate_base); | |
393 | } else { | |
394 | ceph_assert(addr.is_paddr()); | |
395 | } | |
396 | return alloc_extent_iertr::make_ready_future<LBAMappingRef>( | |
397 | std::move(ret_pin)); | |
f67539c2 TL |
398 | }); |
399 | } | |
400 | ||
f67539c2 TL |
401 | static bool is_lba_node(const CachedExtent &e) |
402 | { | |
403 | return is_lba_node(e.get_type()); | |
404 | } | |
405 | ||
1e59de90 TL |
406 | BtreeLBAManager::base_iertr::template future<> |
407 | _init_cached_extent( | |
408 | op_context_t<laddr_t> c, | |
409 | const CachedExtentRef &e, | |
410 | LBABtree &btree, | |
411 | bool &ret) | |
f67539c2 | 412 | { |
1e59de90 TL |
413 | if (e->is_logical()) { |
414 | auto logn = e->cast<LogicalCachedExtent>(); | |
415 | return btree.lower_bound( | |
416 | c, | |
417 | logn->get_laddr() | |
418 | ).si_then([e, c, logn, &ret](auto iter) { | |
419 | LOG_PREFIX(BtreeLBAManager::init_cached_extent); | |
420 | if (!iter.is_end() && | |
421 | iter.get_key() == logn->get_laddr() && | |
aee94f69 TL |
422 | iter.get_val().pladdr.is_paddr() && |
423 | iter.get_val().pladdr.get_paddr() == logn->get_paddr()) { | |
1e59de90 TL |
424 | assert(!iter.get_leaf_node()->is_pending()); |
425 | iter.get_leaf_node()->link_child(logn.get(), iter.get_leaf_pos()); | |
426 | logn->set_laddr(iter.get_pin(c)->get_key()); | |
427 | ceph_assert(iter.get_val().len == e->get_length()); | |
428 | DEBUGT("logical extent {} live", c.trans, *logn); | |
429 | ret = true; | |
430 | } else { | |
431 | DEBUGT("logical extent {} not live", c.trans, *logn); | |
432 | ret = false; | |
433 | } | |
434 | }); | |
f67539c2 | 435 | } else { |
1e59de90 TL |
436 | return btree.init_cached_extent(c, e |
437 | ).si_then([&ret](bool is_alive) { | |
438 | ret = is_alive; | |
439 | }); | |
f67539c2 | 440 | } |
f67539c2 TL |
441 | } |
442 | ||
1e59de90 TL |
443 | BtreeLBAManager::init_cached_extent_ret |
444 | BtreeLBAManager::init_cached_extent( | |
f67539c2 TL |
445 | Transaction &t, |
446 | CachedExtentRef e) | |
447 | { | |
20effc67 | 448 | LOG_PREFIX(BtreeLBAManager::init_cached_extent); |
1e59de90 TL |
449 | TRACET("{}", t, *e); |
450 | return seastar::do_with(bool(), [this, e, &t](bool &ret) { | |
451 | auto c = get_context(t); | |
452 | return with_btree<LBABtree>( | |
453 | cache, c, | |
454 | [c, e, &ret](auto &btree) -> base_iertr::future<> { | |
455 | LOG_PREFIX(BtreeLBAManager::init_cached_extent); | |
456 | DEBUGT("extent {}", c.trans, *e); | |
457 | return _init_cached_extent(c, e, btree, ret); | |
458 | } | |
459 | ).si_then([&ret] { return ret; }); | |
460 | }); | |
461 | } | |
462 | ||
463 | BtreeLBAManager::check_child_trackers_ret | |
464 | BtreeLBAManager::check_child_trackers( | |
465 | Transaction &t) { | |
20effc67 | 466 | auto c = get_context(t); |
1e59de90 TL |
467 | return with_btree<LBABtree>( |
468 | cache, c, | |
469 | [c](auto &btree) { | |
470 | return btree.check_child_trackers(c); | |
471 | }); | |
f67539c2 TL |
472 | } |
473 | ||
1e59de90 TL |
474 | BtreeLBAManager::scan_mappings_ret |
475 | BtreeLBAManager::scan_mappings( | |
f67539c2 TL |
476 | Transaction &t, |
477 | laddr_t begin, | |
478 | laddr_t end, | |
479 | scan_mappings_func_t &&f) | |
480 | { | |
20effc67 TL |
481 | LOG_PREFIX(BtreeLBAManager::scan_mappings); |
482 | DEBUGT("begin: {}, end: {}", t, begin, end); | |
483 | ||
484 | auto c = get_context(t); | |
1e59de90 TL |
485 | return with_btree<LBABtree>( |
486 | cache, | |
20effc67 TL |
487 | c, |
488 | [c, f=std::move(f), begin, end](auto &btree) mutable { | |
489 | return LBABtree::iterate_repeat( | |
490 | c, | |
491 | btree.upper_bound_right(c, begin), | |
20effc67 TL |
492 | [f=std::move(f), begin, end](auto &pos) { |
493 | if (pos.is_end() || pos.get_key() >= end) { | |
1e59de90 | 494 | return typename LBABtree::iterate_repeat_ret_inner( |
20effc67 TL |
495 | interruptible::ready_future_marker{}, |
496 | seastar::stop_iteration::yes); | |
497 | } | |
498 | ceph_assert((pos.get_key() + pos.get_val().len) > begin); | |
aee94f69 TL |
499 | f(pos.get_key(), pos.get_val().pladdr.get_paddr(), pos.get_val().len); |
500 | return LBABtree::iterate_repeat_ret_inner( | |
20effc67 TL |
501 | interruptible::ready_future_marker{}, |
502 | seastar::stop_iteration::no); | |
f67539c2 TL |
503 | }); |
504 | }); | |
505 | } | |
506 | ||
1e59de90 TL |
507 | BtreeLBAManager::rewrite_extent_ret |
508 | BtreeLBAManager::rewrite_extent( | |
f67539c2 TL |
509 | Transaction &t, |
510 | CachedExtentRef extent) | |
511 | { | |
20effc67 TL |
512 | LOG_PREFIX(BtreeLBAManager::rewrite_extent); |
513 | if (extent->has_been_invalidated()) { | |
1e59de90 TL |
514 | ERRORT("extent has been invalidated -- {}", t, *extent); |
515 | ceph_abort(); | |
20effc67 | 516 | } |
20effc67 TL |
517 | assert(!extent->is_logical()); |
518 | ||
20effc67 | 519 | if (is_lba_node(*extent)) { |
1e59de90 | 520 | DEBUGT("rewriting lba extent -- {}", t, *extent); |
20effc67 | 521 | auto c = get_context(t); |
1e59de90 TL |
522 | return with_btree<LBABtree>( |
523 | cache, | |
20effc67 TL |
524 | c, |
525 | [c, extent](auto &btree) mutable { | |
1e59de90 | 526 | return btree.rewrite_extent(c, extent); |
20effc67 | 527 | }); |
f67539c2 | 528 | } else { |
1e59de90 | 529 | DEBUGT("skip non lba extent -- {}", t, *extent); |
20effc67 | 530 | return rewrite_extent_iertr::now(); |
f67539c2 TL |
531 | } |
532 | } | |
533 | ||
1e59de90 | 534 | BtreeLBAManager::update_mapping_ret |
20effc67 TL |
535 | BtreeLBAManager::update_mapping( |
536 | Transaction& t, | |
537 | laddr_t laddr, | |
538 | paddr_t prev_addr, | |
1e59de90 TL |
539 | paddr_t addr, |
540 | LogicalCachedExtent *nextent) | |
20effc67 | 541 | { |
1e59de90 TL |
542 | LOG_PREFIX(BtreeLBAManager::update_mapping); |
543 | TRACET("laddr={}, paddr {} => {}", t, laddr, prev_addr, addr); | |
544 | return _update_mapping( | |
20effc67 TL |
545 | t, |
546 | laddr, | |
547 | [prev_addr, addr]( | |
548 | const lba_map_val_t &in) { | |
549 | assert(!addr.is_null()); | |
550 | lba_map_val_t ret = in; | |
aee94f69 TL |
551 | ceph_assert(in.pladdr.is_paddr()); |
552 | ceph_assert(in.pladdr.get_paddr() == prev_addr); | |
553 | ret.pladdr = addr; | |
20effc67 | 554 | return ret; |
1e59de90 TL |
555 | }, |
556 | nextent | |
557 | ).si_then([&t, laddr, prev_addr, addr, FNAME](auto result) { | |
558 | DEBUGT("laddr={}, paddr {} => {} done -- {}", | |
559 | t, laddr, prev_addr, addr, result); | |
560 | }, | |
561 | update_mapping_iertr::pass_further{}, | |
562 | /* ENOENT in particular should be impossible */ | |
563 | crimson::ct_error::assert_all{ | |
564 | "Invalid error in BtreeLBAManager::update_mapping" | |
565 | } | |
566 | ); | |
20effc67 TL |
567 | } |
568 | ||
f67539c2 TL |
569 | BtreeLBAManager::get_physical_extent_if_live_ret |
570 | BtreeLBAManager::get_physical_extent_if_live( | |
571 | Transaction &t, | |
572 | extent_types_t type, | |
573 | paddr_t addr, | |
574 | laddr_t laddr, | |
1e59de90 | 575 | extent_len_t len) |
f67539c2 | 576 | { |
1e59de90 TL |
577 | LOG_PREFIX(BtreeLBAManager::get_physical_extent_if_live); |
578 | DEBUGT("{}, laddr={}, paddr={}, length={}", | |
579 | t, type, laddr, addr, len); | |
f67539c2 | 580 | ceph_assert(is_lba_node(type)); |
20effc67 | 581 | auto c = get_context(t); |
1e59de90 TL |
582 | return with_btree_ret<LBABtree, CachedExtentRef>( |
583 | cache, | |
20effc67 TL |
584 | c, |
585 | [c, type, addr, laddr, len](auto &btree) { | |
586 | if (type == extent_types_t::LADDR_INTERNAL) { | |
587 | return btree.get_internal_if_live(c, addr, laddr, len); | |
588 | } else { | |
1e59de90 TL |
589 | assert(type == extent_types_t::LADDR_LEAF || |
590 | type == extent_types_t::DINK_LADDR_LEAF); | |
20effc67 TL |
591 | return btree.get_leaf_if_live(c, addr, laddr, len); |
592 | } | |
f67539c2 | 593 | }); |
f67539c2 TL |
594 | } |
595 | ||
20effc67 | 596 | void BtreeLBAManager::register_metrics() |
f67539c2 | 597 | { |
1e59de90 TL |
598 | LOG_PREFIX(BtreeLBAManager::register_metrics); |
599 | DEBUG("start"); | |
600 | stats = {}; | |
20effc67 TL |
601 | namespace sm = seastar::metrics; |
602 | metrics.add_group( | |
603 | "LBA", | |
604 | { | |
605 | sm::make_counter( | |
606 | "alloc_extents", | |
1e59de90 | 607 | stats.num_alloc_extents, |
20effc67 TL |
608 | sm::description("total number of lba alloc_extent operations") |
609 | ), | |
610 | sm::make_counter( | |
611 | "alloc_extents_iter_nexts", | |
1e59de90 | 612 | stats.num_alloc_extents_iter_nexts, |
20effc67 TL |
613 | sm::description("total number of iterator next operations during extent allocation") |
614 | ), | |
615 | } | |
616 | ); | |
f67539c2 TL |
617 | } |
618 | ||
aee94f69 TL |
619 | BtreeLBAManager::ref_iertr::future<std::optional<std::pair<paddr_t, extent_len_t>>> |
620 | BtreeLBAManager::_decref_intermediate( | |
621 | Transaction &t, | |
622 | laddr_t addr, | |
623 | extent_len_t len) | |
624 | { | |
625 | auto c = get_context(t); | |
626 | return with_btree<LBABtree>( | |
627 | cache, | |
628 | c, | |
629 | [c, addr, len](auto &btree) mutable { | |
630 | return btree.upper_bound_right( | |
631 | c, addr | |
632 | ).si_then([&btree, addr, len, c](auto iter) { | |
633 | return seastar::do_with( | |
634 | std::move(iter), | |
635 | [&btree, addr, len, c](auto &iter) { | |
636 | ceph_assert(!iter.is_end()); | |
637 | ceph_assert(iter.get_key() <= addr); | |
638 | auto val = iter.get_val(); | |
639 | ceph_assert(iter.get_key() + val.len >= addr + len); | |
640 | ceph_assert(val.pladdr.is_paddr()); | |
641 | ceph_assert(val.refcount >= 1); | |
642 | val.refcount -= 1; | |
643 | ||
644 | LOG_PREFIX(BtreeLBAManager::_decref_intermediate); | |
645 | TRACET("decreased refcount of intermediate key {} -- {}", | |
646 | c.trans, | |
647 | iter.get_key(), | |
648 | val); | |
649 | ||
650 | if (!val.refcount) { | |
651 | return btree.remove(c, iter | |
652 | ).si_then([val] { | |
653 | return std::make_optional< | |
654 | std::pair<paddr_t, extent_len_t>>( | |
655 | val.pladdr.get_paddr(), val.len); | |
656 | }); | |
657 | } else { | |
658 | return btree.update(c, iter, val, nullptr | |
659 | ).si_then([](auto) { | |
660 | return seastar::make_ready_future< | |
661 | std::optional<std::pair<paddr_t, extent_len_t>>>(std::nullopt); | |
662 | }); | |
663 | } | |
664 | }); | |
665 | }); | |
666 | }); | |
667 | } | |
668 | ||
1e59de90 TL |
669 | BtreeLBAManager::update_refcount_ret |
670 | BtreeLBAManager::update_refcount( | |
f67539c2 TL |
671 | Transaction &t, |
672 | laddr_t addr, | |
aee94f69 TL |
673 | int delta, |
674 | bool cascade_remove) | |
f67539c2 | 675 | { |
20effc67 | 676 | LOG_PREFIX(BtreeLBAManager::update_refcount); |
1e59de90 TL |
677 | TRACET("laddr={}, delta={}", t, addr, delta); |
678 | return _update_mapping( | |
f67539c2 TL |
679 | t, |
680 | addr, | |
681 | [delta](const lba_map_val_t &in) { | |
682 | lba_map_val_t out = in; | |
683 | ceph_assert((int)out.refcount + delta >= 0); | |
684 | out.refcount += delta; | |
685 | return out; | |
1e59de90 TL |
686 | }, |
687 | nullptr | |
aee94f69 | 688 | ).si_then([&t, addr, delta, FNAME, this, cascade_remove](auto result) { |
1e59de90 | 689 | DEBUGT("laddr={}, delta={} done -- {}", t, addr, delta, result); |
aee94f69 TL |
690 | auto fut = ref_iertr::make_ready_future< |
691 | std::optional<std::pair<paddr_t, extent_len_t>>>(); | |
692 | if (!result.refcount && result.pladdr.is_laddr() && cascade_remove) { | |
693 | fut = _decref_intermediate( | |
694 | t, | |
695 | result.pladdr.get_laddr(), | |
696 | result.len | |
697 | ); | |
698 | } | |
699 | return fut.si_then([result](auto removed) { | |
700 | if (result.pladdr.is_laddr() | |
701 | && removed) { | |
702 | return ref_update_result_t{ | |
703 | result.refcount, | |
704 | removed->first, | |
705 | removed->second}; | |
706 | } else { | |
707 | return ref_update_result_t{ | |
708 | result.refcount, | |
709 | result.pladdr, | |
710 | result.len | |
711 | }; | |
712 | } | |
713 | }); | |
1e59de90 | 714 | }); |
f67539c2 TL |
715 | } |
716 | ||
1e59de90 TL |
717 | BtreeLBAManager::_update_mapping_ret |
718 | BtreeLBAManager::_update_mapping( | |
f67539c2 TL |
719 | Transaction &t, |
720 | laddr_t addr, | |
1e59de90 TL |
721 | update_func_t &&f, |
722 | LogicalCachedExtent* nextent) | |
f67539c2 | 723 | { |
20effc67 | 724 | auto c = get_context(t); |
1e59de90 TL |
725 | return with_btree_ret<LBABtree, lba_map_val_t>( |
726 | cache, | |
20effc67 | 727 | c, |
1e59de90 | 728 | [f=std::move(f), c, addr, nextent](auto &btree) mutable { |
20effc67 TL |
729 | return btree.lower_bound( |
730 | c, addr | |
1e59de90 TL |
731 | ).si_then([&btree, f=std::move(f), c, addr, nextent](auto iter) |
732 | -> _update_mapping_ret { | |
20effc67 | 733 | if (iter.is_end() || iter.get_key() != addr) { |
1e59de90 TL |
734 | LOG_PREFIX(BtreeLBAManager::_update_mapping); |
735 | ERRORT("laddr={} doesn't exist", c.trans, addr); | |
20effc67 TL |
736 | return crimson::ct_error::enoent::make(); |
737 | } | |
738 | ||
739 | auto ret = f(iter.get_val()); | |
740 | if (ret.refcount == 0) { | |
741 | return btree.remove( | |
742 | c, | |
743 | iter | |
744 | ).si_then([ret] { | |
745 | return ret; | |
746 | }); | |
747 | } else { | |
748 | return btree.update( | |
749 | c, | |
750 | iter, | |
1e59de90 TL |
751 | ret, |
752 | nextent | |
20effc67 TL |
753 | ).si_then([ret](auto) { |
754 | return ret; | |
755 | }); | |
756 | } | |
757 | }); | |
758 | }); | |
f67539c2 TL |
759 | } |
760 | ||
f67539c2 | 761 | } |