// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <sys/mman.h>
#include <string.h>

#include <memory>

#include "include/buffer.h"

#include "crimson/common/fixed_kv_node_layout.h"
#include "crimson/common/errorator.h"
#include "crimson/os/seastore/lba_manager.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/cache.h"
#include "crimson/os/seastore/cached_extent.h"
#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"

namespace crimson::os::seastore::lba_manager::btree {

constexpr size_t LBA_BLOCK_SIZE = 4096;

/**
 * lba_node_meta_le_t
 *
 * On disk layout for lba_node_meta_t
 */
struct lba_node_meta_le_t {
  laddr_le_t begin = laddr_le_t(0);
  laddr_le_t end = laddr_le_t(0);
  depth_le_t depth = init_les32(0);

  lba_node_meta_le_t() = default;
  lba_node_meta_le_t(const lba_node_meta_le_t &) = default;
  explicit lba_node_meta_le_t(const lba_node_meta_t &val)
    : begin(init_le64(val.begin)),
      end(init_le64(val.end)),
      depth(init_les32(val.depth)) {}

  operator lba_node_meta_t() const {
    return lba_node_meta_t{ begin, end, depth };
  }
};
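
// A minimal round-trip sketch of the conversion pair above; the function
// name and field values are hypothetical, chosen only to illustrate the
// intended encode/decode symmetry.
inline lba_node_meta_t lba_node_meta_le_roundtrip_example() {
  lba_node_meta_t m{0 /*begin*/, 4096 /*end*/, 1 /*depth*/};
  lba_node_meta_le_t le(m);     // encode into the on-disk little-endian form
  return lba_node_meta_t(le);   // decode back; equal to m field-for-field
}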

/**
 * LBAInternalNode
 *
 * Abstracts operations on and layout of internal nodes for the
 * LBA Tree.
 *
 * Layout (4k):
 *   size      : uint32_t[1]            4b
 *   (padding) :                        4b
 *   meta      : lba_node_meta_le_t[1]  (1*24)b
 *   keys      : laddr_t[254]           (254*8)b
 *   values    : paddr_t[254]           (254*8)b
 *                                      = 4096
 *
 * TODO: make the above capacity calculation part of FixedKVNodeLayout
 * TODO: the above alignment probably isn't portable without further work
 */
constexpr size_t INTERNAL_NODE_CAPACITY = 254;
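
// A compile-time restatement of the layout arithmetic above (a sketch,
// assuming 8-byte keys/values and a 24-byte meta record as documented):
static_assert(
  4 /*size*/ + 4 /*padding*/ + 24 /*meta*/ +
    INTERNAL_NODE_CAPACITY * (8 /*key*/ + 8 /*value*/) == LBA_BLOCK_SIZE,
  "INTERNAL_NODE_CAPACITY should exactly fill a 4k block");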

struct LBAInternalNode
  : LBANode,
    common::FixedKVNodeLayout<
      INTERNAL_NODE_CAPACITY,
      lba_node_meta_t, lba_node_meta_le_t,
      laddr_t, laddr_le_t,
      paddr_t, paddr_le_t> {
  using internal_iterator_t = const_iterator;
  template <typename... T>
  LBAInternalNode(T&&... t) :
    LBANode(std::forward<T>(t)...),
    FixedKVNodeLayout(get_bptr().c_str()) {}

  static constexpr extent_types_t type = extent_types_t::LADDR_INTERNAL;

  lba_node_meta_t get_node_meta() const final { return get_meta(); }

  CachedExtentRef duplicate_for_write() final {
    assert(delta_buffer.empty());
    return CachedExtentRef(new LBAInternalNode(*this));
  }

  /// accumulates mutations while this extent is mutation pending
  delta_buffer_t delta_buffer;
  delta_buffer_t *maybe_get_delta_buffer() {
    return is_mutation_pending() ? &delta_buffer : nullptr;
  }

  lookup_ret lookup(op_context_t c, laddr_t addr, depth_t depth) final;

  lookup_range_ret lookup_range(
    op_context_t c,
    laddr_t addr,
    extent_len_t len) final;

  insert_ret insert(
    op_context_t c,
    laddr_t laddr,
    lba_map_val_t val) final;

  mutate_mapping_ret mutate_mapping(
    op_context_t c,
    laddr_t laddr,
    mutate_func_t &&f) final;
  mutate_mapping_ret mutate_mapping_internal(
    op_context_t c,
    laddr_t laddr,
    bool is_root,
    mutate_func_t &&f) final;

  mutate_internal_address_ret mutate_internal_address(
    op_context_t c,
    depth_t depth,
    laddr_t laddr,
    paddr_t paddr) final;

  find_hole_ret find_hole(
    op_context_t c,
    laddr_t min,
    laddr_t max,
    extent_len_t len) final;

  scan_mappings_ret scan_mappings(
    op_context_t c,
    laddr_t begin,
    laddr_t end,
    scan_mappings_func_t &f) final;

  scan_mapped_space_ret scan_mapped_space(
    op_context_t c,
    scan_mapped_space_func_t &f) final;

  std::tuple<LBANodeRef, LBANodeRef, laddr_t>
  make_split_children(op_context_t c) final {
    auto left = c.cache.alloc_new_extent<LBAInternalNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto right = c.cache.alloc_new_extent<LBAInternalNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto pivot = split_into(*left, *right);
    left->pin.set_range(left->get_meta());
    right->pin.set_range(right->get_meta());
    return std::make_tuple(
      left,
      right,
      pivot);
  }

  LBANodeRef make_full_merge(
    op_context_t c,
    LBANodeRef &right) final {
    auto replacement = c.cache.alloc_new_extent<LBAInternalNode>(
      c.trans, LBA_BLOCK_SIZE);
    replacement->merge_from(*this, *right->cast<LBAInternalNode>());
    replacement->pin.set_range(replacement->get_meta());
    return replacement;
  }

  std::tuple<LBANodeRef, LBANodeRef, laddr_t>
  make_balanced(
    op_context_t c,
    LBANodeRef &_right,
    bool prefer_left) final {
    ceph_assert(_right->get_type() == type);
    auto &right = *_right->cast<LBAInternalNode>();
    auto replacement_left = c.cache.alloc_new_extent<LBAInternalNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto replacement_right = c.cache.alloc_new_extent<LBAInternalNode>(
      c.trans, LBA_BLOCK_SIZE);

    auto pivot = balance_into_new_nodes(
      *this,
      right,
      prefer_left,
      *replacement_left,
      *replacement_right);

    replacement_left->pin.set_range(replacement_left->get_meta());
    replacement_right->pin.set_range(replacement_right->get_meta());
    return std::make_tuple(
      replacement_left,
      replacement_right,
      pivot);
  }

  /**
   * Internal relative addresses on read or in memory prior to commit
   * are either record or block relative depending on whether this
   * physical node satisfies is_initial_pending() or merely is_pending().
   *
   * The caller passes the appropriate base depending on lifecycle, and
   * resolve_relative_addrs fixes up relative internal references
   * based on that base.
   */
  void resolve_relative_addrs(paddr_t base) final;
  void node_resolve_vals(iterator from, iterator to) const final {
    if (is_initial_pending()) {
      for (auto i = from; i != to; ++i) {
        if (i->get_val().is_relative()) {
          assert(i->get_val().is_block_relative());
          i->set_val(get_paddr().add_relative(i->get_val()));
        }
      }
    }
  }
  void node_unresolve_vals(iterator from, iterator to) const final {
    if (is_initial_pending()) {
      for (auto i = from; i != to; ++i) {
        if (i->get_val().is_relative()) {
          assert(i->get_val().is_record_relative());
          i->set_val(i->get_val() - get_paddr());
        }
      }
    }
  }

  extent_types_t get_type() const final {
    return type;
  }

  std::ostream &print_detail(std::ostream &out) const final;

  ceph::bufferlist get_delta() final {
    assert(!delta_buffer.empty());
    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
    delta_buffer.copy_out(bptr.c_str(), bptr.length());
    ceph::bufferlist bl;
    bl.push_back(bptr);
    return bl;
  }

  void apply_delta_and_adjust_crc(
    paddr_t base, const ceph::bufferlist &_bl) final {
    assert(_bl.length());
    ceph::bufferlist bl = _bl;
    bl.rebuild();
    delta_buffer_t buffer;
    buffer.copy_in(bl.front().c_str(), bl.front().length());
    buffer.replay(*this);
    set_last_committed_crc(get_crc32c());
    resolve_relative_addrs(base);
  }

  bool at_max_capacity() const final {
    return get_size() == get_capacity();
  }

  bool at_min_capacity() const final {
    return get_size() == (get_capacity() / 2);
  }

  /// returns iterators containing [l, r)
  std::pair<internal_iterator_t, internal_iterator_t> bound(
    laddr_t l, laddr_t r) {
    // TODO: inefficient
    auto retl = begin();
    for (; retl != end(); ++retl) {
      if (retl->get_next_key_or_max() > l)
        break;
    }
    auto retr = retl;
    for (; retr != end(); ++retr) {
      if (retr->get_key() >= r)
        break;
    }
    return std::make_pair(retl, retr);
  }

  using split_ertr = crimson::errorator<
    crimson::ct_error::input_output_error
    >;
  using split_ret = split_ertr::future<LBANodeRef>;
  split_ret split_entry(
    op_context_t c,
    laddr_t addr,
    internal_iterator_t,
    LBANodeRef entry);

  using merge_ertr = crimson::errorator<
    crimson::ct_error::input_output_error
    >;
  using merge_ret = merge_ertr::future<LBANodeRef>;
  merge_ret merge_entry(
    op_context_t c,
    laddr_t addr,
    internal_iterator_t,
    LBANodeRef entry,
    bool is_root);

  /// returns iterator for subtree containing laddr
  internal_iterator_t get_containing_child(laddr_t laddr);
};
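
// Hedged usage sketch for LBAInternalNode::bound() (the function name and
// body below are illustrative, not part of the tree implementation): visit
// the child entries whose ranges overlap [l, r).
inline void for_each_child_in_range_example(
  LBAInternalNode &node, laddr_t l, laddr_t r) {
  auto [from, to] = node.bound(l, r);
  for (auto i = from; i != to; ++i) {
    (void)i->get_key();  // first laddr covered by this child
    (void)i->get_val();  // paddr of the child node
  }
}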

/**
 * LBALeafNode
 *
 * Abstracts operations on and layout of leaf nodes for the
 * LBA Tree.
 *
 * Layout (4k):
 *   size      : uint32_t[1]            4b
 *   (padding) :                        4b
 *   meta      : lba_node_meta_le_t[1]  (1*24)b
 *   keys      : laddr_t[145]           (145*8)b
 *   values    : lba_map_val_t[145]     (145*20)b
 *                                      = 4092
 *
 * TODO: update FixedKVNodeLayout to handle the above calculation
 * TODO: the above alignment probably isn't portable without further work
 */
constexpr size_t LEAF_NODE_CAPACITY = 145;
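
// Compile-time restatement of the leaf layout arithmetic (a sketch, assuming
// 8-byte keys and 20-byte values as documented above): 145 entries leave
// 4 bytes of slack in a 4k block.
static_assert(
  4 /*size*/ + 4 /*padding*/ + 24 /*meta*/ +
    LEAF_NODE_CAPACITY * (8 /*key*/ + 20 /*value*/) <= LBA_BLOCK_SIZE,
  "LEAF_NODE_CAPACITY must fit in a 4k block");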

/**
 * lba_map_val_le_t
 *
 * On disk layout for lba_map_val_t.
 */
struct lba_map_val_le_t {
  extent_len_le_t len = init_extent_len_le_t(0);
  paddr_le_t paddr;
  ceph_le32 refcount = init_le32(0);
  ceph_le32 checksum = init_le32(0);

  lba_map_val_le_t() = default;
  lba_map_val_le_t(const lba_map_val_le_t &) = default;
  explicit lba_map_val_le_t(const lba_map_val_t &val)
    : len(init_extent_len_le_t(val.len)),
      paddr(paddr_le_t(val.paddr)),
      refcount(init_le32(val.refcount)),
      checksum(init_le32(val.checksum)) {}

  operator lba_map_val_t() const {
    return lba_map_val_t{ len, paddr, refcount, checksum };
  }
};
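
// Companion round-trip sketch for lba_map_val_le_t, mirroring the
// lba_node_meta_le_t example above; values (and the default-constructed
// paddr_t) are hypothetical, for illustration only.
inline lba_map_val_t lba_map_val_le_roundtrip_example() {
  lba_map_val_t v{4096 /*len*/, paddr_t{} /*paddr*/, 1 /*refcount*/, 0 /*checksum*/};
  lba_map_val_le_t le(v);      // encode into the on-disk little-endian form
  return lba_map_val_t(le);    // decode back; equal to v field-for-field
}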

struct LBALeafNode
  : LBANode,
    common::FixedKVNodeLayout<
      LEAF_NODE_CAPACITY,
      lba_node_meta_t, lba_node_meta_le_t,
      laddr_t, laddr_le_t,
      lba_map_val_t, lba_map_val_le_t> {
  using internal_iterator_t = const_iterator;
  template <typename... T>
  LBALeafNode(T&&... t) :
    LBANode(std::forward<T>(t)...),
    FixedKVNodeLayout(get_bptr().c_str()) {}

  static constexpr extent_types_t type = extent_types_t::LADDR_LEAF;

  lba_node_meta_t get_node_meta() const final { return get_meta(); }

  CachedExtentRef duplicate_for_write() final {
    assert(delta_buffer.empty());
    return CachedExtentRef(new LBALeafNode(*this));
  }

  /// accumulates mutations while this extent is mutation pending
  delta_buffer_t delta_buffer;
  delta_buffer_t *maybe_get_delta_buffer() {
    return is_mutation_pending() ? &delta_buffer : nullptr;
  }

  // a leaf is the bottom of the descent, so lookup resolves to this node
  lookup_ret lookup(op_context_t c, laddr_t addr, depth_t depth) final
  {
    return lookup_ret(
      lookup_ertr::ready_future_marker{},
      this);
  }

  lookup_range_ret lookup_range(
    op_context_t c,
    laddr_t addr,
    extent_len_t len) final;

  insert_ret insert(
    op_context_t c,
    laddr_t laddr,
    lba_map_val_t val) final;

  mutate_mapping_ret mutate_mapping(
    op_context_t c,
    laddr_t laddr,
    mutate_func_t &&f) final;
  mutate_mapping_ret mutate_mapping_internal(
    op_context_t c,
    laddr_t laddr,
    bool is_root,
    mutate_func_t &&f) final;

  mutate_internal_address_ret mutate_internal_address(
    op_context_t c,
    depth_t depth,
    laddr_t laddr,
    paddr_t paddr) final;

  find_hole_ret find_hole(
    op_context_t c,
    laddr_t min,
    laddr_t max,
    extent_len_t len) final;

  scan_mappings_ret scan_mappings(
    op_context_t c,
    laddr_t begin,
    laddr_t end,
    scan_mappings_func_t &f) final;

  scan_mapped_space_ret scan_mapped_space(
    op_context_t c,
    scan_mapped_space_func_t &f) final;

  std::tuple<LBANodeRef, LBANodeRef, laddr_t>
  make_split_children(op_context_t c) final {
    auto left = c.cache.alloc_new_extent<LBALeafNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto right = c.cache.alloc_new_extent<LBALeafNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto pivot = split_into(*left, *right);
    left->pin.set_range(left->get_meta());
    right->pin.set_range(right->get_meta());
    return std::make_tuple(
      left,
      right,
      pivot);
  }

  LBANodeRef make_full_merge(
    op_context_t c,
    LBANodeRef &right) final {
    auto replacement = c.cache.alloc_new_extent<LBALeafNode>(
      c.trans, LBA_BLOCK_SIZE);
    replacement->merge_from(*this, *right->cast<LBALeafNode>());
    replacement->pin.set_range(replacement->get_meta());
    return replacement;
  }

  std::tuple<LBANodeRef, LBANodeRef, laddr_t>
  make_balanced(
    op_context_t c,
    LBANodeRef &_right,
    bool prefer_left) final {
    ceph_assert(_right->get_type() == type);
    auto &right = *_right->cast<LBALeafNode>();
    auto replacement_left = c.cache.alloc_new_extent<LBALeafNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto replacement_right = c.cache.alloc_new_extent<LBALeafNode>(
      c.trans, LBA_BLOCK_SIZE);

    auto pivot = balance_into_new_nodes(
      *this,
      right,
      prefer_left,
      *replacement_left,
      *replacement_right);

    replacement_left->pin.set_range(replacement_left->get_meta());
    replacement_right->pin.set_range(replacement_right->get_meta());
    return std::make_tuple(
      replacement_left,
      replacement_right,
      pivot);
  }

  // See LBAInternalNode, same concept
  void resolve_relative_addrs(paddr_t base) final;
  void node_resolve_vals(iterator from, iterator to) const final {
    if (is_initial_pending()) {
      for (auto i = from; i != to; ++i) {
        auto val = i->get_val();
        if (val.paddr.is_relative()) {
          assert(val.paddr.is_block_relative());
          val.paddr = get_paddr().add_relative(val.paddr);
          i->set_val(val);
        }
      }
    }
  }
  void node_unresolve_vals(iterator from, iterator to) const final {
    if (is_initial_pending()) {
      for (auto i = from; i != to; ++i) {
        auto val = i->get_val();
        if (val.paddr.is_relative()) {
          assert(val.paddr.is_record_relative());
          val.paddr = val.paddr - get_paddr();
          i->set_val(val);
        }
      }
    }
  }

  ceph::bufferlist get_delta() final {
    assert(!delta_buffer.empty());
    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
    delta_buffer.copy_out(bptr.c_str(), bptr.length());
    ceph::bufferlist bl;
    bl.push_back(bptr);
    return bl;
  }

  void apply_delta_and_adjust_crc(
    paddr_t base, const ceph::bufferlist &_bl) final {
    assert(_bl.length());
    ceph::bufferlist bl = _bl;
    bl.rebuild();
    delta_buffer_t buffer;
    buffer.copy_in(bl.front().c_str(), bl.front().length());
    buffer.replay(*this);
    set_last_committed_crc(get_crc32c());
    resolve_relative_addrs(base);
  }

  extent_types_t get_type() const final {
    return type;
  }

  std::ostream &print_detail(std::ostream &out) const final;

  bool at_max_capacity() const final {
    return get_size() == get_capacity();
  }

  bool at_min_capacity() const final {
    return get_size() == (get_capacity() / 2);
  }

  /// returns iterators <lb, ub> containing addresses [l, r)
  std::pair<internal_iterator_t, internal_iterator_t> bound(
    laddr_t l, laddr_t r) {
    // TODO: inefficient
    auto retl = begin();
    for (; retl != end(); ++retl) {
      if (retl->get_key() >= l || (retl->get_key() + retl->get_val().len) > l)
        break;
    }
    auto retr = retl;
    for (; retr != end(); ++retr) {
      if (retr->get_key() >= r)
        break;
    }
    return std::make_pair(retl, retr);
  }

  std::pair<internal_iterator_t, internal_iterator_t>
  get_leaf_entries(laddr_t addr, extent_len_t len);
};
using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
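
// Hedged usage sketch for LBALeafNode::bound() (illustrative only, not part
// of the implementation): visit each mapping overlapping [addr, addr + len).
// Real callers reach mappings through lookup_range().
inline void walk_leaf_mappings_example(
  LBALeafNodeRef leaf, laddr_t addr, extent_len_t len) {
  auto [from, to] = leaf->bound(addr, addr + len);
  for (auto i = from; i != to; ++i) {
    lba_map_val_t val = i->get_val();  // len, paddr, refcount, checksum
    (void)val;
  }
}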

}