]> git.proxmox.com Git - ceph.git/blame - ceph/src/crimson/os/seastore/onode_manager/staged-fltree/node.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / crimson / os / seastore / onode_manager / staged-fltree / node.h
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
2// vim: ts=8 sw=2 smarttab
3
4#pragma once
5
1e59de90 6#include <compare>
f67539c2
TL
7#include <map>
8#include <memory>
9#include <ostream>
10#include <boost/smart_ptr/intrusive_ref_counter.hpp>
11
12#include "crimson/common/type_helpers.h"
13
14#include "node_extent_mutable.h"
15#include "stages/key_layout.h"
16#include "stages/stage_types.h"
17#include "super.h"
20effc67 18#include "value.h"
f67539c2
TL
19
20/**
21 * Tree example (2 levels):
22 *
23 * Root node keys: [ 3 7 ]
24 * values: [p1 p2 p3]
25 * / | \
26 * ------- | -------
27 * | | |
28 * V V V
29 * Leaf node keys: [ 1 2 3] [ 4 5 7] [ 9 11 12]
30 * values: [v1 v2 v3] [v4 v5 v6] [v7 v8 v9]
31 *
32 * Tree structure properties:
33 * - As illustrated above, the parent key is strictly equal to its left child's
34 * largest key;
35 * - If a tree is indexing multiple seastore transactions, each transaction
36 * will be mapped to a Super which points to a distinct root node. So the
37 * transactions are isolated at tree level. However, tree nodes from
38 * different transactions can reference the same seastore CachedExtent before
39 * modification;
40 * - The resources of the transactional tree are tracked by tree_cursor_ts held
41 * by users. As long as any cursor is alive, the according tree hierarchy is
42 * alive and keeps tracked. See the reversed resource management sections
43 * below;
44 */
45
46namespace crimson::os::seastore::onode {
47
48class LeafNode;
49class InternalNode;
50
20effc67
TL
51using layout_version_t = uint32_t;
52struct node_version_t {
53 layout_version_t layout;
54 nextent_state_t state;
55
56 bool operator==(const node_version_t& rhs) const {
57 return (layout == rhs.layout && state == rhs.state);
58 }
59 bool operator!=(const node_version_t& rhs) const {
60 return !(*this == rhs);
61 }
62};
63
f67539c2
TL
64/**
65 * tree_cursor_t
66 *
67 * A cursor points to a position (LeafNode and search_position_t) of the tree
68 * where it can find the according key and value pair. The position is updated
69 * by LeafNode insert/split/delete/merge internally and is kept valid. It also
70 * caches the key-value information for a specific node layout version.
71 *
72 * Exposes public interfaces for Btree::Cursor.
73 */
f67539c2
TL
74class tree_cursor_t final
75 : public boost::intrusive_ref_counter<
76 tree_cursor_t, boost::thread_unsafe_counter> {
77 public:
f67539c2
TL
78 ~tree_cursor_t();
79 tree_cursor_t(const tree_cursor_t&) = delete;
80 tree_cursor_t(tree_cursor_t&&) = delete;
81 tree_cursor_t& operator=(const tree_cursor_t&) = delete;
82 tree_cursor_t& operator=(tree_cursor_t&&) = delete;
83
20effc67
TL
84 // public to Btree
85
f67539c2
TL
86 /**
87 * is_end
88 *
89 * Represents one-past-the-last of all the sorted key-value
90 * pairs in the tree. An end cursor won't contain valid key-value
91 * information.
92 */
20effc67
TL
93 bool is_end() const { return !!ref_leaf_node && position.is_end(); }
94
95 /**
96 * is_tracked
97 *
98 * Represents a key-value pair stored in the tree, which is always tracked
99 * across insert/split/erase/merge operations.
100 */
101 bool is_tracked() const { return !!ref_leaf_node && !position.is_end(); }
102
103 /**
104 * is_invalid
105 *
106 * Represents an invalid cursor which was once valid and tracked by the tree
107 * but is now erased and untracked. User may still hold an invalid cursor.
108 */
109 bool is_invalid() const { return !ref_leaf_node; }
f67539c2
TL
110
111 /// Returns the key view in tree if it is not an end cursor.
20effc67
TL
112 const key_view_t& get_key_view(value_magic_t magic) const {
113 assert(is_tracked());
114 return cache.get_key_view(magic, position);
115 }
116
117 /// Returns the next tree_cursor_t in tree, can be end if there's no next.
118 eagain_ifuture<Ref<tree_cursor_t>> get_next(context_t);
119
120 /// Check that this is next to prv
121 void assert_next_to(const tree_cursor_t&, value_magic_t) const;
122
123 /// Erases the key-value pair from tree.
124 template <bool FORCE_MERGE = false>
125 eagain_ifuture<Ref<tree_cursor_t>> erase(context_t, bool get_next);
126
1e59de90 127 std::strong_ordering compare_to(const tree_cursor_t&, value_magic_t) const;
20effc67
TL
128
129 // public to Value
130
131 /// Get the latest value_header_t pointer for read.
132 const value_header_t* read_value_header(value_magic_t magic) const {
133 assert(is_tracked());
134 return cache.get_p_value_header(magic, position);
135 }
136
137 /// Prepare the node extent to be mutable and recorded.
138 std::pair<NodeExtentMutable&, ValueDeltaRecorder*>
139 prepare_mutate_value_payload(context_t c) {
140 assert(is_tracked());
1e59de90
TL
141 if (!is_mutated) {
142 is_mutated = true;
143 ++(c.t.get_onode_tree_stats().num_updates);
144 }
20effc67
TL
145 return cache.prepare_mutate_value_payload(c, position);
146 }
147
148 /// Extends the size of value payload.
149 eagain_ifuture<> extend_value(context_t, value_size_t);
f67539c2 150
20effc67
TL
151 /// Trim and shrink the value payload.
152 eagain_ifuture<> trim_value(context_t, value_size_t);
153
154 static Ref<tree_cursor_t> get_invalid() {
1e59de90 155 Ref<tree_cursor_t> INVALID = new tree_cursor_t();
20effc67
TL
156 return INVALID;
157 }
f67539c2
TL
158
159 private:
1e59de90 160 // create from insert
f67539c2 161 tree_cursor_t(Ref<LeafNode>, const search_position_t&);
1e59de90 162 // create from lookup
f67539c2 163 tree_cursor_t(Ref<LeafNode>, const search_position_t&,
20effc67 164 const key_view_t&, const value_header_t*);
f67539c2
TL
165 // lookup reaches the end, contain leaf node for further insert
166 tree_cursor_t(Ref<LeafNode>);
20effc67
TL
167 // create an invalid tree_cursor_t
168 tree_cursor_t() : cache{ref_leaf_node} {}
169
f67539c2 170 const search_position_t& get_position() const { return position; }
20effc67 171 Ref<LeafNode> get_leaf_node() const { return ref_leaf_node; }
f67539c2
TL
172 template <bool VALIDATE>
173 void update_track(Ref<LeafNode>, const search_position_t&);
20effc67
TL
174 void update_cache_same_node(const key_view_t&,
175 const value_header_t*) const;
176 void invalidate();
177
1e59de90
TL
178 static Ref<tree_cursor_t> create_inserted(
179 Ref<LeafNode> node, const search_position_t& pos) {
20effc67
TL
180 return new tree_cursor_t(node, pos);
181 }
182
1e59de90
TL
183 static Ref<tree_cursor_t> create_tracked(
184 Ref<LeafNode> node, const search_position_t& pos,
185 const key_view_t& key, const value_header_t* p_header) {
20effc67
TL
186 return new tree_cursor_t(node, pos, key, p_header);
187 }
188
189 static Ref<tree_cursor_t> create_end(Ref<LeafNode> node) {
190 return new tree_cursor_t(node);
191 }
f67539c2 192
f67539c2
TL
193 /**
194 * Reversed resource management (tree_cursor_t)
195 *
196 * tree_cursor_t holds a reference to the LeafNode, so the LeafNode will be
197 * alive as long as any of it's cursors is still referenced by user.
198 */
20effc67 199 Ref<LeafNode> ref_leaf_node;
f67539c2
TL
200 search_position_t position;
201
1e59de90
TL
202 // account 1 update even if there are multiple updates to the same value
203 bool is_mutated = false;
204
20effc67
TL
205 /** Cache
206 *
207 * Cached memory pointers or views which may be outdated due to
208 * extent copy-on-write or asynchronous leaf node updates.
209 */
210 class Cache {
211 public:
212 Cache(Ref<LeafNode>&);
213 void validate_is_latest(const search_position_t&) const;
214 void invalidate() { needs_update_all = true; }
215 void update_all(const node_version_t&, const key_view_t&, const value_header_t*);
216 const key_view_t& get_key_view(
217 value_magic_t magic, const search_position_t& pos) {
218 make_latest(magic, pos);
219 return *key_view;
220 }
221 const value_header_t* get_p_value_header(
222 value_magic_t magic, const search_position_t& pos) {
223 make_latest(magic, pos);
224 return p_value_header;
225 }
226 std::pair<NodeExtentMutable&, ValueDeltaRecorder*>
227 prepare_mutate_value_payload(context_t, const search_position_t&);
228
229 private:
230 void maybe_duplicate(const node_version_t&);
231 void make_latest(value_magic_t, const search_position_t&);
232
233 // metadata about how cache is valid
234 Ref<LeafNode>& ref_leaf_node;
235 bool needs_update_all = true;
236 node_version_t version;
237
238 // cached key value info
239 const char* p_node_base = nullptr;
240 std::optional<key_view_t> key_view;
241 const value_header_t* p_value_header = nullptr;
242
243 // cached data-structures to update value payload
244 std::optional<NodeExtentMutable> value_payload_mut;
245 ValueDeltaRecorder* p_value_recorder = nullptr;
246 };
247 mutable Cache cache;
f67539c2
TL
248
249 friend class LeafNode;
250 friend class Node; // get_position(), get_leaf_node()
251};
252
253/**
254 * Node
255 *
256 * An abstracted class for both InternalNode and LeafNode.
257 *
258 * Exposes public interfaces for Btree.
259 */
260class Node
261 : public boost::intrusive_ref_counter<
262 Node, boost::thread_unsafe_counter> {
263 public:
264 // public to Btree
f67539c2
TL
265 struct search_result_t {
266 bool is_end() const { return p_cursor->is_end(); }
267 Ref<tree_cursor_t> p_cursor;
268 match_stat_t mstat;
269
270 MatchKindBS match() const {
271 assert(mstat >= MSTAT_MIN && mstat <= MSTAT_MAX);
272 return (mstat == MSTAT_EQ ? MatchKindBS::EQ : MatchKindBS::NE);
273 }
20effc67
TL
274
275 void validate_input_key(const key_hobj_t& key, value_magic_t magic) const {
276#ifndef NDEBUG
277 if (match() == MatchKindBS::EQ) {
1e59de90 278 assert(key == p_cursor->get_key_view(magic));
20effc67
TL
279 } else {
280 assert(match() == MatchKindBS::NE);
281 if (p_cursor->is_tracked()) {
1e59de90 282 assert(key < p_cursor->get_key_view(magic));
20effc67
TL
283 } else if (p_cursor->is_end()) {
284 // good
285 } else {
286 assert(p_cursor->is_invalid());
287 ceph_abort("impossible");
288 }
289 }
290#endif
291 }
f67539c2
TL
292 };
293
294 virtual ~Node();
295 Node(const Node&) = delete;
296 Node(Node&&) = delete;
297 Node& operator=(const Node&) = delete;
298 Node& operator=(Node&&) = delete;
299
300 /**
301 * level
302 *
303 * A positive value denotes the level (or height) of this node in tree.
304 * 0 means LeafNode, positive means InternalNode.
305 */
306 level_t level() const;
307
308 /**
309 * lookup_smallest
310 *
311 * Returns a cursor pointing to the smallest key in the sub-tree formed by
312 * this node.
313 *
314 * Returns an end cursor if it is an empty root node.
315 */
20effc67 316 virtual eagain_ifuture<Ref<tree_cursor_t>> lookup_smallest(context_t) = 0;
f67539c2
TL
317
318 /**
319 * lookup_largest
320 *
321 * Returns a cursor pointing to the largest key in the sub-tree formed by
322 * this node.
323 *
324 * Returns an end cursor if it is an empty root node.
325 */
20effc67 326 virtual eagain_ifuture<Ref<tree_cursor_t>> lookup_largest(context_t) = 0;
f67539c2
TL
327
328 /**
329 * lower_bound
330 *
331 * Returns a cursor pointing to the first element in the range [first, last)
332 * of the sub-tree which does not compare less than the input key. The
333 * result also denotes whether the pointed key is equal to the input key.
334 *
335 * Returns an end cursor with MatchKindBS::NE if:
336 * - It is an empty root node;
337 * - Or the input key is larger than all the keys in the sub-tree;
338 */
20effc67 339 eagain_ifuture<search_result_t> lower_bound(context_t c, const key_hobj_t& key);
f67539c2
TL
340
341 /**
342 * insert
343 *
344 * Try to insert a key-value pair into the sub-tree formed by this node.
345 *
346 * Returns a boolean denoting whether the insertion is successful:
347 * - If true, the returned cursor points to the inserted element in tree;
348 * - If false, the returned cursor points to the conflicting element in tree;
349 */
20effc67
TL
350 eagain_ifuture<std::pair<Ref<tree_cursor_t>, bool>> insert(
351 context_t, const key_hobj_t&, value_config_t, Ref<Node>&&);
352
353 /**
354 * erase
355 *
356 * Removes a key-value pair from the sub-tree formed by this node.
357 *
358 * Returns the number of erased key-value pairs (0 or 1).
359 */
360 eagain_ifuture<std::size_t> erase(context_t, const key_hobj_t&, Ref<Node>&&);
f67539c2
TL
361
362 /// Recursively collects the statistics of the sub-tree formed by this node
20effc67 363 eagain_ifuture<tree_stats_t> get_tree_stats(context_t);
f67539c2
TL
364
365 /// Returns an ostream containing a dump of all the elements in the node.
366 std::ostream& dump(std::ostream&) const;
367
368 /// Returns an ostream containing an one-line summary of this node.
369 std::ostream& dump_brief(std::ostream&) const;
370
20effc67
TL
371 /// Print the node name
372 const std::string& get_name() const;
373
f67539c2 374 /// Initializes the tree by allocating an empty root node.
20effc67 375 static eagain_ifuture<> mkfs(context_t, RootNodeTracker&);
f67539c2
TL
376
377 /// Loads the tree root. The tree must be initialized.
20effc67 378 static eagain_ifuture<Ref<Node>> load_root(context_t, RootNodeTracker&);
f67539c2
TL
379
380 // Only for unit test purposes.
381 void test_make_destructable(context_t, NodeExtentMutable&, Super::URef&&);
20effc67 382 virtual eagain_ifuture<> test_clone_root(context_t, RootNodeTracker&) const = 0;
f67539c2
TL
383
384 protected:
20effc67 385 virtual eagain_ifuture<> test_clone_non_root(context_t, Ref<InternalNode>) const {
f67539c2
TL
386 ceph_abort("impossible path");
387 }
20effc67 388 virtual eagain_ifuture<search_result_t> lower_bound_tracked(
f67539c2 389 context_t, const key_hobj_t&, MatchHistory&) = 0;
20effc67
TL
390 virtual eagain_ifuture<> do_get_tree_stats(context_t, tree_stats_t&) = 0;
391
392 virtual bool is_tracking() const = 0;
393
394 virtual void track_merge(Ref<Node>, match_stage_t, search_position_t&) = 0;
f67539c2
TL
395
396 protected:
397 Node(NodeImplURef&&);
20effc67
TL
398
399 bool is_tracked() const {
400 assert(!(super && _parent_info.has_value()));
401 return (super || _parent_info.has_value());
402 }
403
f67539c2 404 bool is_root() const {
20effc67 405 assert(is_tracked());
f67539c2
TL
406 return !_parent_info.has_value();
407 }
408
409 // as root
410 void make_root(context_t c, Super::URef&& _super);
411 void make_root_new(context_t c, Super::URef&& _super) {
412 assert(_super->get_root_laddr() == L_ADDR_NULL);
413 make_root(c, std::move(_super));
414 }
415 void make_root_from(context_t c, Super::URef&& _super, laddr_t from_addr) {
416 assert(_super->get_root_laddr() == from_addr);
417 make_root(c, std::move(_super));
418 }
419 void as_root(Super::URef&& _super);
20effc67
TL
420 eagain_ifuture<> upgrade_root(context_t, laddr_t);
421
422 Super::URef deref_super();
f67539c2
TL
423
424 // as child/non-root
425 template <bool VALIDATE = true>
426 void as_child(const search_position_t&, Ref<InternalNode>);
20effc67 427
f67539c2
TL
428 struct parent_info_t {
429 search_position_t position;
430 Ref<InternalNode> ptr;
431 };
432 const parent_info_t& parent_info() const { return *_parent_info; }
20effc67
TL
433
434 Ref<InternalNode> deref_parent();
435
436 eagain_ifuture<> apply_split_to_parent(context_t, Ref<Node>&&, Ref<Node>&&, bool);
437 eagain_ifuture<Ref<tree_cursor_t>> get_next_cursor_from_parent(context_t);
438 template <bool FORCE_MERGE = false>
439 eagain_ifuture<> try_merge_adjacent(context_t, bool, Ref<Node>&&);
440 eagain_ifuture<> erase_node(context_t, Ref<Node>&&);
441 template <bool FORCE_MERGE = false>
442 eagain_ifuture<> fix_parent_index(context_t, Ref<Node>&&, bool);
443 eagain_ifuture<NodeExtentMutable> rebuild_extent(context_t);
444 eagain_ifuture<> retire(context_t, Ref<Node>&&);
445 void make_tail(context_t);
f67539c2
TL
446
447 private:
448 /**
449 * Reversed resource management (Node)
450 *
451 * Root Node holds a reference to its parent Super class, so its parent
452 * will be alive as long as this root node is alive.
453 *
454 * None-root Node holds a reference to its parent Node, so its parent will
455 * be alive as long as any of it's children is alive.
456 */
457 // as root
458 Super::URef super;
459 // as child/non-root
460 std::optional<parent_info_t> _parent_info;
461
462 private:
20effc67 463 static eagain_ifuture<Ref<Node>> load(context_t, laddr_t, bool expect_is_level_tail);
f67539c2
TL
464
465 NodeImplURef impl;
466 friend class InternalNode;
467};
468inline std::ostream& operator<<(std::ostream& os, const Node& node) {
469 return node.dump_brief(os);
470}
471
472/**
473 * InternalNode
474 *
475 * A concrete implementation of Node class that represents an internal tree
476 * node. Its level is always positive and its values are logical block
477 * addresses to its child nodes. An internal node cannot be empty.
478 */
479class InternalNode final : public Node {
480 public:
481 // public to Node
482 InternalNode(InternalNodeImpl*, NodeImplURef&&);
483 ~InternalNode() override { assert(tracked_child_nodes.empty()); }
484 InternalNode(const InternalNode&) = delete;
485 InternalNode(InternalNode&&) = delete;
486 InternalNode& operator=(const InternalNode&) = delete;
487 InternalNode& operator=(InternalNode&&) = delete;
488
20effc67
TL
489 eagain_ifuture<Ref<tree_cursor_t>> get_next_cursor(context_t, const search_position_t&);
490
491 eagain_ifuture<> apply_child_split(context_t, Ref<Node>&& left, Ref<Node>&& right, bool);
492
f67539c2
TL
493 template <bool VALIDATE>
494 void do_track_child(Node& child) {
495 if constexpr (VALIDATE) {
496 validate_child(child);
497 }
498 auto& child_pos = child.parent_info().position;
499 assert(tracked_child_nodes.find(child_pos) == tracked_child_nodes.end());
500 tracked_child_nodes[child_pos] = &child;
501 }
20effc67 502
f67539c2 503 void do_untrack_child(const Node& child) {
20effc67 504 assert(check_is_tracking(child));
f67539c2 505 auto& child_pos = child.parent_info().position;
f67539c2
TL
506 [[maybe_unused]] auto removed = tracked_child_nodes.erase(child_pos);
507 assert(removed);
508 }
509
20effc67
TL
510 bool check_is_tracking(const Node& child) const {
511 auto& child_pos = child.parent_info().position;
512 auto found = tracked_child_nodes.find(child_pos);
513 if (found != tracked_child_nodes.end() && found->second == &child) {
514 assert(child.parent_info().ptr == this);
515 return true;
516 } else {
517 return false;
518 }
519 }
f67539c2 520
20effc67
TL
521 eagain_ifuture<std::pair<Ref<Node>, Ref<Node>>> get_child_peers(
522 context_t, const search_position_t&);
f67539c2 523
20effc67
TL
524 eagain_ifuture<> erase_child(context_t, Ref<Node>&&);
525
526 template <bool FORCE_MERGE = false>
527 eagain_ifuture<> fix_index(context_t, Ref<Node>&&, bool);
528
529 template <bool FORCE_MERGE = false>
530 eagain_ifuture<> apply_children_merge(
531 context_t, Ref<Node>&& left, laddr_t, Ref<Node>&& right, bool update_index);
532
533 void validate_child_tracked(const Node& child) const {
534 validate_child(child);
535 assert(tracked_child_nodes.find(child.parent_info().position) !=
536 tracked_child_nodes.end());
537 assert(tracked_child_nodes.find(child.parent_info().position)->second == &child);
538 }
539
540 void validate_child_inconsistent(const Node& child) const;
f67539c2 541
f67539c2
TL
542 void validate_tracked_children() const {
543#ifndef NDEBUG
544 for (auto& kv : tracked_child_nodes) {
545 assert(kv.first == kv.second->parent_info().position);
546 validate_child(*kv.second);
547 }
548#endif
549 }
20effc67
TL
550
551 void track_make_tail(const search_position_t&);
552
553 static eagain_ifuture<Ref<InternalNode>> allocate_root(
554 context_t, laddr_t, level_t, laddr_t, Super::URef&&);
555
556 protected:
557 eagain_ifuture<Ref<tree_cursor_t>> lookup_smallest(context_t) override;
558 eagain_ifuture<Ref<tree_cursor_t>> lookup_largest(context_t) override;
559 eagain_ifuture<search_result_t> lower_bound_tracked(
560 context_t, const key_hobj_t&, MatchHistory&) override;
561 eagain_ifuture<> do_get_tree_stats(context_t, tree_stats_t&) override;
562 bool is_tracking() const override {
563 return !tracked_child_nodes.empty();
564 }
565 void track_merge(Ref<Node>, match_stage_t, search_position_t&) override;
566
567 eagain_ifuture<> test_clone_root(context_t, RootNodeTracker&) const override;
568
569 private:
570 eagain_ifuture<> try_downgrade_root(context_t, Ref<Node>&&);
571
572 eagain_ifuture<Ref<InternalNode>> insert_or_split(
573 context_t, const search_position_t&, const key_view_t&, Ref<Node>,
574 Ref<Node> outdated_child=nullptr);
575
576 // XXX: extract a common tracker for InternalNode to track Node,
577 // and LeafNode to track tree_cursor_t.
578 eagain_ifuture<Ref<Node>> get_or_track_child(context_t, const search_position_t&, laddr_t);
579 template <bool VALIDATE = true>
580 void track_insert(
581 const search_position_t&, match_stage_t, Ref<Node>, Ref<Node> nxt_child = nullptr);
582 void replace_track(Ref<Node> new_child, Ref<Node> old_child, bool);
583 void track_split(const search_position_t&, Ref<InternalNode>);
584 template <bool VALIDATE = true>
585 void track_erase(const search_position_t&, match_stage_t);
f67539c2
TL
586 void validate_child(const Node& child) const;
587
588 struct fresh_node_t {
589 Ref<InternalNode> node;
590 NodeExtentMutable mut;
591 std::pair<Ref<Node>, NodeExtentMutable> make_pair() {
592 return std::make_pair(Ref<Node>(node), mut);
593 }
594 };
20effc67 595 static eagain_ifuture<fresh_node_t> allocate(context_t, laddr_t, field_type_t, bool, level_t);
f67539c2
TL
596
597 private:
598 /**
599 * Reversed resource management (InternalNode)
600 *
601 * InteralNode keeps track of its child nodes which are still alive in
602 * memory, and their positions will be updated throughout
603 * insert/split/delete/merge operations of this node.
604 */
605 // XXX: leverage intrusive data structure to control memory overhead
606 std::map<search_position_t, Node*> tracked_child_nodes;
607 InternalNodeImpl* impl;
608};
609
610/**
611 * LeafNode
612 *
613 * A concrete implementation of Node class that represents a leaf tree node.
614 * Its level is always 0. A leaf node can only be empty if it is root.
615 */
616class LeafNode final : public Node {
617 public:
618 // public to tree_cursor_t
619 ~LeafNode() override { assert(tracked_cursors.empty()); }
620 LeafNode(const LeafNode&) = delete;
621 LeafNode(LeafNode&&) = delete;
622 LeafNode& operator=(const LeafNode&) = delete;
623 LeafNode& operator=(LeafNode&&) = delete;
624
625 bool is_level_tail() const;
20effc67
TL
626 node_version_t get_version() const;
627 const char* read() const;
628 extent_len_t get_node_size() const;
629 std::tuple<key_view_t, const value_header_t*> get_kv(const search_position_t&) const;
630 eagain_ifuture<Ref<tree_cursor_t>> get_next_cursor(context_t, const search_position_t&);
631
632 /**
633 * erase
634 *
635 * Removes a key-value pair from the position.
636 *
637 * If get_next is true, returns the cursor pointing to the next key-value
638 * pair that followed the erased element, which can be nullptr if is end.
639 */
640 template <bool FORCE_MERGE>
641 eagain_ifuture<Ref<tree_cursor_t>> erase(
642 context_t, const search_position_t&, bool get_next);
643
f67539c2
TL
644 template <bool VALIDATE>
645 void do_track_cursor(tree_cursor_t& cursor) {
646 if constexpr (VALIDATE) {
647 validate_cursor(cursor);
648 }
649 auto& cursor_pos = cursor.get_position();
650 assert(tracked_cursors.find(cursor_pos) == tracked_cursors.end());
20effc67 651 tracked_cursors.emplace(cursor_pos, &cursor);
f67539c2 652 }
20effc67 653 void do_untrack_cursor(const tree_cursor_t& cursor) {
f67539c2
TL
654 validate_cursor(cursor);
655 auto& cursor_pos = cursor.get_position();
20effc67 656 assert(check_is_tracking(cursor));
f67539c2
TL
657 [[maybe_unused]] auto removed = tracked_cursors.erase(cursor_pos);
658 assert(removed);
659 }
20effc67
TL
660 bool check_is_tracking(const tree_cursor_t& cursor) const {
661 auto& cursor_pos = cursor.get_position();
662 auto found = tracked_cursors.find(cursor_pos);
663 if (found != tracked_cursors.end() && found->second == &cursor) {
664 assert(cursor.ref_leaf_node == this);
665 return true;
666 } else {
667 return false;
668 }
669 }
670
671 eagain_ifuture<> extend_value(context_t, const search_position_t&, value_size_t);
672 eagain_ifuture<> trim_value(context_t, const search_position_t&, value_size_t);
673
674 std::pair<NodeExtentMutable&, ValueDeltaRecorder*>
675 prepare_mutate_value_payload(context_t);
f67539c2
TL
676
677 protected:
20effc67
TL
678 eagain_ifuture<Ref<tree_cursor_t>> lookup_smallest(context_t) override;
679 eagain_ifuture<Ref<tree_cursor_t>> lookup_largest(context_t) override;
680 eagain_ifuture<search_result_t> lower_bound_tracked(
f67539c2 681 context_t, const key_hobj_t&, MatchHistory&) override;
20effc67
TL
682 eagain_ifuture<> do_get_tree_stats(context_t, tree_stats_t&) override;
683 bool is_tracking() const override {
684 return !tracked_cursors.empty();
685 }
686 void track_merge(Ref<Node>, match_stage_t, search_position_t&) override;
f67539c2 687
20effc67 688 eagain_ifuture<> test_clone_root(context_t, RootNodeTracker&) const override;
f67539c2
TL
689
690 private:
691 LeafNode(LeafNodeImpl*, NodeImplURef&&);
20effc67
TL
692 eagain_ifuture<Ref<tree_cursor_t>> insert_value(
693 context_t, const key_hobj_t&, value_config_t,
f67539c2
TL
694 const search_position_t&, const MatchHistory&,
695 match_stat_t mstat);
20effc67 696 static eagain_ifuture<Ref<LeafNode>> allocate_root(context_t, RootNodeTracker&);
f67539c2
TL
697 friend class Node;
698
699 private:
700 // XXX: extract a common tracker for InternalNode to track Node,
701 // and LeafNode to track tree_cursor_t.
702 Ref<tree_cursor_t> get_or_track_cursor(
20effc67 703 const search_position_t&, const key_view_t&, const value_header_t*);
f67539c2 704 Ref<tree_cursor_t> track_insert(
20effc67 705 const search_position_t&, match_stage_t, const value_header_t*);
f67539c2 706 void track_split(const search_position_t&, Ref<LeafNode>);
20effc67 707 void track_erase(const search_position_t&, match_stage_t);
f67539c2
TL
708 void validate_tracked_cursors() const {
709#ifndef NDEBUG
710 for (auto& kv : tracked_cursors) {
711 assert(kv.first == kv.second->get_position());
712 validate_cursor(*kv.second);
713 }
714#endif
715 }
20effc67 716 void validate_cursor(const tree_cursor_t& cursor) const;
f67539c2
TL
717 // invalidate p_value pointers in tree_cursor_t
718 void on_layout_change() { ++layout_version; }
719
720 struct fresh_node_t {
721 Ref<LeafNode> node;
722 NodeExtentMutable mut;
723 std::pair<Ref<Node>, NodeExtentMutable> make_pair() {
724 return std::make_pair(Ref<Node>(node), mut);
725 }
726 };
20effc67 727 static eagain_ifuture<fresh_node_t> allocate(context_t, laddr_t, field_type_t, bool);
f67539c2
TL
728
729 private:
730 /**
731 * Reversed resource management (LeafNode)
732 *
733 * LeafNode keeps track of the referencing cursors which are still alive in
734 * memory, and their positions will be updated throughout
735 * insert/split/delete/merge operations of this node.
736 */
737 // XXX: leverage intrusive data structure to control memory overhead
738 std::map<search_position_t, tree_cursor_t*> tracked_cursors;
739 LeafNodeImpl* impl;
740 layout_version_t layout_version = 0;
741};
742
743}