]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #pragma once | |
5 | ||
1e59de90 | 6 | #include <compare> |
f67539c2 TL |
7 | #include <map> |
8 | #include <memory> | |
9 | #include <ostream> | |
10 | #include <boost/smart_ptr/intrusive_ref_counter.hpp> | |
11 | ||
12 | #include "crimson/common/type_helpers.h" | |
13 | ||
14 | #include "node_extent_mutable.h" | |
15 | #include "stages/key_layout.h" | |
16 | #include "stages/stage_types.h" | |
17 | #include "super.h" | |
20effc67 | 18 | #include "value.h" |
f67539c2 TL |
19 | |
20 | /** | |
21 | * Tree example (2 levels): | |
22 | * | |
23 | * Root node keys: [ 3 7 ] | |
24 | * values: [p1 p2 p3] | |
25 | * / | \ | |
26 | * ------- | ------- | |
27 | * | | | | |
28 | * V V V | |
29 | * Leaf node keys: [ 1 2 3] [ 4 5 7] [ 9 11 12] | |
30 | * values: [v1 v2 v3] [v4 v5 v6] [v7 v8 v9] | |
31 | * | |
32 | * Tree structure properties: | |
33 | * - As illustrated above, the parent key is strictly equal to its left child's | |
34 | * largest key; | |
35 | * - If a tree is indexing multiple seastore transactions, each transaction | |
36 | * will be mapped to a Super which points to a distinct root node. So the | |
37 | * transactions are isolated at tree level. However, tree nodes from | |
38 | * different transactions can reference the same seastore CachedExtent before | |
39 | * modification; | |
40 | * - The resources of the transactional tree are tracked by tree_cursor_ts held | |
41 | * by users. As long as any cursor is alive, the according tree hierarchy is | |
42 | * alive and keeps tracked. See the reversed resource management sections | |
43 | * below; | |
44 | */ | |
45 | ||
46 | namespace crimson::os::seastore::onode { | |
47 | ||
48 | class LeafNode; | |
49 | class InternalNode; | |
50 | ||
20effc67 TL |
51 | using layout_version_t = uint32_t; |
52 | struct node_version_t { | |
53 | layout_version_t layout; | |
54 | nextent_state_t state; | |
55 | ||
56 | bool operator==(const node_version_t& rhs) const { | |
57 | return (layout == rhs.layout && state == rhs.state); | |
58 | } | |
59 | bool operator!=(const node_version_t& rhs) const { | |
60 | return !(*this == rhs); | |
61 | } | |
62 | }; | |
63 | ||
f67539c2 TL |
64 | /** |
65 | * tree_cursor_t | |
66 | * | |
67 | * A cursor points to a position (LeafNode and search_position_t) of the tree | |
68 | * where it can find the according key and value pair. The position is updated | |
69 | * by LeafNode insert/split/delete/merge internally and is kept valid. It also | |
70 | * caches the key-value information for a specific node layout version. | |
71 | * | |
72 | * Exposes public interfaces for Btree::Cursor. | |
73 | */ | |
f67539c2 TL |
74 | class tree_cursor_t final |
75 | : public boost::intrusive_ref_counter< | |
76 | tree_cursor_t, boost::thread_unsafe_counter> { | |
77 | public: | |
f67539c2 TL |
78 | ~tree_cursor_t(); |
79 | tree_cursor_t(const tree_cursor_t&) = delete; | |
80 | tree_cursor_t(tree_cursor_t&&) = delete; | |
81 | tree_cursor_t& operator=(const tree_cursor_t&) = delete; | |
82 | tree_cursor_t& operator=(tree_cursor_t&&) = delete; | |
83 | ||
20effc67 TL |
84 | // public to Btree |
85 | ||
f67539c2 TL |
86 | /** |
87 | * is_end | |
88 | * | |
89 | * Represents one-past-the-last of all the sorted key-value | |
90 | * pairs in the tree. An end cursor won't contain valid key-value | |
91 | * information. | |
92 | */ | |
20effc67 TL |
93 | bool is_end() const { return !!ref_leaf_node && position.is_end(); } |
94 | ||
95 | /** | |
96 | * is_tracked | |
97 | * | |
98 | * Represents a key-value pair stored in the tree, which is always tracked | |
99 | * across insert/split/erase/merge operations. | |
100 | */ | |
101 | bool is_tracked() const { return !!ref_leaf_node && !position.is_end(); } | |
102 | ||
103 | /** | |
104 | * is_invalid | |
105 | * | |
106 | * Represents an invalid cursor which was once valid and tracked by the tree | |
107 | * but is now erased and untracked. User may still hold an invalid cursor. | |
108 | */ | |
109 | bool is_invalid() const { return !ref_leaf_node; } | |
f67539c2 TL |
110 | |
111 | /// Returns the key view in tree if it is not an end cursor. | |
20effc67 TL |
112 | const key_view_t& get_key_view(value_magic_t magic) const { |
113 | assert(is_tracked()); | |
114 | return cache.get_key_view(magic, position); | |
115 | } | |
116 | ||
117 | /// Returns the next tree_cursor_t in tree, can be end if there's no next. | |
118 | eagain_ifuture<Ref<tree_cursor_t>> get_next(context_t); | |
119 | ||
120 | /// Check that this is next to prv | |
121 | void assert_next_to(const tree_cursor_t&, value_magic_t) const; | |
122 | ||
123 | /// Erases the key-value pair from tree. | |
124 | template <bool FORCE_MERGE = false> | |
125 | eagain_ifuture<Ref<tree_cursor_t>> erase(context_t, bool get_next); | |
126 | ||
1e59de90 | 127 | std::strong_ordering compare_to(const tree_cursor_t&, value_magic_t) const; |
20effc67 TL |
128 | |
129 | // public to Value | |
130 | ||
131 | /// Get the latest value_header_t pointer for read. | |
132 | const value_header_t* read_value_header(value_magic_t magic) const { | |
133 | assert(is_tracked()); | |
134 | return cache.get_p_value_header(magic, position); | |
135 | } | |
136 | ||
137 | /// Prepare the node extent to be mutable and recorded. | |
138 | std::pair<NodeExtentMutable&, ValueDeltaRecorder*> | |
139 | prepare_mutate_value_payload(context_t c) { | |
140 | assert(is_tracked()); | |
1e59de90 TL |
141 | if (!is_mutated) { |
142 | is_mutated = true; | |
143 | ++(c.t.get_onode_tree_stats().num_updates); | |
144 | } | |
20effc67 TL |
145 | return cache.prepare_mutate_value_payload(c, position); |
146 | } | |
147 | ||
148 | /// Extends the size of value payload. | |
149 | eagain_ifuture<> extend_value(context_t, value_size_t); | |
f67539c2 | 150 | |
20effc67 TL |
151 | /// Trim and shrink the value payload. |
152 | eagain_ifuture<> trim_value(context_t, value_size_t); | |
153 | ||
154 | static Ref<tree_cursor_t> get_invalid() { | |
1e59de90 | 155 | Ref<tree_cursor_t> INVALID = new tree_cursor_t(); |
20effc67 TL |
156 | return INVALID; |
157 | } | |
f67539c2 TL |
158 | |
159 | private: | |
1e59de90 | 160 | // create from insert |
f67539c2 | 161 | tree_cursor_t(Ref<LeafNode>, const search_position_t&); |
1e59de90 | 162 | // create from lookup |
f67539c2 | 163 | tree_cursor_t(Ref<LeafNode>, const search_position_t&, |
20effc67 | 164 | const key_view_t&, const value_header_t*); |
f67539c2 TL |
165 | // lookup reaches the end, contain leaf node for further insert |
166 | tree_cursor_t(Ref<LeafNode>); | |
20effc67 TL |
167 | // create an invalid tree_cursor_t |
168 | tree_cursor_t() : cache{ref_leaf_node} {} | |
169 | ||
f67539c2 | 170 | const search_position_t& get_position() const { return position; } |
20effc67 | 171 | Ref<LeafNode> get_leaf_node() const { return ref_leaf_node; } |
f67539c2 TL |
172 | template <bool VALIDATE> |
173 | void update_track(Ref<LeafNode>, const search_position_t&); | |
20effc67 TL |
174 | void update_cache_same_node(const key_view_t&, |
175 | const value_header_t*) const; | |
176 | void invalidate(); | |
177 | ||
1e59de90 TL |
178 | static Ref<tree_cursor_t> create_inserted( |
179 | Ref<LeafNode> node, const search_position_t& pos) { | |
20effc67 TL |
180 | return new tree_cursor_t(node, pos); |
181 | } | |
182 | ||
1e59de90 TL |
183 | static Ref<tree_cursor_t> create_tracked( |
184 | Ref<LeafNode> node, const search_position_t& pos, | |
185 | const key_view_t& key, const value_header_t* p_header) { | |
20effc67 TL |
186 | return new tree_cursor_t(node, pos, key, p_header); |
187 | } | |
188 | ||
189 | static Ref<tree_cursor_t> create_end(Ref<LeafNode> node) { | |
190 | return new tree_cursor_t(node); | |
191 | } | |
f67539c2 | 192 | |
f67539c2 TL |
193 | /** |
194 | * Reversed resource management (tree_cursor_t) | |
195 | * | |
196 | * tree_cursor_t holds a reference to the LeafNode, so the LeafNode will be | |
197 | * alive as long as any of it's cursors is still referenced by user. | |
198 | */ | |
20effc67 | 199 | Ref<LeafNode> ref_leaf_node; |
f67539c2 TL |
200 | search_position_t position; |
201 | ||
1e59de90 TL |
202 | // account 1 update even if there are multiple updates to the same value |
203 | bool is_mutated = false; | |
204 | ||
20effc67 TL |
205 | /** Cache |
206 | * | |
207 | * Cached memory pointers or views which may be outdated due to | |
208 | * extent copy-on-write or asynchronous leaf node updates. | |
209 | */ | |
210 | class Cache { | |
211 | public: | |
212 | Cache(Ref<LeafNode>&); | |
213 | void validate_is_latest(const search_position_t&) const; | |
214 | void invalidate() { needs_update_all = true; } | |
215 | void update_all(const node_version_t&, const key_view_t&, const value_header_t*); | |
216 | const key_view_t& get_key_view( | |
217 | value_magic_t magic, const search_position_t& pos) { | |
218 | make_latest(magic, pos); | |
219 | return *key_view; | |
220 | } | |
221 | const value_header_t* get_p_value_header( | |
222 | value_magic_t magic, const search_position_t& pos) { | |
223 | make_latest(magic, pos); | |
224 | return p_value_header; | |
225 | } | |
226 | std::pair<NodeExtentMutable&, ValueDeltaRecorder*> | |
227 | prepare_mutate_value_payload(context_t, const search_position_t&); | |
228 | ||
229 | private: | |
230 | void maybe_duplicate(const node_version_t&); | |
231 | void make_latest(value_magic_t, const search_position_t&); | |
232 | ||
233 | // metadata about how cache is valid | |
234 | Ref<LeafNode>& ref_leaf_node; | |
235 | bool needs_update_all = true; | |
236 | node_version_t version; | |
237 | ||
238 | // cached key value info | |
239 | const char* p_node_base = nullptr; | |
240 | std::optional<key_view_t> key_view; | |
241 | const value_header_t* p_value_header = nullptr; | |
242 | ||
243 | // cached data-structures to update value payload | |
244 | std::optional<NodeExtentMutable> value_payload_mut; | |
245 | ValueDeltaRecorder* p_value_recorder = nullptr; | |
246 | }; | |
247 | mutable Cache cache; | |
f67539c2 TL |
248 | |
249 | friend class LeafNode; | |
250 | friend class Node; // get_position(), get_leaf_node() | |
251 | }; | |
252 | ||
253 | /** | |
254 | * Node | |
255 | * | |
256 | * An abstracted class for both InternalNode and LeafNode. | |
257 | * | |
258 | * Exposes public interfaces for Btree. | |
259 | */ | |
260 | class Node | |
261 | : public boost::intrusive_ref_counter< | |
262 | Node, boost::thread_unsafe_counter> { | |
263 | public: | |
264 | // public to Btree | |
f67539c2 TL |
265 | struct search_result_t { |
266 | bool is_end() const { return p_cursor->is_end(); } | |
267 | Ref<tree_cursor_t> p_cursor; | |
268 | match_stat_t mstat; | |
269 | ||
270 | MatchKindBS match() const { | |
271 | assert(mstat >= MSTAT_MIN && mstat <= MSTAT_MAX); | |
272 | return (mstat == MSTAT_EQ ? MatchKindBS::EQ : MatchKindBS::NE); | |
273 | } | |
20effc67 TL |
274 | |
275 | void validate_input_key(const key_hobj_t& key, value_magic_t magic) const { | |
276 | #ifndef NDEBUG | |
277 | if (match() == MatchKindBS::EQ) { | |
1e59de90 | 278 | assert(key == p_cursor->get_key_view(magic)); |
20effc67 TL |
279 | } else { |
280 | assert(match() == MatchKindBS::NE); | |
281 | if (p_cursor->is_tracked()) { | |
1e59de90 | 282 | assert(key < p_cursor->get_key_view(magic)); |
20effc67 TL |
283 | } else if (p_cursor->is_end()) { |
284 | // good | |
285 | } else { | |
286 | assert(p_cursor->is_invalid()); | |
287 | ceph_abort("impossible"); | |
288 | } | |
289 | } | |
290 | #endif | |
291 | } | |
f67539c2 TL |
292 | }; |
293 | ||
294 | virtual ~Node(); | |
295 | Node(const Node&) = delete; | |
296 | Node(Node&&) = delete; | |
297 | Node& operator=(const Node&) = delete; | |
298 | Node& operator=(Node&&) = delete; | |
299 | ||
300 | /** | |
301 | * level | |
302 | * | |
303 | * A positive value denotes the level (or height) of this node in tree. | |
304 | * 0 means LeafNode, positive means InternalNode. | |
305 | */ | |
306 | level_t level() const; | |
307 | ||
308 | /** | |
309 | * lookup_smallest | |
310 | * | |
311 | * Returns a cursor pointing to the smallest key in the sub-tree formed by | |
312 | * this node. | |
313 | * | |
314 | * Returns an end cursor if it is an empty root node. | |
315 | */ | |
20effc67 | 316 | virtual eagain_ifuture<Ref<tree_cursor_t>> lookup_smallest(context_t) = 0; |
f67539c2 TL |
317 | |
318 | /** | |
319 | * lookup_largest | |
320 | * | |
321 | * Returns a cursor pointing to the largest key in the sub-tree formed by | |
322 | * this node. | |
323 | * | |
324 | * Returns an end cursor if it is an empty root node. | |
325 | */ | |
20effc67 | 326 | virtual eagain_ifuture<Ref<tree_cursor_t>> lookup_largest(context_t) = 0; |
f67539c2 TL |
327 | |
328 | /** | |
329 | * lower_bound | |
330 | * | |
331 | * Returns a cursor pointing to the first element in the range [first, last) | |
332 | * of the sub-tree which does not compare less than the input key. The | |
333 | * result also denotes whether the pointed key is equal to the input key. | |
334 | * | |
335 | * Returns an end cursor with MatchKindBS::NE if: | |
336 | * - It is an empty root node; | |
337 | * - Or the input key is larger than all the keys in the sub-tree; | |
338 | */ | |
20effc67 | 339 | eagain_ifuture<search_result_t> lower_bound(context_t c, const key_hobj_t& key); |
f67539c2 TL |
340 | |
341 | /** | |
342 | * insert | |
343 | * | |
344 | * Try to insert a key-value pair into the sub-tree formed by this node. | |
345 | * | |
346 | * Returns a boolean denoting whether the insertion is successful: | |
347 | * - If true, the returned cursor points to the inserted element in tree; | |
348 | * - If false, the returned cursor points to the conflicting element in tree; | |
349 | */ | |
20effc67 TL |
350 | eagain_ifuture<std::pair<Ref<tree_cursor_t>, bool>> insert( |
351 | context_t, const key_hobj_t&, value_config_t, Ref<Node>&&); | |
352 | ||
353 | /** | |
354 | * erase | |
355 | * | |
356 | * Removes a key-value pair from the sub-tree formed by this node. | |
357 | * | |
358 | * Returns the number of erased key-value pairs (0 or 1). | |
359 | */ | |
360 | eagain_ifuture<std::size_t> erase(context_t, const key_hobj_t&, Ref<Node>&&); | |
f67539c2 TL |
361 | |
362 | /// Recursively collects the statistics of the sub-tree formed by this node | |
20effc67 | 363 | eagain_ifuture<tree_stats_t> get_tree_stats(context_t); |
f67539c2 TL |
364 | |
365 | /// Returns an ostream containing a dump of all the elements in the node. | |
366 | std::ostream& dump(std::ostream&) const; | |
367 | ||
368 | /// Returns an ostream containing an one-line summary of this node. | |
369 | std::ostream& dump_brief(std::ostream&) const; | |
370 | ||
20effc67 TL |
371 | /// Print the node name |
372 | const std::string& get_name() const; | |
373 | ||
f67539c2 | 374 | /// Initializes the tree by allocating an empty root node. |
20effc67 | 375 | static eagain_ifuture<> mkfs(context_t, RootNodeTracker&); |
f67539c2 TL |
376 | |
377 | /// Loads the tree root. The tree must be initialized. | |
20effc67 | 378 | static eagain_ifuture<Ref<Node>> load_root(context_t, RootNodeTracker&); |
f67539c2 TL |
379 | |
380 | // Only for unit test purposes. | |
381 | void test_make_destructable(context_t, NodeExtentMutable&, Super::URef&&); | |
20effc67 | 382 | virtual eagain_ifuture<> test_clone_root(context_t, RootNodeTracker&) const = 0; |
f67539c2 TL |
383 | |
384 | protected: | |
20effc67 | 385 | virtual eagain_ifuture<> test_clone_non_root(context_t, Ref<InternalNode>) const { |
f67539c2 TL |
386 | ceph_abort("impossible path"); |
387 | } | |
20effc67 | 388 | virtual eagain_ifuture<search_result_t> lower_bound_tracked( |
f67539c2 | 389 | context_t, const key_hobj_t&, MatchHistory&) = 0; |
20effc67 TL |
390 | virtual eagain_ifuture<> do_get_tree_stats(context_t, tree_stats_t&) = 0; |
391 | ||
392 | virtual bool is_tracking() const = 0; | |
393 | ||
394 | virtual void track_merge(Ref<Node>, match_stage_t, search_position_t&) = 0; | |
f67539c2 TL |
395 | |
396 | protected: | |
397 | Node(NodeImplURef&&); | |
20effc67 TL |
398 | |
399 | bool is_tracked() const { | |
400 | assert(!(super && _parent_info.has_value())); | |
401 | return (super || _parent_info.has_value()); | |
402 | } | |
403 | ||
f67539c2 | 404 | bool is_root() const { |
20effc67 | 405 | assert(is_tracked()); |
f67539c2 TL |
406 | return !_parent_info.has_value(); |
407 | } | |
408 | ||
409 | // as root | |
410 | void make_root(context_t c, Super::URef&& _super); | |
411 | void make_root_new(context_t c, Super::URef&& _super) { | |
412 | assert(_super->get_root_laddr() == L_ADDR_NULL); | |
413 | make_root(c, std::move(_super)); | |
414 | } | |
415 | void make_root_from(context_t c, Super::URef&& _super, laddr_t from_addr) { | |
416 | assert(_super->get_root_laddr() == from_addr); | |
417 | make_root(c, std::move(_super)); | |
418 | } | |
419 | void as_root(Super::URef&& _super); | |
20effc67 TL |
420 | eagain_ifuture<> upgrade_root(context_t, laddr_t); |
421 | ||
422 | Super::URef deref_super(); | |
f67539c2 TL |
423 | |
424 | // as child/non-root | |
425 | template <bool VALIDATE = true> | |
426 | void as_child(const search_position_t&, Ref<InternalNode>); | |
20effc67 | 427 | |
f67539c2 TL |
428 | struct parent_info_t { |
429 | search_position_t position; | |
430 | Ref<InternalNode> ptr; | |
431 | }; | |
432 | const parent_info_t& parent_info() const { return *_parent_info; } | |
20effc67 TL |
433 | |
434 | Ref<InternalNode> deref_parent(); | |
435 | ||
436 | eagain_ifuture<> apply_split_to_parent(context_t, Ref<Node>&&, Ref<Node>&&, bool); | |
437 | eagain_ifuture<Ref<tree_cursor_t>> get_next_cursor_from_parent(context_t); | |
438 | template <bool FORCE_MERGE = false> | |
439 | eagain_ifuture<> try_merge_adjacent(context_t, bool, Ref<Node>&&); | |
440 | eagain_ifuture<> erase_node(context_t, Ref<Node>&&); | |
441 | template <bool FORCE_MERGE = false> | |
442 | eagain_ifuture<> fix_parent_index(context_t, Ref<Node>&&, bool); | |
443 | eagain_ifuture<NodeExtentMutable> rebuild_extent(context_t); | |
444 | eagain_ifuture<> retire(context_t, Ref<Node>&&); | |
445 | void make_tail(context_t); | |
f67539c2 TL |
446 | |
447 | private: | |
448 | /** | |
449 | * Reversed resource management (Node) | |
450 | * | |
451 | * Root Node holds a reference to its parent Super class, so its parent | |
452 | * will be alive as long as this root node is alive. | |
453 | * | |
454 | * None-root Node holds a reference to its parent Node, so its parent will | |
455 | * be alive as long as any of it's children is alive. | |
456 | */ | |
457 | // as root | |
458 | Super::URef super; | |
459 | // as child/non-root | |
460 | std::optional<parent_info_t> _parent_info; | |
461 | ||
462 | private: | |
20effc67 | 463 | static eagain_ifuture<Ref<Node>> load(context_t, laddr_t, bool expect_is_level_tail); |
f67539c2 TL |
464 | |
465 | NodeImplURef impl; | |
466 | friend class InternalNode; | |
467 | }; | |
468 | inline std::ostream& operator<<(std::ostream& os, const Node& node) { | |
469 | return node.dump_brief(os); | |
470 | } | |
471 | ||
472 | /** | |
473 | * InternalNode | |
474 | * | |
475 | * A concrete implementation of Node class that represents an internal tree | |
476 | * node. Its level is always positive and its values are logical block | |
477 | * addresses to its child nodes. An internal node cannot be empty. | |
478 | */ | |
479 | class InternalNode final : public Node { | |
480 | public: | |
481 | // public to Node | |
482 | InternalNode(InternalNodeImpl*, NodeImplURef&&); | |
483 | ~InternalNode() override { assert(tracked_child_nodes.empty()); } | |
484 | InternalNode(const InternalNode&) = delete; | |
485 | InternalNode(InternalNode&&) = delete; | |
486 | InternalNode& operator=(const InternalNode&) = delete; | |
487 | InternalNode& operator=(InternalNode&&) = delete; | |
488 | ||
20effc67 TL |
489 | eagain_ifuture<Ref<tree_cursor_t>> get_next_cursor(context_t, const search_position_t&); |
490 | ||
491 | eagain_ifuture<> apply_child_split(context_t, Ref<Node>&& left, Ref<Node>&& right, bool); | |
492 | ||
f67539c2 TL |
493 | template <bool VALIDATE> |
494 | void do_track_child(Node& child) { | |
495 | if constexpr (VALIDATE) { | |
496 | validate_child(child); | |
497 | } | |
498 | auto& child_pos = child.parent_info().position; | |
499 | assert(tracked_child_nodes.find(child_pos) == tracked_child_nodes.end()); | |
500 | tracked_child_nodes[child_pos] = &child; | |
501 | } | |
20effc67 | 502 | |
f67539c2 | 503 | void do_untrack_child(const Node& child) { |
20effc67 | 504 | assert(check_is_tracking(child)); |
f67539c2 | 505 | auto& child_pos = child.parent_info().position; |
f67539c2 TL |
506 | [[maybe_unused]] auto removed = tracked_child_nodes.erase(child_pos); |
507 | assert(removed); | |
508 | } | |
509 | ||
20effc67 TL |
510 | bool check_is_tracking(const Node& child) const { |
511 | auto& child_pos = child.parent_info().position; | |
512 | auto found = tracked_child_nodes.find(child_pos); | |
513 | if (found != tracked_child_nodes.end() && found->second == &child) { | |
514 | assert(child.parent_info().ptr == this); | |
515 | return true; | |
516 | } else { | |
517 | return false; | |
518 | } | |
519 | } | |
f67539c2 | 520 | |
20effc67 TL |
521 | eagain_ifuture<std::pair<Ref<Node>, Ref<Node>>> get_child_peers( |
522 | context_t, const search_position_t&); | |
f67539c2 | 523 | |
20effc67 TL |
524 | eagain_ifuture<> erase_child(context_t, Ref<Node>&&); |
525 | ||
526 | template <bool FORCE_MERGE = false> | |
527 | eagain_ifuture<> fix_index(context_t, Ref<Node>&&, bool); | |
528 | ||
529 | template <bool FORCE_MERGE = false> | |
530 | eagain_ifuture<> apply_children_merge( | |
531 | context_t, Ref<Node>&& left, laddr_t, Ref<Node>&& right, bool update_index); | |
532 | ||
533 | void validate_child_tracked(const Node& child) const { | |
534 | validate_child(child); | |
535 | assert(tracked_child_nodes.find(child.parent_info().position) != | |
536 | tracked_child_nodes.end()); | |
537 | assert(tracked_child_nodes.find(child.parent_info().position)->second == &child); | |
538 | } | |
539 | ||
540 | void validate_child_inconsistent(const Node& child) const; | |
f67539c2 | 541 | |
f67539c2 TL |
542 | void validate_tracked_children() const { |
543 | #ifndef NDEBUG | |
544 | for (auto& kv : tracked_child_nodes) { | |
545 | assert(kv.first == kv.second->parent_info().position); | |
546 | validate_child(*kv.second); | |
547 | } | |
548 | #endif | |
549 | } | |
20effc67 TL |
550 | |
551 | void track_make_tail(const search_position_t&); | |
552 | ||
553 | static eagain_ifuture<Ref<InternalNode>> allocate_root( | |
554 | context_t, laddr_t, level_t, laddr_t, Super::URef&&); | |
555 | ||
556 | protected: | |
557 | eagain_ifuture<Ref<tree_cursor_t>> lookup_smallest(context_t) override; | |
558 | eagain_ifuture<Ref<tree_cursor_t>> lookup_largest(context_t) override; | |
559 | eagain_ifuture<search_result_t> lower_bound_tracked( | |
560 | context_t, const key_hobj_t&, MatchHistory&) override; | |
561 | eagain_ifuture<> do_get_tree_stats(context_t, tree_stats_t&) override; | |
562 | bool is_tracking() const override { | |
563 | return !tracked_child_nodes.empty(); | |
564 | } | |
565 | void track_merge(Ref<Node>, match_stage_t, search_position_t&) override; | |
566 | ||
567 | eagain_ifuture<> test_clone_root(context_t, RootNodeTracker&) const override; | |
568 | ||
569 | private: | |
570 | eagain_ifuture<> try_downgrade_root(context_t, Ref<Node>&&); | |
571 | ||
572 | eagain_ifuture<Ref<InternalNode>> insert_or_split( | |
573 | context_t, const search_position_t&, const key_view_t&, Ref<Node>, | |
574 | Ref<Node> outdated_child=nullptr); | |
575 | ||
576 | // XXX: extract a common tracker for InternalNode to track Node, | |
577 | // and LeafNode to track tree_cursor_t. | |
578 | eagain_ifuture<Ref<Node>> get_or_track_child(context_t, const search_position_t&, laddr_t); | |
579 | template <bool VALIDATE = true> | |
580 | void track_insert( | |
581 | const search_position_t&, match_stage_t, Ref<Node>, Ref<Node> nxt_child = nullptr); | |
582 | void replace_track(Ref<Node> new_child, Ref<Node> old_child, bool); | |
583 | void track_split(const search_position_t&, Ref<InternalNode>); | |
584 | template <bool VALIDATE = true> | |
585 | void track_erase(const search_position_t&, match_stage_t); | |
f67539c2 TL |
586 | void validate_child(const Node& child) const; |
587 | ||
588 | struct fresh_node_t { | |
589 | Ref<InternalNode> node; | |
590 | NodeExtentMutable mut; | |
591 | std::pair<Ref<Node>, NodeExtentMutable> make_pair() { | |
592 | return std::make_pair(Ref<Node>(node), mut); | |
593 | } | |
594 | }; | |
20effc67 | 595 | static eagain_ifuture<fresh_node_t> allocate(context_t, laddr_t, field_type_t, bool, level_t); |
f67539c2 TL |
596 | |
597 | private: | |
598 | /** | |
599 | * Reversed resource management (InternalNode) | |
600 | * | |
601 | * InteralNode keeps track of its child nodes which are still alive in | |
602 | * memory, and their positions will be updated throughout | |
603 | * insert/split/delete/merge operations of this node. | |
604 | */ | |
605 | // XXX: leverage intrusive data structure to control memory overhead | |
606 | std::map<search_position_t, Node*> tracked_child_nodes; | |
607 | InternalNodeImpl* impl; | |
608 | }; | |
609 | ||
610 | /** | |
611 | * LeafNode | |
612 | * | |
613 | * A concrete implementation of Node class that represents a leaf tree node. | |
614 | * Its level is always 0. A leaf node can only be empty if it is root. | |
615 | */ | |
616 | class LeafNode final : public Node { | |
617 | public: | |
618 | // public to tree_cursor_t | |
619 | ~LeafNode() override { assert(tracked_cursors.empty()); } | |
620 | LeafNode(const LeafNode&) = delete; | |
621 | LeafNode(LeafNode&&) = delete; | |
622 | LeafNode& operator=(const LeafNode&) = delete; | |
623 | LeafNode& operator=(LeafNode&&) = delete; | |
624 | ||
625 | bool is_level_tail() const; | |
20effc67 TL |
626 | node_version_t get_version() const; |
627 | const char* read() const; | |
628 | extent_len_t get_node_size() const; | |
629 | std::tuple<key_view_t, const value_header_t*> get_kv(const search_position_t&) const; | |
630 | eagain_ifuture<Ref<tree_cursor_t>> get_next_cursor(context_t, const search_position_t&); | |
631 | ||
632 | /** | |
633 | * erase | |
634 | * | |
635 | * Removes a key-value pair from the position. | |
636 | * | |
637 | * If get_next is true, returns the cursor pointing to the next key-value | |
638 | * pair that followed the erased element, which can be nullptr if is end. | |
639 | */ | |
640 | template <bool FORCE_MERGE> | |
641 | eagain_ifuture<Ref<tree_cursor_t>> erase( | |
642 | context_t, const search_position_t&, bool get_next); | |
643 | ||
f67539c2 TL |
644 | template <bool VALIDATE> |
645 | void do_track_cursor(tree_cursor_t& cursor) { | |
646 | if constexpr (VALIDATE) { | |
647 | validate_cursor(cursor); | |
648 | } | |
649 | auto& cursor_pos = cursor.get_position(); | |
650 | assert(tracked_cursors.find(cursor_pos) == tracked_cursors.end()); | |
20effc67 | 651 | tracked_cursors.emplace(cursor_pos, &cursor); |
f67539c2 | 652 | } |
20effc67 | 653 | void do_untrack_cursor(const tree_cursor_t& cursor) { |
f67539c2 TL |
654 | validate_cursor(cursor); |
655 | auto& cursor_pos = cursor.get_position(); | |
20effc67 | 656 | assert(check_is_tracking(cursor)); |
f67539c2 TL |
657 | [[maybe_unused]] auto removed = tracked_cursors.erase(cursor_pos); |
658 | assert(removed); | |
659 | } | |
20effc67 TL |
660 | bool check_is_tracking(const tree_cursor_t& cursor) const { |
661 | auto& cursor_pos = cursor.get_position(); | |
662 | auto found = tracked_cursors.find(cursor_pos); | |
663 | if (found != tracked_cursors.end() && found->second == &cursor) { | |
664 | assert(cursor.ref_leaf_node == this); | |
665 | return true; | |
666 | } else { | |
667 | return false; | |
668 | } | |
669 | } | |
670 | ||
671 | eagain_ifuture<> extend_value(context_t, const search_position_t&, value_size_t); | |
672 | eagain_ifuture<> trim_value(context_t, const search_position_t&, value_size_t); | |
673 | ||
674 | std::pair<NodeExtentMutable&, ValueDeltaRecorder*> | |
675 | prepare_mutate_value_payload(context_t); | |
f67539c2 TL |
676 | |
677 | protected: | |
20effc67 TL |
678 | eagain_ifuture<Ref<tree_cursor_t>> lookup_smallest(context_t) override; |
679 | eagain_ifuture<Ref<tree_cursor_t>> lookup_largest(context_t) override; | |
680 | eagain_ifuture<search_result_t> lower_bound_tracked( | |
f67539c2 | 681 | context_t, const key_hobj_t&, MatchHistory&) override; |
20effc67 TL |
682 | eagain_ifuture<> do_get_tree_stats(context_t, tree_stats_t&) override; |
683 | bool is_tracking() const override { | |
684 | return !tracked_cursors.empty(); | |
685 | } | |
686 | void track_merge(Ref<Node>, match_stage_t, search_position_t&) override; | |
f67539c2 | 687 | |
20effc67 | 688 | eagain_ifuture<> test_clone_root(context_t, RootNodeTracker&) const override; |
f67539c2 TL |
689 | |
690 | private: | |
691 | LeafNode(LeafNodeImpl*, NodeImplURef&&); | |
20effc67 TL |
692 | eagain_ifuture<Ref<tree_cursor_t>> insert_value( |
693 | context_t, const key_hobj_t&, value_config_t, | |
f67539c2 TL |
694 | const search_position_t&, const MatchHistory&, |
695 | match_stat_t mstat); | |
20effc67 | 696 | static eagain_ifuture<Ref<LeafNode>> allocate_root(context_t, RootNodeTracker&); |
f67539c2 TL |
697 | friend class Node; |
698 | ||
699 | private: | |
700 | // XXX: extract a common tracker for InternalNode to track Node, | |
701 | // and LeafNode to track tree_cursor_t. | |
702 | Ref<tree_cursor_t> get_or_track_cursor( | |
20effc67 | 703 | const search_position_t&, const key_view_t&, const value_header_t*); |
f67539c2 | 704 | Ref<tree_cursor_t> track_insert( |
20effc67 | 705 | const search_position_t&, match_stage_t, const value_header_t*); |
f67539c2 | 706 | void track_split(const search_position_t&, Ref<LeafNode>); |
20effc67 | 707 | void track_erase(const search_position_t&, match_stage_t); |
f67539c2 TL |
708 | void validate_tracked_cursors() const { |
709 | #ifndef NDEBUG | |
710 | for (auto& kv : tracked_cursors) { | |
711 | assert(kv.first == kv.second->get_position()); | |
712 | validate_cursor(*kv.second); | |
713 | } | |
714 | #endif | |
715 | } | |
20effc67 | 716 | void validate_cursor(const tree_cursor_t& cursor) const; |
f67539c2 TL |
717 | // invalidate p_value pointers in tree_cursor_t |
718 | void on_layout_change() { ++layout_version; } | |
719 | ||
720 | struct fresh_node_t { | |
721 | Ref<LeafNode> node; | |
722 | NodeExtentMutable mut; | |
723 | std::pair<Ref<Node>, NodeExtentMutable> make_pair() { | |
724 | return std::make_pair(Ref<Node>(node), mut); | |
725 | } | |
726 | }; | |
20effc67 | 727 | static eagain_ifuture<fresh_node_t> allocate(context_t, laddr_t, field_type_t, bool); |
f67539c2 TL |
728 | |
729 | private: | |
730 | /** | |
731 | * Reversed resource management (LeafNode) | |
732 | * | |
733 | * LeafNode keeps track of the referencing cursors which are still alive in | |
734 | * memory, and their positions will be updated throughout | |
735 | * insert/split/delete/merge operations of this node. | |
736 | */ | |
737 | // XXX: leverage intrusive data structure to control memory overhead | |
738 | std::map<search_position_t, tree_cursor_t*> tracked_cursors; | |
739 | LeafNodeImpl* impl; | |
740 | layout_version_t layout_version = 0; | |
741 | }; | |
742 | ||
743 | } |