// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <cassert>
#include <optional>
#include <ostream>
#include <sstream>
#include <type_traits>

#include "common/likely.h"

#include "sub_items_stage.h"
#include "item_iterator_stage.h"

namespace crimson::os::seastore::onode {

struct search_result_bs_t {
  index_t index;
  MatchKindBS match;
};
template <typename FGetKey>
search_result_bs_t binary_search(
    const full_key_t<KeyT::HOBJ>& key,
    index_t begin, index_t end, FGetKey&& f_get_key) {
  assert(begin <= end);
  while (begin < end) {
    auto total = begin + end;
    auto mid = total >> 1;
    // do not copy if return value is reference
    decltype(f_get_key(mid)) target = f_get_key(mid);
    auto match = compare_to<KeyT::HOBJ>(key, target);
    if (match == MatchKindCMP::LT) {
      end = mid;
    } else if (match == MatchKindCMP::GT) {
      begin = mid + 1;
    } else {
      return {mid, MatchKindBS::EQ};
    }
  }
  return {begin, MatchKindBS::NE};
}
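
// A minimal usage sketch of binary_search() (illustrative only; `keys` is a
// hypothetical random-access key array):
//   auto result = binary_search(key, 0, num_keys,
//       [&] (index_t i) { return keys[i]; });
//   // result.index is the lower-bound position; result.match reports
//   // whether it is an exact hit (EQ) or not (NE).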

template <typename PivotType, typename FGet>
search_result_bs_t binary_search_r(
    index_t rend, index_t rbegin, FGet&& f_get, const PivotType& key) {
  assert(rend <= rbegin);
  while (rend < rbegin) {
    auto total = rend + rbegin + 1;
    auto mid = total >> 1;
    // do not copy if return value is reference
    decltype(f_get(mid)) target = f_get(mid);
    int match = target - key;
    if (match < 0) {
      rend = mid;
    } else if (match > 0) {
      rbegin = mid - 1;
    } else {
      return {mid, MatchKindBS::EQ};
    }
  }
  return {rbegin, MatchKindBS::NE};
}
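
// binary_search_r() searches from the right for the last index whose value
// still compares <= key: the midpoint (rend + rbegin + 1) >> 1 is biased
// upwards so that `rend = mid` always makes progress and the loop terminates
// with rend == rbegin. E.g. with f = {10, 20, 30} and key = 25, the search
// settles on index 1 (f(1) == 20 <= 25) and returns {1, MatchKindBS::NE}.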

inline bool matchable(field_type_t type, match_stat_t mstat) {
  assert(mstat >= MSTAT_MIN && mstat <= MSTAT_MAX);
  /*
   * compressed prefix by field type:
   *   N0: NONE
   *   N1: pool/shard
   *   N2: pool/shard crush
   *   N3: pool/shard crush ns/oid
   *
   * if the key matches the node's compressed prefix, return true;
   * else, return false
   */
#ifndef NDEBUG
  if (mstat == MSTAT_END) {
    assert(type == field_type_t::N0);
  }
#endif
  return mstat + to_unsigned(type) < 4;
}
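
// Intuition for `mstat + to_unsigned(type) < 4` (a sketch, assuming
// to_unsigned() maps N0..N3 to 0..3 per the table above): a larger type
// value means a longer compressed prefix, while a larger mstat means the key
// diverges from the indexed keys at a higher stage, i.e. it shares a shorter
// prefix. The key can only be addressed under this node if it fully matches
// the compressed prefix, which holds exactly when the sum stays below 4.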

inline void assert_mstat(
    const full_key_t<KeyT::HOBJ>& key,
    const full_key_t<KeyT::VIEW>& index,
    match_stat_t mstat) {
  assert(mstat >= MSTAT_MIN && mstat <= MSTAT_LT2);
  // key < index ...
  switch (mstat) {
  case MSTAT_EQ:
    break;
  case MSTAT_LT0:
    assert(compare_to<KeyT::HOBJ>(key, index.snap_gen_packed()) == MatchKindCMP::LT);
    break;
  case MSTAT_LT1:
    assert(compare_to<KeyT::HOBJ>(key, index.ns_oid_view()) == MatchKindCMP::LT);
    break;
  case MSTAT_LT2:
    if (index.has_shard_pool()) {
      assert(compare_to<KeyT::HOBJ>(key, shard_pool_crush_t{
        index.shard_pool_packed(), index.crush_packed()}) == MatchKindCMP::LT);
    } else {
      assert(compare_to<KeyT::HOBJ>(key, index.crush_packed()) == MatchKindCMP::LT);
    }
    break;
  default:
    ceph_abort("impossible path");
  }
  // key == index ...
  // (the cases below intentionally fall through: a match at one stage
  // implies the key portions of all higher stages match as well)
  switch (mstat) {
  case MSTAT_EQ:
    assert(compare_to<KeyT::HOBJ>(key, index.snap_gen_packed()) == MatchKindCMP::EQ);
    [[fallthrough]];
  case MSTAT_LT0:
    if (!index.has_ns_oid())
      break;
    assert(index.ns_oid_view().type() == ns_oid_view_t::Type::MAX ||
           compare_to<KeyT::HOBJ>(key, index.ns_oid_view()) == MatchKindCMP::EQ);
    [[fallthrough]];
  case MSTAT_LT1:
    if (!index.has_crush())
      break;
    assert(compare_to<KeyT::HOBJ>(key, index.crush_packed()) == MatchKindCMP::EQ);
    if (!index.has_shard_pool())
      break;
    assert(compare_to<KeyT::HOBJ>(key, index.shard_pool_packed()) == MatchKindCMP::EQ);
    [[fallthrough]];
  default:
    break;
  }
}

#define NXT_STAGE_T staged<next_param_t>

enum class TrimType { BEFORE, AFTER, AT };

/**
 * staged
 *
 * Implements the recursive logic that modifies or reads the node layout
 * (N0/N1/N2/N3 * LEAF/INTERNAL) with the multi-stage design. The specific
 * stage implementations are flexible, so the implementations for different
 * stages can be assembled independently, as long as they follow the
 * definitions of the container interfaces.
 *
 * The multi-stage design indexes the different portions of onode keys
 * stage-by-stage. There are at most 3 stages for a node:
 * - STAGE_LEFT:   index shard-pool-crush for an N0 node, or crush for an N1 node;
 * - STAGE_STRING: index ns-oid for N0/N1/N2 nodes;
 * - STAGE_RIGHT:  index snap-gen for N0/N1/N2/N3 nodes;
 *
 * The intention is to consolidate the high-level indexing implementations at
 * the stage level, so we don't need to rewrite them for every stage and for
 * every node type.
 */
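
/*
 * For illustration, a full onode key conceptually decomposes across the
 * stages as follows (a sketch of the indexing order, not the physical
 * layout):
 *
 *   (shard, pool, crush) -> STAGE_LEFT
 *   (namespace, oid)     -> STAGE_STRING
 *   (snap, gen)          -> STAGE_RIGHT
 *
 * A lookup thus narrows the search space stage-by-stage, and each stage
 * only needs to compare its own portion of the key.
 */
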
template <typename Params>
struct staged {
  static_assert(Params::STAGE >= STAGE_BOTTOM);
  static_assert(Params::STAGE <= STAGE_TOP);
  using container_t = typename Params::container_t;
  using key_get_type = typename container_t::key_get_type;
  using next_param_t = typename Params::next_param_t;
  using position_t = staged_position_t<Params::STAGE>;
  using result_t = staged_result_t<Params::NODE_TYPE, Params::STAGE>;
  using value_input_t = value_input_type_t<Params::NODE_TYPE>;
  using value_t = value_type_t<Params::NODE_TYPE>;
  static constexpr auto CONTAINER_TYPE = container_t::CONTAINER_TYPE;
  static constexpr bool IS_BOTTOM = (Params::STAGE == STAGE_BOTTOM);
  static constexpr auto NODE_TYPE = Params::NODE_TYPE;
  static constexpr auto STAGE = Params::STAGE;

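  // Notation used in the split diagrams below (an informal reading of the
  // code): [s_index] is the existing item at split_index, (i_index) is where
  // the new item would be inserted, |!| marks a decided split point, and
  // |?[s_index]| marks a tentative split point where [s_index]'s side is not
  // yet decided.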
  template <bool is_exclusive>
  static void _left_or_right(index_t& split_index, index_t insert_index,
                             std::optional<bool>& is_insert_left) {
    assert(!is_insert_left.has_value());
    assert(is_valid_index(split_index));
    if constexpr (is_exclusive) {
      if (split_index <= insert_index) {
        // ...[s_index-1] |!| (i_index) [s_index]...
        // offset i_position to the right
        is_insert_left = false;
      } else {
        // ...[s_index-1] (i_index) |?[s_index]| ...
        // ...(i_index)...[s_index-1] |?[s_index]| ...
        is_insert_left = true;
        --split_index;
      }
    } else {
      if (split_index < insert_index) {
        // ...[s_index-1] |?[s_index]| ...[(i_index)s_index_k]...
        is_insert_left = false;
      } else if (split_index > insert_index) {
        // ...[(i_index)s_index-1] |?[s_index]| ...
        // ...[(i_index)s_index_k]...[s_index-1] |?[s_index]| ...
        is_insert_left = true;
      } else {
        // ...[s_index-1] |?[(i_index)s_index]| ...
        // i_to_left = std::nullopt;
      }
    }
  }

  template <ContainerType CTYPE, typename Enable = void> class _iterator_t;
  template <ContainerType CTYPE>
  class _iterator_t<CTYPE, std::enable_if_t<CTYPE == ContainerType::INDEXABLE>> {
    /*
     * indexable container type system:
     *   CONTAINER_TYPE = ContainerType::INDEXABLE
     *   keys() const -> index_t
     *   operator[](index_t) const -> key_get_type
     *   size_before(index_t) const -> extent_len_t
     *   size_overhead_at(index_t) const -> node_offset_t
     *   (IS_BOTTOM) get_p_value(index_t) const -> const value_t*
     *   (!IS_BOTTOM) size_to_nxt_at(index_t) const -> node_offset_t
     *   (!IS_BOTTOM) get_nxt_container(index_t) const
     *   encode(p_node_start, encoded)
     *   decode(p_node_start, node_size, delta) -> container_t
     *   static:
     *     header_size() -> node_offset_t
     *     estimate_insert(key, value) -> node_offset_t
     *     (IS_BOTTOM) insert_at(mut, src, key, value,
     *                           index, size, p_left_bound) -> const value_t*
     *     (!IS_BOTTOM) insert_prefix_at(mut, src, key,
     *                           index, size, p_left_bound) -> memory_range_t
     *     (!IS_BOTTOM) update_size_at(mut, src, index, size)
     *     trim_until(mut, container, index) -> trim_size
     *     (!IS_BOTTOM) trim_at(mut, container, index, trimmed) -> trim_size
     *     erase_at(mut, container, index, p_left_bound) -> erase_size
     *
     *   Appender::append(const container_t& src, from, items)
     */
  public:
    using me_t = _iterator_t<CTYPE>;

    _iterator_t(const container_t& container) : container{container} {
      assert(container.keys());
    }

    index_t index() const {
      return _index;
    }
    key_get_type get_key() const {
      assert(!is_end());
      return container[_index];
    }
    node_offset_t size_to_nxt() const {
      assert(!is_end());
      return container.size_to_nxt_at(_index);
    }
    template <typename T = typename NXT_STAGE_T::container_t>
    std::enable_if_t<!IS_BOTTOM, T> get_nxt_container() const {
      assert(!is_end());
      return container.get_nxt_container(_index);
    }
    template <typename T = value_t>
    std::enable_if_t<IS_BOTTOM, const T*> get_p_value() const {
      assert(!is_end());
      return container.get_p_value(_index);
    }
    bool is_last() const {
      return _index + 1 == container.keys();
    }
    bool is_end() const { return _index == container.keys(); }
    node_offset_t size() const {
      assert(!is_end());
      assert(header_size() == container.size_before(0));
      assert(container.size_before(_index + 1) > container.size_before(_index));
      return container.size_before(_index + 1) -
             container.size_before(_index);
    }
    node_offset_t size_overhead() const {
      assert(!is_end());
      return container.size_overhead_at(_index);
    }

    me_t& operator++() {
      assert(!is_end());
      assert(!is_last());
      ++_index;
      return *this;
    }
    void seek_at(index_t index) {
      assert(index < container.keys());
      seek_till_end(index);
    }
    void seek_till_end(index_t index) {
      assert(!is_end());
      assert(this->index() == 0);
      assert(index <= container.keys());
      _index = index;
    }
    void seek_last() {
      assert(!is_end());
      assert(index() == 0);
      _index = container.keys() - 1;
    }
    void set_end() {
      assert(!is_end());
      assert(is_last());
      ++_index;
    }
    // Note: possible to return an end iterator
    MatchKindBS seek(const full_key_t<KeyT::HOBJ>& key, bool exclude_last) {
      assert(!is_end());
      assert(index() == 0);
      index_t end_index = container.keys();
      if (exclude_last) {
        assert(end_index);
        --end_index;
        assert(compare_to<KeyT::HOBJ>(key, container[end_index]) == MatchKindCMP::LT);
      }
      auto ret = binary_search(key, _index, end_index,
          [this] (index_t index) { return container[index]; });
      _index = ret.index;
      return ret.match;
    }

    template <KeyT KT, typename T = value_t>
    std::enable_if_t<IS_BOTTOM, const T*> insert(
        NodeExtentMutable& mut,
        const full_key_t<KT>& key,
        const value_input_t& value,
        node_offset_t insert_size,
        const char* p_left_bound) {
      return container_t::template insert_at<KT>(
          mut, container, key, value, _index, insert_size, p_left_bound);
    }

    template <KeyT KT, typename T = memory_range_t>
    std::enable_if_t<!IS_BOTTOM, T> insert_prefix(
        NodeExtentMutable& mut, const full_key_t<KT>& key,
        node_offset_t size, const char* p_left_bound) {
      return container_t::template insert_prefix_at<KT>(
          mut, container, key, _index, size, p_left_bound);
    }

    template <typename T = void>
    std::enable_if_t<!IS_BOTTOM, T>
    update_size(NodeExtentMutable& mut, int insert_size) {
      assert(!is_end());
      container_t::update_size_at(mut, container, _index, insert_size);
    }

    // Note: possible to return an end iterator when is_exclusive is true
    template <bool is_exclusive>
    size_t seek_split_inserted(
        size_t start_size, size_t extra_size, size_t target_size,
        index_t& insert_index, size_t insert_size,
        std::optional<bool>& is_insert_left) {
      assert(!is_end());
      assert(index() == 0);
      // replace insert_index placeholder
      if constexpr (!is_exclusive) {
        if (insert_index == INDEX_LAST) {
          insert_index = container.keys() - 1;
        }
      } else {
        if (insert_index == INDEX_END) {
          insert_index = container.keys();
        }
      }
      assert(insert_index <= container.keys());

      auto start_size_1 = start_size + extra_size;
      auto f_get_used_size = [this, start_size, start_size_1,
                              insert_index, insert_size] (index_t index) {
        size_t current_size;
        if (unlikely(index == 0)) {
          current_size = start_size;
        } else {
          current_size = start_size_1;
          if (index > insert_index) {
            current_size += insert_size;
            if constexpr (is_exclusive) {
              --index;
            }
          }
          // already includes header size
          current_size += container.size_before(index);
        }
        return current_size;
      };
      index_t s_end;
      if constexpr (is_exclusive) {
        s_end = container.keys();
      } else {
        s_end = container.keys() - 1;
      }
      _index = binary_search_r(0, s_end, f_get_used_size, target_size).index;
      size_t current_size = f_get_used_size(_index);
      assert(current_size <= target_size);

      _left_or_right<is_exclusive>(_index, insert_index, is_insert_left);
      return current_size;
    }
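
    // Note on f_get_used_size() above: it returns the total size consumed by
    // the first `index` items plus the pending insert. Once `index` passes
    // insert_index the insert_size is counted, and in the is_exclusive case
    // the inserted item also occupies its own slot, hence the `--index` to
    // map back to a physical container index before size_before() is
    // consulted.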

    size_t seek_split(size_t start_size, size_t extra_size, size_t target_size) {
      assert(!is_end());
      assert(index() == 0);
      auto start_size_1 = start_size + extra_size;
      auto f_get_used_size = [this, start_size, start_size_1] (index_t index) {
        size_t current_size;
        if (unlikely(index == 0)) {
          current_size = start_size;
        } else {
          // already includes header size
          current_size = start_size_1 + container.size_before(index);
        }
        return current_size;
      };
      _index = binary_search_r(
          0, container.keys() - 1, f_get_used_size, target_size).index;
      size_t current_size = f_get_used_size(_index);
      assert(current_size <= target_size);
      return current_size;
    }

    // Note: possible to return an end iterator if to_index == INDEX_END
    template <KeyT KT>
    void copy_out_until(
        typename container_t::template Appender<KT>& appender, index_t& to_index) {
      auto num_keys = container.keys();
      index_t items;
      if (to_index == INDEX_END) {
        items = num_keys - _index;
        appender.append(container, _index, items);
        _index = num_keys;
        to_index = _index;
      } else if (to_index == INDEX_LAST) {
        assert(!is_end());
        items = num_keys - 1 - _index;
        appender.append(container, _index, items);
        _index = num_keys - 1;
        to_index = _index;
      } else {
        assert(_index <= to_index);
        assert(to_index <= num_keys);
        items = to_index - _index;
        appender.append(container, _index, items);
        _index = to_index;
      }
    }

    node_offset_t trim_until(NodeExtentMutable& mut) {
      return container_t::trim_until(mut, container, _index);
    }

    template <typename T = node_offset_t>
    std::enable_if_t<!IS_BOTTOM, T>
    trim_at(NodeExtentMutable& mut, node_offset_t trimmed) {
      return container_t::trim_at(mut, container, _index, trimmed);
    }

    node_offset_t erase(NodeExtentMutable& mut, const char* p_left_bound) {
      assert(!is_end());
      return container_t::erase_at(mut, container, _index, p_left_bound);
    }

    template <KeyT KT>
    typename container_t::template Appender<KT>
    get_appender(NodeExtentMutable* p_mut) {
      assert(_index + 1 == container.keys());
      return typename container_t::template Appender<KT>(p_mut, container);
    }

    template <KeyT KT>
    typename container_t::template Appender<KT>
    get_appender_opened(NodeExtentMutable* p_mut) {
      if constexpr (!IS_BOTTOM) {
        assert(_index + 1 == container.keys());
        return typename container_t::template Appender<KT>(p_mut, container, true);
      } else {
        ceph_abort("impossible path");
      }
    }

    void encode(const char* p_node_start, ceph::bufferlist& encoded) const {
      container.encode(p_node_start, encoded);
      ceph::encode(_index, encoded);
    }

    static me_t decode(const char* p_node_start,
                       extent_len_t node_size,
                       ceph::bufferlist::const_iterator& delta) {
      auto container = container_t::decode(
          p_node_start, node_size, delta);
      auto ret = me_t(container);
      index_t index;
      ceph::decode(index, delta);
      ret.seek_till_end(index);
      return ret;
    }

    static node_offset_t header_size() {
      return container_t::header_size();
    }

    template <KeyT KT>
    static node_offset_t estimate_insert(
        const full_key_t<KT>& key, const value_input_t& value) {
      return container_t::template estimate_insert<KT>(key, value);
    }

  private:
    container_t container;
    index_t _index = 0;
  };

  template <ContainerType CTYPE>
  class _iterator_t<CTYPE, std::enable_if_t<CTYPE == ContainerType::ITERATIVE>> {
    /*
     * iterative container type system (!IS_BOTTOM):
     *   CONTAINER_TYPE = ContainerType::ITERATIVE
     *   index() const -> index_t
     *   get_key() const -> key_get_type
     *   size() const -> node_offset_t
     *   size_to_nxt() const -> node_offset_t
     *   size_overhead() const -> node_offset_t
     *   get_nxt_container() const
     *   has_next() const -> bool
     *   encode(p_node_start, encoded)
     *   decode(p_node_start, node_length, delta) -> container_t
     *   operator++()
     *   static:
     *     header_size() -> node_offset_t
     *     estimate_insert(key, value) -> node_offset_t
     *     insert_prefix(mut, src, key, is_end, size, p_left_bound) -> memory_range_t
     *     update_size(mut, src, size)
     *     trim_until(mut, container) -> trim_size
     *     trim_at(mut, container, trimmed) -> trim_size
     *     erase(mut, container, p_left_bound) -> erase_size
     */
    // currently the iterative iterator is only implemented with STAGE_STRING
    // for in-node space efficiency
    static_assert(STAGE == STAGE_STRING);
  public:
    using me_t = _iterator_t<CTYPE>;

    _iterator_t(const container_t& container) : container{container} {}

    index_t index() const {
      if (is_end()) {
        return container.index() + 1;
      } else {
        return container.index();
      }
    }
    key_get_type get_key() const {
      assert(!is_end());
      return container.get_key();
    }
    node_offset_t size_to_nxt() const {
      assert(!is_end());
      return container.size_to_nxt();
    }
    const typename NXT_STAGE_T::container_t get_nxt_container() const {
      assert(!is_end());
      return container.get_nxt_container();
    }
    bool is_last() const {
      assert(!is_end());
      return !container.has_next();
    }
    bool is_end() const {
#ifndef NDEBUG
      if (_is_end) {
        assert(!container.has_next());
      }
#endif
      return _is_end;
    }
    node_offset_t size() const {
      assert(!is_end());
      return container.size();
    }
    node_offset_t size_overhead() const {
      assert(!is_end());
      return container.size_overhead();
    }

    me_t& operator++() {
      assert(!is_end());
      assert(!is_last());
      ++container;
      return *this;
    }
    void seek_at(index_t index) {
      assert(!is_end());
      assert(this->index() == 0);
      while (index > 0) {
        assert(container.has_next());
        ++container;
        --index;
      }
    }
    void seek_till_end(index_t index) {
      assert(!is_end());
      assert(this->index() == 0);
      while (index > 0) {
        if (!container.has_next()) {
          assert(index == 1);
          set_end();
          break;
        }
        ++container;
        --index;
      }
    }
    void seek_last() {
      assert(!is_end());
      assert(index() == 0);
      while (container.has_next()) {
        ++container;
      }
    }
    void set_end() {
      assert(!is_end());
      assert(is_last());
      _is_end = true;
    }
    // Note: possible to return an end iterator
    MatchKindBS seek(const full_key_t<KeyT::HOBJ>& key, bool exclude_last) {
      assert(!is_end());
      assert(index() == 0);
      do {
        if (exclude_last && is_last()) {
          assert(compare_to<KeyT::HOBJ>(key, get_key()) == MatchKindCMP::LT);
          return MatchKindBS::NE;
        }
        auto match = compare_to<KeyT::HOBJ>(key, get_key());
        if (match == MatchKindCMP::LT) {
          return MatchKindBS::NE;
        } else if (match == MatchKindCMP::EQ) {
          return MatchKindBS::EQ;
        } else {
          if (container.has_next()) {
            ++container;
          } else {
            // end
            break;
          }
        }
      } while (true);
      assert(!exclude_last);
      set_end();
      return MatchKindBS::NE;
    }
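
    // Unlike the INDEXABLE variant, an ITERATIVE container only supports
    // forward iteration, so seek() above is a linear scan rather than a
    // binary search; the layout trades lookup cost for in-node space
    // efficiency (see the STAGE_STRING static_assert above).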

    template <KeyT KT>
    memory_range_t insert_prefix(
        NodeExtentMutable& mut, const full_key_t<KT>& key,
        node_offset_t size, const char* p_left_bound) {
      return container_t::template insert_prefix<KT>(
          mut, container, key, is_end(), size, p_left_bound);
    }

    void update_size(NodeExtentMutable& mut, int insert_size) {
      assert(!is_end());
      container_t::update_size(mut, container, insert_size);
    }

    // Note: possible to return an end iterator when is_exclusive is true
    // insert_index can still be INDEX_LAST or INDEX_END
    template <bool is_exclusive>
    size_t seek_split_inserted(
        size_t start_size, size_t extra_size, size_t target_size,
        index_t& insert_index, size_t insert_size,
        std::optional<bool>& is_insert_left) {
      assert(!is_end());
      assert(index() == 0);
      size_t current_size = start_size;
      index_t split_index = 0;
      extra_size += header_size();
      do {
        if constexpr (!is_exclusive) {
          if (is_last()) {
            assert(split_index == index());
            if (insert_index == INDEX_LAST) {
              insert_index = index();
            }
            assert(insert_index <= index());
            break;
          }
        }

        size_t nxt_size = current_size;
        if (split_index == 0) {
          nxt_size += extra_size;
        }
        if (split_index == insert_index) {
          nxt_size += insert_size;
          if constexpr (is_exclusive) {
            if (nxt_size > target_size) {
              break;
            }
            current_size = nxt_size;
            ++split_index;
          }
        }
        nxt_size += size();
        if (nxt_size > target_size) {
          break;
        }
        current_size = nxt_size;

        if constexpr (is_exclusive) {
          if (is_last()) {
            assert(split_index == index());
            set_end();
            split_index = index();
            if (insert_index == INDEX_END) {
              insert_index = index();
            }
            assert(insert_index == index());
            break;
          } else {
            ++(*this);
            ++split_index;
          }
        } else {
          ++(*this);
          ++split_index;
        }
      } while (true);
      assert(current_size <= target_size);

      _left_or_right<is_exclusive>(split_index, insert_index, is_insert_left);
      assert(split_index == index());
      return current_size;
    }

    size_t seek_split(size_t start_size, size_t extra_size, size_t target_size) {
      assert(!is_end());
      assert(index() == 0);
      size_t current_size = start_size;
      do {
        if (is_last()) {
          break;
        }

        size_t nxt_size = current_size;
        if (index() == 0) {
          nxt_size += extra_size;
        }
        nxt_size += size();
        if (nxt_size > target_size) {
          break;
        }
        current_size = nxt_size;
        ++(*this);
      } while (true);
      assert(current_size <= target_size);
      return current_size;
    }

    // Note: possible to return an end iterator if to_index == INDEX_END
    template <KeyT KT>
    void copy_out_until(
        typename container_t::template Appender<KT>& appender, index_t& to_index) {
      if (is_end()) {
        assert(!container.has_next());
        if (to_index == INDEX_END) {
          to_index = index();
        }
        assert(to_index == index());
        return;
      }
      index_t items;
      if (to_index == INDEX_END || to_index == INDEX_LAST) {
        items = to_index;
      } else {
        assert(is_valid_index(to_index));
        assert(index() <= to_index);
        items = to_index - index();
      }
      if (appender.append(container, items)) {
        set_end();
      }
      to_index = index();
    }

    node_offset_t trim_until(NodeExtentMutable& mut) {
      if (is_end()) {
        return 0;
      }
      return container_t::trim_until(mut, container);
    }

    node_offset_t trim_at(NodeExtentMutable& mut, node_offset_t trimmed) {
      assert(!is_end());
      return container_t::trim_at(mut, container, trimmed);
    }

    node_offset_t erase(NodeExtentMutable& mut, const char* p_left_bound) {
      assert(!is_end());
      return container_t::erase(mut, container, p_left_bound);
    }

    template <KeyT KT>
    typename container_t::template Appender<KT>
    get_appender(NodeExtentMutable* p_mut) {
      return typename container_t::template Appender<KT>(p_mut, container, false);
    }

    template <KeyT KT>
    typename container_t::template Appender<KT>
    get_appender_opened(NodeExtentMutable* p_mut) {
      if constexpr (!IS_BOTTOM) {
        return typename container_t::template Appender<KT>(p_mut, container, true);
      } else {
        ceph_abort("impossible path");
      }
    }

    void encode(const char* p_node_start, ceph::bufferlist& encoded) const {
      container.encode(p_node_start, encoded);
      uint8_t is_end = _is_end;
      ceph::encode(is_end, encoded);
    }

    static me_t decode(const char* p_node_start,
                       extent_len_t node_size,
                       ceph::bufferlist::const_iterator& delta) {
      auto container = container_t::decode(
          p_node_start, node_size, delta);
      auto ret = me_t(container);
      uint8_t is_end;
      ceph::decode(is_end, delta);
      if (is_end) {
        ret.set_end();
      }
      return ret;
    }

    static node_offset_t header_size() {
      return container_t::header_size();
    }

    template <KeyT KT>
    static node_offset_t estimate_insert(const full_key_t<KT>& key,
                                         const value_input_t& value) {
      return container_t::template estimate_insert<KT>(key, value);
    }

  private:
    container_t container;
    bool _is_end = false;
  };

  /*
   * iterator_t encapsulates both the indexable and the iterative
   * implementations from a *non-empty* container.
   *   cstr(const container_t&)
   *   access:
   *     index() -> index_t
   *     get_key() -> key_get_type (const reference or value type)
   *     is_last() -> bool
   *     is_end() -> bool
   *     size() -> node_offset_t
   *     size_overhead() -> node_offset_t
   *     (IS_BOTTOM) get_p_value() -> const value_t*
   *     (!IS_BOTTOM) get_nxt_container() -> container_range_t
   *     (!IS_BOTTOM) size_to_nxt() -> node_offset_t
   *   seek:
   *     operator++() -> iterator_t&
   *     seek_at(index)
   *     seek_till_end(index)
   *     seek_last()
   *     set_end()
   *     seek(key, exclude_last) -> MatchKindBS
   *   insert:
   *     (IS_BOTTOM) insert(mut, key, value, size, p_left_bound) -> p_value
   *     (!IS_BOTTOM) insert_prefix(mut, key, size, p_left_bound) -> memory_range_t
   *     (!IS_BOTTOM) update_size(mut, size)
   *   split:
   *     seek_split_inserted<bool is_exclusive>(
   *         start_size, extra_size, target_size, insert_index, insert_size,
   *         std::optional<bool>& is_insert_left)
   *       -> insert to left/right/unknown (!exclusive)
   *       -> insert to left/right (exclusive, can be end)
   *       -> split_size
   *     seek_split(start_size, extra_size, target_size) -> split_size
   *     copy_out_until(appender, to_index) (can be end)
   *     trim_until(mut) -> trim_size
   *     (!IS_BOTTOM) trim_at(mut, trimmed) -> trim_size
   *   erase:
   *     erase(mut, p_left_bound) -> erase_size
   *   merge:
   *     get_appender(p_mut) -> Appender
   *     (!IS_BOTTOM) get_appender_opened(p_mut) -> Appender
   *   denc:
   *     encode(p_node_start, encoded)
   *     decode(p_node_start, node_size, delta) -> iterator_t
   *   static:
   *     header_size() -> node_offset_t
   *     estimate_insert(key, value) -> node_offset_t
   */
  using iterator_t = _iterator_t<CONTAINER_TYPE>;
  /* TODO: detailed comments
   * - trim_until(mut) -> trim_size
   *   * keeps items [0, i), removes the rest, and returns the size trimmed;
   *   * if this is the end iterator, does nothing and returns 0;
   *   * if this is the start iterator, it normally needs to go to the higher
   *     stage to trim the entire container.
   * - trim_at(mut, trimmed) -> trim_size
   *   * the trim happens inside the current iterator, reducing its size by
   *     <trimmed>; returns the total size trimmed.
   */

  /*
   * Lookup internals (hide?)
   */

  static bool is_keys_one(
      const container_t& container) {  // IN
    auto iter = iterator_t(container);
    iter.seek_last();
    if (iter.index() == 0) {
      if constexpr (IS_BOTTOM) {
        // ok, there is only 1 key
        return true;
      } else {
        auto nxt_container = iter.get_nxt_container();
        return NXT_STAGE_T::is_keys_one(nxt_container);
      }
    } else {
      // more than 1 key
      return false;
    }
  }

  template <bool GET_KEY>
  static result_t smallest_result(
      const iterator_t& iter, full_key_t<KeyT::VIEW>* p_index_key) {
    static_assert(!IS_BOTTOM);
    assert(!iter.is_end());
    auto nxt_container = iter.get_nxt_container();
    auto pos_smallest = NXT_STAGE_T::position_t::begin();
    const value_t* p_value;
    NXT_STAGE_T::template get_slot<GET_KEY, true>(
        nxt_container, pos_smallest, p_index_key, &p_value);
    if constexpr (GET_KEY) {
      assert(p_index_key);
      p_index_key->set(iter.get_key());
    } else {
      assert(!p_index_key);
    }
    return result_t{{iter.index(), pos_smallest}, p_value, STAGE};
  }

  template <bool GET_KEY>
  static result_t nxt_lower_bound(
      const full_key_t<KeyT::HOBJ>& key, iterator_t& iter,
      MatchHistory& history, full_key_t<KeyT::VIEW>* index_key) {
    static_assert(!IS_BOTTOM);
    assert(!iter.is_end());
    auto nxt_container = iter.get_nxt_container();
    auto nxt_result = NXT_STAGE_T::template lower_bound<GET_KEY>(
        nxt_container, key, history, index_key);
    if (nxt_result.is_end()) {
      if (iter.is_last()) {
        return result_t::end();
      } else {
        return smallest_result<GET_KEY>(++iter, index_key);
      }
    } else {
      if constexpr (GET_KEY) {
        index_key->set(iter.get_key());
      }
      return result_t::from_nxt(iter.index(), nxt_result);
    }
  }

  template <bool GET_POS, bool GET_KEY, bool GET_VAL>
  static void get_largest_slot(
      const container_t& container,         // IN
      position_t* p_position,               // OUT
      full_key_t<KeyT::VIEW>* p_index_key,  // OUT
      const value_t** pp_value) {           // OUT
    auto iter = iterator_t(container);
    iter.seek_last();
    if constexpr (GET_KEY) {
      assert(p_index_key);
      p_index_key->set(iter.get_key());
    } else {
      assert(!p_index_key);
    }
    if constexpr (GET_POS) {
      assert(p_position);
      p_position->index = iter.index();
    } else {
      assert(!p_position);
    }
    if constexpr (IS_BOTTOM) {
      if constexpr (GET_VAL) {
        assert(pp_value);
        *pp_value = iter.get_p_value();
      } else {
        assert(!pp_value);
      }
    } else {
      auto nxt_container = iter.get_nxt_container();
      if constexpr (GET_POS) {
        NXT_STAGE_T::template get_largest_slot<true, GET_KEY, GET_VAL>(
            nxt_container, &p_position->nxt, p_index_key, pp_value);
      } else {
        NXT_STAGE_T::template get_largest_slot<false, GET_KEY, GET_VAL>(
            nxt_container, nullptr, p_index_key, pp_value);
      }
    }
  }

  template <bool GET_KEY, bool GET_VAL>
  static void get_slot(
      const container_t& container,         // IN
      const position_t& pos,                // IN
      full_key_t<KeyT::VIEW>* p_index_key,  // OUT
      const value_t** pp_value) {           // OUT
    auto iter = iterator_t(container);
    iter.seek_at(pos.index);

    if constexpr (GET_KEY) {
      assert(p_index_key);
      p_index_key->set(iter.get_key());
    } else {
      assert(!p_index_key);
    }

    if constexpr (!IS_BOTTOM) {
      auto nxt_container = iter.get_nxt_container();
      NXT_STAGE_T::template get_slot<GET_KEY, GET_VAL>(
          nxt_container, pos.nxt, p_index_key, pp_value);
    } else {
      if constexpr (GET_VAL) {
        assert(pp_value);
        *pp_value = iter.get_p_value();
      } else {
        assert(!pp_value);
      }
    }
  }

  template <bool GET_KEY = false>
  static result_t lower_bound(
      const container_t& container,
      const full_key_t<KeyT::HOBJ>& key,
      MatchHistory& history,
      full_key_t<KeyT::VIEW>* index_key = nullptr) {
    bool exclude_last = false;
    if (history.get<STAGE>().has_value()) {
      if (*history.get<STAGE>() == MatchKindCMP::EQ) {
        // lookup is short-circuited
        if constexpr (!IS_BOTTOM) {
          assert(history.get<STAGE - 1>().has_value());
          if (history.is_GT<STAGE - 1>()) {
            auto iter = iterator_t(container);
            bool test_key_equal;
            if constexpr (STAGE == STAGE_STRING) {
              // TODO(cross-node string dedup)
              // test_key_equal = (iter.get_key().type() == ns_oid_view_t::Type::MIN);
              auto cmp = compare_to<KeyT::HOBJ>(key, iter.get_key());
              assert(cmp != MatchKindCMP::GT);
              test_key_equal = (cmp == MatchKindCMP::EQ);
            } else {
              auto cmp = compare_to<KeyT::HOBJ>(key, iter.get_key());
              // From history, key[stage] == parent[stage][index - 1],
              // which should be the smallest possible value for all
              // index[stage][*]
              assert(cmp != MatchKindCMP::GT);
              test_key_equal = (cmp == MatchKindCMP::EQ);
            }
            if (test_key_equal) {
              return nxt_lower_bound<GET_KEY>(key, iter, history, index_key);
            } else {
              // key[stage] < index[stage][left-most]
              return smallest_result<GET_KEY>(iter, index_key);
            }
          }
        }
        // IS_BOTTOM || !history.is_GT<STAGE - 1>()
        auto iter = iterator_t(container);
        iter.seek_last();
        if constexpr (STAGE == STAGE_STRING) {
          // TODO(cross-node string dedup)
          // assert(iter.get_key().type() == ns_oid_view_t::Type::MAX);
          assert(compare_to<KeyT::HOBJ>(key, iter.get_key()) == MatchKindCMP::EQ);
        } else {
          assert(compare_to<KeyT::HOBJ>(key, iter.get_key()) == MatchKindCMP::EQ);
        }
        if constexpr (GET_KEY) {
          index_key->set(iter.get_key());
        }
        if constexpr (IS_BOTTOM) {
          auto value_ptr = iter.get_p_value();
          return result_t{{iter.index()}, value_ptr, MSTAT_EQ};
        } else {
          auto nxt_container = iter.get_nxt_container();
          auto nxt_result = NXT_STAGE_T::template lower_bound<GET_KEY>(
              nxt_container, key, history, index_key);
          // !history.is_GT<STAGE - 1>() means
          // key[stage+1 ...] <= index[stage+1 ...][*]
          assert(!nxt_result.is_end());
          return result_t::from_nxt(iter.index(), nxt_result);
        }
      } else if (*history.get<STAGE>() == MatchKindCMP::LT) {
        exclude_last = true;
      }
    }
    auto iter = iterator_t(container);
    auto bs_match = iter.seek(key, exclude_last);
    if (iter.is_end()) {
      assert(!exclude_last);
      assert(bs_match == MatchKindBS::NE);
      history.set<STAGE>(MatchKindCMP::GT);
      return result_t::end();
    }
    history.set<STAGE>(bs_match == MatchKindBS::EQ ?
                       MatchKindCMP::EQ : MatchKindCMP::LT);
    if constexpr (IS_BOTTOM) {
      if constexpr (GET_KEY) {
        index_key->set(iter.get_key());
      }
      auto value_ptr = iter.get_p_value();
      return result_t{{iter.index()}, value_ptr,
                      (bs_match == MatchKindBS::EQ ? MSTAT_EQ : MSTAT_LT0)};
    } else {
      if (bs_match == MatchKindBS::EQ) {
        return nxt_lower_bound<GET_KEY>(key, iter, history, index_key);
      } else {
        return smallest_result<GET_KEY>(iter, index_key);
      }
    }
  }
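
  // How the short-circuit above works, in brief: MatchHistory records, per
  // stage, how the key compared during the last visited node's lookup. If
  // this stage's key portion compared EQ at the parent, the match in this
  // node is pinned to either the left-most index (when the lower stage
  // compared GT) or the right-most index (otherwise), so the binary search
  // in this stage can be skipped entirely.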

  template <KeyT KT>
  static node_offset_t insert_size(const full_key_t<KT>& key,
                                   const value_input_t& value) {
    if constexpr (IS_BOTTOM) {
      return iterator_t::template estimate_insert<KT>(key, value);
    } else {
      return iterator_t::template estimate_insert<KT>(key, value) +
             NXT_STAGE_T::iterator_t::header_size() +
             NXT_STAGE_T::template insert_size<KT>(key, value);
    }
  }

  template <KeyT KT>
  static node_offset_t insert_size_at(match_stage_t stage,
                                      const full_key_t<KeyT::HOBJ>& key,
                                      const value_input_t& value) {
    if (stage == STAGE) {
      return insert_size<KT>(key, value);
    } else {
      assert(stage < STAGE);
      return NXT_STAGE_T::template insert_size_at<KT>(stage, key, value);
    }
  }

  template <typename T = std::tuple<match_stage_t, node_offset_t>>
  static std::enable_if_t<NODE_TYPE == node_type_t::INTERNAL, T> evaluate_insert(
      const container_t& container, const full_key_t<KeyT::VIEW>& key,
      const value_input_t& value, position_t& position, bool evaluate_last) {
    auto iter = iterator_t(container);
    auto& index = position.index;
    if (evaluate_last || index == INDEX_END) {
      iter.seek_last();
      index = iter.index();
      // evaluate the previous index
    } else {
      assert(is_valid_index(index));
      // evaluate the current index
      iter.seek_at(index);
      auto match = compare_to<KeyT::VIEW>(key, iter.get_key());
      if (match == MatchKindCMP::EQ) {
        if constexpr (IS_BOTTOM) {
          ceph_abort("insert conflict at current index!");
        } else {
          // insert into the current index
          auto nxt_container = iter.get_nxt_container();
          return NXT_STAGE_T::evaluate_insert(
              nxt_container, key, value, position.nxt, false);
        }
      } else {
        assert(match == MatchKindCMP::LT);
        if (index == 0) {
          // already the first index, so insert at the current index
          return {STAGE, insert_size<KeyT::VIEW>(key, value)};
        }
        --index;
        iter = iterator_t(container);
        iter.seek_at(index);
        // proceed to evaluate the previous index
      }
    }

    // XXX(multi-type): when key is from a different type of node
    auto match = compare_to<KeyT::VIEW>(key, iter.get_key());
    if (match == MatchKindCMP::GT) {
      // key doesn't match either index, so insert at the current index
      ++index;
      return {STAGE, insert_size<KeyT::VIEW>(key, value)};
    } else {
      assert(match == MatchKindCMP::EQ);
      if constexpr (IS_BOTTOM) {
        ceph_abort("insert conflict at the previous index!");
      } else {
        // insert into the previous index
        auto nxt_container = iter.get_nxt_container();
        return NXT_STAGE_T::evaluate_insert(
            nxt_container, key, value, position.nxt, true);
      }
    }
  }

  template <typename T = bool>
  static std::enable_if_t<NODE_TYPE == node_type_t::LEAF, T>
  compensate_insert_position_at(match_stage_t stage, position_t& position) {
    auto& index = position.index;
    if (stage == STAGE) {
      assert(index == 0);
      // insert at the end of the current stage
      index = INDEX_END;
      return true;
    } else {
      if constexpr (IS_BOTTOM) {
        ceph_abort("impossible path");
      } else {
        assert(stage < STAGE);
        bool compensate = NXT_STAGE_T::
          compensate_insert_position_at(stage, position.nxt);
        if (compensate) {
          assert(is_valid_index(index));
          if (index == 0) {
            // insert into the *last* index of the current stage
            index = INDEX_LAST;
            return true;
          } else {
            --index;
            return false;
          }
        } else {
          return false;
        }
      }
    }
  }

  static void patch_insert_end(position_t& insert_pos, match_stage_t insert_stage) {
    assert(insert_stage <= STAGE);
    if (insert_stage == STAGE) {
      insert_pos.index = INDEX_END;
    } else if constexpr (!IS_BOTTOM) {
      insert_pos.index = INDEX_LAST;
      NXT_STAGE_T::patch_insert_end(insert_pos.nxt, insert_stage);
    }
  }

  template <typename T = std::tuple<match_stage_t, node_offset_t>>
  static std::enable_if_t<NODE_TYPE == node_type_t::LEAF, T> evaluate_insert(
      const full_key_t<KeyT::HOBJ>& key, const value_config_t& value,
      const MatchHistory& history, match_stat_t mstat, position_t& position) {
    match_stage_t insert_stage = STAGE_TOP;
    while (*history.get_by_stage(insert_stage) == MatchKindCMP::EQ) {
      assert(insert_stage != STAGE_BOTTOM && "insert conflict!");
      --insert_stage;
    }

    if (history.is_GT()) {
      if (position.is_end()) {
        // no need to compensate insert position
        assert(insert_stage <= STAGE && "impossible insert stage");
      } else if (position == position_t::begin()) {
        // I must be short-circuited by staged::smallest_result()
        // in staged::lower_bound(), so we need to rely on mstat instead
        assert(mstat >= MSTAT_LT0 && mstat <= MSTAT_LT3);
        if (mstat == MSTAT_LT0) {
          insert_stage = STAGE_RIGHT;
        } else if (mstat == MSTAT_LT1) {
          insert_stage = STAGE_STRING;
        } else {
          insert_stage = STAGE_LEFT;
        }
        // XXX(multi-type): need to upgrade node type before inserting an
        // incompatible index at front.
        assert(insert_stage <= STAGE && "incompatible insert");
      } else {
        assert(insert_stage <= STAGE && "impossible insert stage");
        [[maybe_unused]] bool ret = compensate_insert_position_at(insert_stage, position);
        assert(!ret);
      }
    }

    if (position.is_end()) {
      patch_insert_end(position, insert_stage);
    }

    node_offset_t insert_size = insert_size_at<KeyT::HOBJ>(insert_stage, key, value);

    return {insert_stage, insert_size};
  }

  template <KeyT KT>
  static const value_t* insert_new(
      NodeExtentMutable& mut, const memory_range_t& range,
      const full_key_t<KT>& key, const value_input_t& value) {
    char* p_insert = const_cast<char*>(range.p_end);
    const value_t* p_value = nullptr;
    StagedAppender<KT> appender;
    appender.init_empty(&mut, p_insert);
    appender.append(key, value, p_value);
    [[maybe_unused]] const char* p_insert_front = appender.wrap();
    assert(p_insert_front == range.p_start);
    return p_value;
  }

  template <KeyT KT, bool SPLIT>
  static const value_t* proceed_insert_recursively(
      NodeExtentMutable& mut, const container_t& container,
      const full_key_t<KT>& key, const value_input_t& value,
      position_t& position, match_stage_t& stage,
      node_offset_t& _insert_size, const char* p_left_bound) {
    // proceed insert from right to left
    assert(stage <= STAGE);
    auto iter = iterator_t(container);
    auto& index = position.index;

    bool do_insert = false;
    if (stage == STAGE) {
      if (index == INDEX_END) {
        iter.seek_last();
        iter.set_end();
        index = iter.index();
      } else {
        assert(is_valid_index(index));
        iter.seek_till_end(index);
      }
      do_insert = true;
    } else {  // stage < STAGE
      if (index == INDEX_LAST) {
        iter.seek_last();
        index = iter.index();
      } else {
        assert(is_valid_index(index));
        iter.seek_till_end(index);
      }
      if constexpr (SPLIT) {
        if (iter.is_end()) {
          // insert at the higher stage due to split
          do_insert = true;
          _insert_size = insert_size<KT>(key, value);
          stage = STAGE;
        }
      } else {
        assert(!iter.is_end());
      }
    }

    if (do_insert) {
      if constexpr (!IS_BOTTOM) {
        position.nxt = position_t::nxt_t::begin();
      }
      assert(_insert_size == insert_size<KT>(key, value));
      if constexpr (IS_BOTTOM) {
        return iter.template insert<KT>(
            mut, key, value, _insert_size, p_left_bound);
      } else {
        auto range = iter.template insert_prefix<KT>(
            mut, key, _insert_size, p_left_bound);
        return NXT_STAGE_T::template insert_new<KT>(mut, range, key, value);
      }
    } else {
      if constexpr (!IS_BOTTOM) {
        auto nxt_container = iter.get_nxt_container();
        auto p_value = NXT_STAGE_T::template proceed_insert_recursively<KT, SPLIT>(
            mut, nxt_container, key, value,
            position.nxt, stage, _insert_size, p_left_bound);
        iter.update_size(mut, _insert_size);
        return p_value;
      } else {
        ceph_abort("impossible path");
      }
    }
  }

  template <KeyT KT, bool SPLIT>
  static const value_t* proceed_insert(
      NodeExtentMutable& mut, const container_t& container,
      const full_key_t<KT>& key, const value_input_t& value,
      position_t& position, match_stage_t& stage, node_offset_t& _insert_size) {
    auto p_left_bound = container.p_left_bound();
    if (unlikely(!container.keys())) {
      if (position.is_end()) {
        position = position_t::begin();
        assert(stage == STAGE);
        assert(_insert_size == insert_size<KT>(key, value));
      } else if (position == position_t::begin()) {
        // when inserting into a trimmed and empty left node
        stage = STAGE;
        _insert_size = insert_size<KT>(key, value);
      } else {
        ceph_abort("impossible path");
      }
      if constexpr (IS_BOTTOM) {
        return container_t::template insert_at<KT>(
            mut, container, key, value, 0, _insert_size, p_left_bound);
      } else {
        auto range = container_t::template insert_prefix_at<KT>(
            mut, container, key, 0, _insert_size, p_left_bound);
        return NXT_STAGE_T::template insert_new<KT>(mut, range, key, value);
      }
    } else {
      return proceed_insert_recursively<KT, SPLIT>(
          mut, container, key, value,
          position, stage, _insert_size, p_left_bound);
    }
  }

  static std::ostream& dump(const container_t& container,
                            std::ostream& os,
                            const std::string& prefix,
                            size_t& size,
                            const char* p_start) {
    auto iter = iterator_t(container);
    assert(!iter.is_end());
    std::string prefix_blank(prefix.size(), ' ');
    const std::string* p_prefix = &prefix;
    size += iterator_t::header_size();
    do {
      std::ostringstream sos;
      sos << *p_prefix << iter.get_key() << ": ";
      std::string i_prefix = sos.str();
      if constexpr (!IS_BOTTOM) {
        auto nxt_container = iter.get_nxt_container();
        size += iter.size_to_nxt();
        NXT_STAGE_T::dump(nxt_container, os, i_prefix, size, p_start);
      } else {
        auto value_ptr = iter.get_p_value();
        int offset = reinterpret_cast<const char*>(value_ptr) - p_start;
        size += iter.size();
        os << "\n" << i_prefix;
        if constexpr (NODE_TYPE == node_type_t::LEAF) {
          os << *value_ptr;
        } else {
          os << "0x" << std::hex << value_ptr->value << std::dec;
        }
        os << " " << size << "B"
           << " @" << offset << "B";
      }
      if (iter.is_last()) {
        break;
      } else {
        ++iter;
        p_prefix = &prefix_blank;
      }
    } while (true);
    return os;
  }

  static void validate(const container_t& container) {
    auto iter = iterator_t(container);
    assert(!iter.is_end());
    auto key = iter.get_key();
    do {
      if constexpr (!IS_BOTTOM) {
        auto nxt_container = iter.get_nxt_container();
        NXT_STAGE_T::validate(nxt_container);
      }
      if (iter.is_last()) {
        break;
      } else {
        ++iter;
        assert(compare_to(key, iter.get_key()) == MatchKindCMP::LT);
        key = iter.get_key();
      }
    } while (true);
  }

  static void get_stats(const container_t& container, node_stats_t& stats,
                        full_key_t<KeyT::VIEW>& index_key) {
    auto iter = iterator_t(container);
    assert(!iter.is_end());
    stats.size_overhead += iterator_t::header_size();
    do {
      index_key.replace(iter.get_key());
      stats.size_overhead += iter.size_overhead();
      if constexpr (!IS_BOTTOM) {
        auto nxt_container = iter.get_nxt_container();
        NXT_STAGE_T::get_stats(nxt_container, stats, index_key);
      } else {
        ++stats.num_kvs;
        size_t kv_logical_size = index_key.size_logical();
        size_t value_size;
        if constexpr (NODE_TYPE == node_type_t::LEAF) {
          value_size = iter.get_p_value()->allocation_size();
        } else {
          value_size = sizeof(value_t);
        }
        stats.size_value += value_size;
        kv_logical_size += value_size;
        stats.size_logical += kv_logical_size;
      }
      if (iter.is_last()) {
        break;
      } else {
        ++iter;
      }
    } while (true);
  }

  template <bool GET_KEY, bool GET_VAL>
  static bool get_next_slot(
      const container_t& container,         // IN
      position_t& pos,                      // IN&OUT
      full_key_t<KeyT::VIEW>* p_index_key,  // OUT
      const value_t** pp_value) {           // OUT
    auto iter = iterator_t(container);
    assert(!iter.is_end());
    iter.seek_at(pos.index);
    bool find_next;
    if constexpr (!IS_BOTTOM) {
      auto nxt_container = iter.get_nxt_container();
      find_next = NXT_STAGE_T::template get_next_slot<GET_KEY, GET_VAL>(
          nxt_container, pos.nxt, p_index_key, pp_value);
    } else {
      find_next = true;
    }

    if (find_next) {
      if (iter.is_last()) {
        return true;
      } else {
        pos.index = iter.index() + 1;
        if constexpr (!IS_BOTTOM) {
          pos.nxt = NXT_STAGE_T::position_t::begin();
        }
        get_slot<GET_KEY, GET_VAL>(
            container, pos, p_index_key, pp_value);
        return false;
      }
    } else {  // !find_next && !IS_BOTTOM
      if constexpr (GET_KEY) {
        assert(p_index_key);
        p_index_key->set(iter.get_key());
      } else {
        assert(!p_index_key);
      }
      return false;
    }
  }

  template <bool GET_KEY, bool GET_VAL>
  static void get_prev_slot(
      const container_t& container,         // IN
      position_t& pos,                      // IN&OUT
      full_key_t<KeyT::VIEW>* p_index_key,  // OUT
      const value_t** pp_value) {           // OUT
    assert(pos != position_t::begin());
    assert(!pos.is_end());
    auto& index = pos.index;
    auto iter = iterator_t(container);
    if constexpr (!IS_BOTTOM) {
      auto& nxt_pos = pos.nxt;
      if (nxt_pos == NXT_STAGE_T::position_t::begin()) {
        assert(index);
        --index;
        iter.seek_at(index);
        auto nxt_container = iter.get_nxt_container();
        NXT_STAGE_T::template get_largest_slot<true, GET_KEY, GET_VAL>(
            nxt_container, &nxt_pos, p_index_key, pp_value);
      } else {
        iter.seek_at(index);
        auto nxt_container = iter.get_nxt_container();
        NXT_STAGE_T::template get_prev_slot<GET_KEY, GET_VAL>(
            nxt_container, nxt_pos, p_index_key, pp_value);
      }
    } else {
      assert(index);
      --index;
      iter.seek_at(index);
      if constexpr (GET_VAL) {
        assert(pp_value);
        *pp_value = iter.get_p_value();
      } else {
        assert(!pp_value);
      }
    }
    if constexpr (GET_KEY) {
      p_index_key->set(iter.get_key());
    } else {
      assert(!p_index_key);
    }
  }

  struct _BaseEmpty {};
  class _BaseWithNxtIterator {
  protected:
    typename NXT_STAGE_T::StagedIterator _nxt;
  };
  class StagedIterator
      : std::conditional_t<IS_BOTTOM, _BaseEmpty, _BaseWithNxtIterator> {
  public:
    StagedIterator() = default;
    bool valid() const { return iter.has_value(); }
    index_t index() const {
      return iter->index();
    }
    bool is_end() const { return iter->is_end(); }
    bool in_progress() const {
      assert(valid());
      assert(!is_end());
      if constexpr (!IS_BOTTOM) {
        if (this->_nxt.valid()) {
          if (this->_nxt.index() == 0) {
            return this->_nxt.in_progress();
          } else {
            return true;
          }
        } else {
          return false;
        }
      } else {
        return false;
      }
    }
    key_get_type get_key() const { return iter->get_key(); }

    iterator_t& get() { return *iter; }
    void set(const container_t& container) {
      assert(!valid());
      iter = iterator_t(container);
    }
    void set_end() { iter->set_end(); }
    typename NXT_STAGE_T::StagedIterator& nxt() {
      if constexpr (!IS_BOTTOM) {
        if (!this->_nxt.valid()) {
          auto nxt_container = iter->get_nxt_container();
          this->_nxt.set(nxt_container);
        }
        return this->_nxt;
      } else {
        ceph_abort("impossible path");
      }
    }
    typename NXT_STAGE_T::StagedIterator& get_nxt() {
      if constexpr (!IS_BOTTOM) {
        return this->_nxt;
      } else {
        ceph_abort("impossible path");
      }
    }
    StagedIterator& operator++() {
      if (iter->is_last()) {
        iter->set_end();
      } else {
        ++(*iter);
      }
      if constexpr (!IS_BOTTOM) {
        this->_nxt.reset();
      }
      return *this;
    }
    void reset() {
      if (valid()) {
        iter.reset();
        if constexpr (!IS_BOTTOM) {
          this->_nxt.reset();
        }
      }
    }
    std::ostream& print(std::ostream& os, bool is_top) const {
      if (valid()) {
        if (iter->is_end()) {
          return os << "END";
        } else {
          os << index();
        }
      } else {
        if (is_top) {
          return os << "invalid StagedIterator!";
        } else {
          os << "0!";
        }
      }
      if constexpr (!IS_BOTTOM) {
        os << ", ";
        return this->_nxt.print(os, false);
      } else {
        return os;
      }
    }
    position_t get_pos() const {
      if (valid()) {
        if constexpr (IS_BOTTOM) {
          return position_t{index()};
        } else {
          return position_t{index(), this->_nxt.get_pos()};
        }
      } else {
        return position_t::begin();
      }
    }
    void encode(const char* p_node_start, ceph::bufferlist& encoded) const {
      uint8_t present = static_cast<bool>(iter);
      ceph::encode(present, encoded);
      if (iter.has_value()) {
        iter->encode(p_node_start, encoded);
        if constexpr (!IS_BOTTOM) {
          this->_nxt.encode(p_node_start, encoded);
        }
      }
    }
    static StagedIterator decode(const char* p_node_start,
                                 extent_len_t node_size,
                                 ceph::bufferlist::const_iterator& delta) {
      StagedIterator ret;
      uint8_t present;
      ceph::decode(present, delta);
      if (present) {
        ret.iter = iterator_t::decode(
            p_node_start, node_size, delta);
        if constexpr (!IS_BOTTOM) {
          ret._nxt = NXT_STAGE_T::StagedIterator::decode(
              p_node_start, node_size, delta);
        }
      }
      return ret;
    }
    friend std::ostream& operator<<(std::ostream& os, const StagedIterator& iter) {
      return iter.print(os, true);
    }
  private:
    std::optional<iterator_t> iter;
  };
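
  // StagedIterator is effectively a cursor stack: one iterator_t per stage,
  // with the lower-stage iterator (_nxt) opened lazily by nxt(). It tracks a
  // position across all the stages during split/merge, and supports
  // encode()/decode() so a recorded position can be rebuilt later (e.g. when
  // a delta is applied).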

  static bool recursively_locate_split(
      size_t& current_size, size_t extra_size,
      size_t target_size, StagedIterator& split_at) {
    assert(current_size <= target_size);
    iterator_t& split_iter = split_at.get();
    current_size = split_iter.seek_split(current_size, extra_size, target_size);
    assert(current_size <= target_size);
    assert(!split_iter.is_end());
    if (split_iter.index() == 0) {
      extra_size += iterator_t::header_size();
    } else {
      extra_size = 0;
    }
    bool locate_nxt;
    if constexpr (!IS_BOTTOM) {
      locate_nxt = NXT_STAGE_T::recursively_locate_split(
          current_size, extra_size + split_iter.size_to_nxt(),
          target_size, split_at.nxt());
    } else {  // IS_BOTTOM
      // located the upper bound, fair split strategy
      size_t nxt_size = split_iter.size() + extra_size;
      assert(current_size + nxt_size > target_size);
      if (current_size + nxt_size / 2 < target_size) {
        // include next
        current_size += nxt_size;
        locate_nxt = true;
      } else {
        // exclude next
        locate_nxt = false;
      }
    }
    if (locate_nxt) {
      if (split_iter.is_last()) {
        return true;
      } else {
        ++split_at;
        return false;
      }
    } else {
      return false;
    }
  }
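
  // The "fair split strategy" above: seek_split() stops at the last slot
  // that still fits within target_size, then the boundary slot is assigned
  // to whichever side leaves the split closer to the target. E.g. with
  // current_size = 950, target_size = 1000 and a 100-byte boundary slot,
  // 950 + 100/2 = 1000 is not < 1000, so the slot is excluded and stays on
  // the right side.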
1778
1779 static bool recursively_locate_split_inserted(
1780 size_t& current_size, size_t extra_size, size_t target_size,
1781 position_t& insert_pos, match_stage_t insert_stage, size_t insert_size,
1782 std::optional<bool>& is_insert_left, StagedIterator& split_at) {
1783 assert(current_size <= target_size);
1784 assert(!is_insert_left.has_value());
1785 iterator_t& split_iter = split_at.get();
1786 auto& insert_index = insert_pos.index;
1787 if (insert_stage == STAGE) {
1788 current_size = split_iter.template seek_split_inserted<true>(
1789 current_size, extra_size, target_size,
1790 insert_index, insert_size, is_insert_left);
1791 assert(is_insert_left.has_value());
1792 assert(current_size <= target_size);
1793 if (split_iter.index() == 0) {
1794 if (insert_index == 0) {
1795 if (*is_insert_left == false) {
1796 extra_size += iterator_t::header_size();
1797 } else {
1798 extra_size = 0;
1799 }
1800 } else {
1801 extra_size += iterator_t::header_size();
1802 }
1803 } else {
1804 extra_size = 0;
1805 }
1806 if (*is_insert_left == false && split_iter.index() == insert_index) {
1807 // split_iter can be end
1808 // found the lower-bound of target_size
1809 // ...[s_index-1] |!| (i_index) [s_index]...
1810
1811 // located upper-bound, fair split strategy
1812 // look at the next slot (the insert item)
1813 size_t nxt_size = insert_size + extra_size;
1814 assert(current_size + nxt_size > target_size);
1815 if (current_size + nxt_size/2 < target_size) {
1816 // include next
1817 *is_insert_left = true;
1818 current_size += nxt_size;
1819 if (split_iter.is_end()) {
1820 // ...[s_index-1] (i_index) |!|
1821 return true;
1822 } else {
1823 return false;
1824 }
1825 } else {
1826 // exclude next
1827 return false;
1828 }
1829 } else {
1830 // Already considered insert effect in the current stage.
1831 // Look into the next stage to identify the target_size lower-bound w/o
1832 // insert effect.
1833 assert(!split_iter.is_end());
1834 bool locate_nxt;
1835 if constexpr (!IS_BOTTOM) {
1836 locate_nxt = NXT_STAGE_T::recursively_locate_split(
1837 current_size, extra_size + split_iter.size_to_nxt(),
1838 target_size, split_at.nxt());
1839 } else { // IS_BOTTOM
1840 // located upper-bound, fair split strategy
1841 // look at the next slot
1842 size_t nxt_size = split_iter.size() + extra_size;
1843 assert(current_size + nxt_size > target_size);
1844 if (current_size + nxt_size/2 < target_size) {
1845 // include next
1846 current_size += nxt_size;
1847 locate_nxt = true;
1848 } else {
1849 // exclude next
1850 locate_nxt = false;
1851 }
1852 }
1853 if (locate_nxt) {
1854 if (split_iter.is_last()) {
1855 auto end_index = split_iter.index() + 1;
1856 if (insert_index == INDEX_END) {
1857 insert_index = end_index;
1858 }
1859 assert(insert_index <= end_index);
1860 if (insert_index == end_index) {
1861 assert(*is_insert_left == false);
1862 split_iter.set_end();
1863 // ...[s_index-1] |!| (i_index)
1864 return false;
1865 } else {
1866 assert(*is_insert_left == true);
1867 return true;
1868 }
1869 } else {
1870 ++split_at;
1871 return false;
1872 }
1873 } else {
1874 return false;
1875 }
1876 }
1877 } else {
1878 if constexpr (!IS_BOTTOM) {
1879 assert(insert_stage < STAGE);
1880 current_size = split_iter.template seek_split_inserted<false>(
1881 current_size, extra_size, target_size,
1882 insert_index, insert_size, is_insert_left);
1883 assert(!split_iter.is_end());
1884 assert(current_size <= target_size);
1885 if (split_iter.index() == 0) {
1886 extra_size += iterator_t::header_size();
1887 } else {
1888 extra_size = 0;
1889 }
1890 bool locate_nxt;
1891 if (!is_insert_left.has_value()) {
1892 // Considered insert effect in the current stage, and insert happens
1893 // in the lower stage.
1894 // Look into the next stage to identify the target_size lower-bound w/
1895 // insert effect.
1896 assert(split_iter.index() == insert_index);
1897 locate_nxt = NXT_STAGE_T::recursively_locate_split_inserted(
1898 current_size, extra_size + split_iter.size_to_nxt(), target_size,
1899 insert_pos.nxt, insert_stage, insert_size,
1900 is_insert_left, split_at.nxt());
1901 assert(is_insert_left.has_value());
1902 #ifndef NDEBUG
1903 if (locate_nxt) {
1904 assert(*is_insert_left == true);
1905 }
1906 #endif
1907 } else {
1908 // is_insert_left.has_value() == true
1909 // Insert will *not* happen in the lower stage.
1910 // Need to look into the next stage to identify the target_size
1911 // lower-bound w/o further insert effect
1912 assert(split_iter.index() != insert_index);
1913 locate_nxt = NXT_STAGE_T::recursively_locate_split(
1914 current_size, extra_size + split_iter.size_to_nxt(),
1915 target_size, split_at.nxt());
1916 #ifndef NDEBUG
1917 if (split_iter.index() < insert_index) {
1918 assert(*is_insert_left == false);
1919 } else {
1920 assert(*is_insert_left == true);
1921 }
1922 #endif
1923 }
1924 if (locate_nxt) {
1925 if (split_iter.is_last()) {
1926 return true;
1927 } else {
1928 ++split_at;
1929 return false;
1930 }
1931 } else {
1932 return false;
1933 }
1934 } else {
1935 ceph_abort("impossible path");
1936 return false;
1937 }
1938 }
1939 }
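// Hedged reading of the protocol above: on return, *is_insert_left records
// whether the insert position falls into the left node of the split, and a
// true return value signals that the split point ran past the last entry of
// this container, so the caller at the upper stage should advance its own
// iterator.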
1940
1941 /*
1942 * container appender type system
1943 * container_t::Appender(NodeExtentMutable* p_mut, char* p_append)
1944 * append(const container_t& src, index_t from, index_t items)
1945 * wrap() -> char*
1946 * IF !IS_BOTTOM:
1947 * open_nxt(const key_get_type&)
1948 * open_nxt(const full_key_t&)
1949 * -> std::tuple<NodeExtentMutable&, char*>
1950 * wrap_nxt(char* p_append)
1951 * ELSE
1952 * append(const full_key_t& key, const value_input_t& value, const value_t*& p_value)
1953 */
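/*
 * A minimal sketch (hypothetical, names assumed) of a bottom-stage
 * container_t::Appender satisfying the contract above:
 *
 *   template <KeyT KT>
 *   class Appender {
 *    public:
 *     Appender(NodeExtentMutable* p_mut, char* p_append);
 *     // copy `items` entries of src starting at `from`, advancing p_append
 *     void append(const container_t& src, index_t from, index_t items);
 *     // encode one kv pair at the tail, exposing the on-node value
 *     void append(const full_key_t<KT>& key,
 *                 const value_input_t& value, const value_t*& p_value);
 *     // finish appending and return the final append pointer
 *     char* wrap();
 *   };
 */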
1954 template <KeyT KT>
1955 struct _BaseWithNxtAppender {
1956 typename NXT_STAGE_T::template StagedAppender<KT> _nxt;
1957 };
1958 template <KeyT KT>
1959 class StagedAppender
1960 : std::conditional_t<IS_BOTTOM, _BaseEmpty, _BaseWithNxtAppender<KT>> {
1961 public:
1962 StagedAppender() = default;
1963 ~StagedAppender() {
1964 assert(!require_wrap_nxt);
1965 assert(!valid());
1966 }
1967 bool valid() const { return appender.has_value(); }
1968 index_t index() const {
1969 assert(valid());
1970 return _index;
1971 }
1972 bool in_progress() const { return require_wrap_nxt; }
1973 // TODO: pass by reference
1974 void init_empty(NodeExtentMutable* p_mut, char* p_start) {
1975 assert(!valid());
1976 appender = typename container_t::template Appender<KT>(p_mut, p_start);
1977 _index = 0;
1978 }
1979 void init_tail(NodeExtentMutable* p_mut,
1980 const container_t& container,
1981 match_stage_t stage) {
1982 assert(!valid());
1983 auto iter = iterator_t(container);
1984 iter.seek_last();
1985 if (stage == STAGE) {
1986 appender = iter.template get_appender<KT>(p_mut);
1987 _index = iter.index() + 1;
1988 if constexpr (!IS_BOTTOM) {
1989 assert(!this->_nxt.valid());
1990 }
1991 } else {
1992 assert(stage < STAGE);
1993 if constexpr (!IS_BOTTOM) {
1994 appender = iter.template get_appender_opened<KT>(p_mut);
1995 _index = iter.index();
1996 require_wrap_nxt = true;
1997 auto nxt_container = iter.get_nxt_container();
1998 this->_nxt.init_tail(p_mut, nxt_container, stage);
1999 } else {
2000 ceph_abort("impossible path");
2001 }
2002 }
2003 }
2004 // may advance src_iter to its end if to_index == INDEX_END
2005 void append_until(StagedIterator& src_iter, index_t& to_index) {
2006 assert(!require_wrap_nxt);
2007 auto s_index = src_iter.index();
2008 src_iter.get().template copy_out_until<KT>(*appender, to_index);
2009 assert(src_iter.index() == to_index);
2010 assert(to_index >= s_index);
2011 auto increment = (to_index - s_index);
2012 if (increment) {
2013 _index += increment;
2014 if constexpr (!IS_BOTTOM) {
2015 src_iter.get_nxt().reset();
2016 }
2017 }
2018 }
2019 void append(const full_key_t<KT>& key,
2020 const value_input_t& value, const value_t*& p_value) {
2021 assert(!require_wrap_nxt);
2022 if constexpr (!IS_BOTTOM) {
2023 auto& nxt = open_nxt(key);
2024 nxt.append(key, value, p_value);
2025 wrap_nxt();
2026 } else {
2027 appender->append(key, value, p_value);
2028 ++_index;
2029 }
2030 }
2031 char* wrap() {
2032 assert(valid());
2033 assert(_index > 0);
2034 if constexpr (!IS_BOTTOM) {
2035 if (require_wrap_nxt) {
2036 wrap_nxt();
2037 }
2038 }
2039 auto ret = appender->wrap();
2040 appender.reset();
2041 return ret;
2042 }
2043 typename NXT_STAGE_T::template StagedAppender<KT>&
2044 open_nxt(key_get_type partial_key) {
2045 assert(!require_wrap_nxt);
2046 if constexpr (!IS_BOTTOM) {
2047 require_wrap_nxt = true;
2048 auto [p_mut, p_append] = appender->open_nxt(partial_key);
2049 this->_nxt.init_empty(p_mut, p_append);
2050 return this->_nxt;
2051 } else {
2052 ceph_abort("impossible path");
2053 }
2054 }
2055 typename NXT_STAGE_T::template StagedAppender<KT>&
2056 open_nxt(const full_key_t<KT>& key) {
2057 assert(!require_wrap_nxt);
2058 if constexpr (!IS_BOTTOM) {
2059 require_wrap_nxt = true;
2060 auto [p_mut, p_append] = appender->open_nxt(key);
2061 this->_nxt.init_empty(p_mut, p_append);
2062 return this->_nxt;
2063 } else {
2064 ceph_abort("impossible path");
2065 }
2066 }
2067 typename NXT_STAGE_T::template StagedAppender<KT>& get_nxt() {
2068 if constexpr (!IS_BOTTOM) {
2069 assert(require_wrap_nxt);
2070 return this->_nxt;
2071 } else {
2072 ceph_abort("impossible path");
2073 }
2074 }
2075 void wrap_nxt() {
2076 if constexpr (!IS_BOTTOM) {
2077 assert(require_wrap_nxt);
2078 require_wrap_nxt = false;
2079 auto p_append = this->_nxt.wrap();
2080 appender->wrap_nxt(p_append);
2081 ++_index;
2082 } else {
2083 ceph_abort("impossible path");
2084 }
2085 }
2086 private:
2087 std::optional<typename container_t::template Appender<KT>> appender;
2088 index_t _index;
2089 bool require_wrap_nxt = false;
2090 };
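/*
 * Hypothetical usage sketch of StagedAppender (p_mut, p_start, key and
 * value_input are assumed to be prepared by the caller; not from the
 * source):
 *
 *   StagedAppender<KeyT::VIEW> appender;
 *   appender.init_empty(p_mut, p_start);
 *   const value_t* p_value;
 *   appender.append(key, value_input, p_value);  // recurses to the bottom
 *   char* p_end = appender.wrap();               // seals all open stages
 */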
2091
2092 template <KeyT KT>
2093 static void _append_range(
2094 StagedIterator& src_iter, StagedAppender<KT>& appender, index_t& to_index) {
2095 if (src_iter.is_end()) {
2096 // append done
2097 assert(to_index == INDEX_END);
2098 to_index = src_iter.index();
2099 } else if constexpr (!IS_BOTTOM) {
2100 if (appender.in_progress()) {
2101 // appender has appended something at the current item,
2102 // cannot append the current item as-a-whole
2103 index_t to_index_nxt = INDEX_END;
2104 NXT_STAGE_T::template _append_range<KT>(
2105 src_iter.nxt(), appender.get_nxt(), to_index_nxt);
2106 ++src_iter;
2107 appender.wrap_nxt();
2108 } else if (src_iter.in_progress()) {
2109 // src_iter is not at the beginning of the current item,
2110 // cannot append the current item as-a-whole
2111 index_t to_index_nxt = INDEX_END;
2112 NXT_STAGE_T::template _append_range<KT>(
2113 src_iter.get_nxt(), appender.open_nxt(src_iter.get_key()), to_index_nxt);
2114 ++src_iter;
2115 appender.wrap_nxt();
2116 } else {
2117 // we can safely append the current item as-a-whole
2118 }
2119 }
2120 appender.append_until(src_iter, to_index);
2121 }
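// Worked example (illustrative): with src_iter mid-way through item 2 and
// to_index == 5, the tail of item 2 is finished at the next stage through
// open_nxt()/wrap_nxt(), src_iter advances, and items 3..4 are then copied
// as-a-whole by append_until().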
2122
2123 template <KeyT KT>
2124 static void _append_into(StagedIterator& src_iter, StagedAppender<KT>& appender,
2125 position_t& position, match_stage_t stage) {
2126 assert(position.index == src_iter.index());
2127 // src_iter has reached the last item to append
2128 if (stage == STAGE) {
2129 // done, end recursion
2130 if constexpr (!IS_BOTTOM) {
2131 position.nxt = position_t::nxt_t::begin();
2132 }
2133 } else {
2134 assert(stage < STAGE);
2135 // proceed append in the next stage
2136 NXT_STAGE_T::template append_until<KT>(
2137 src_iter.nxt(), appender.open_nxt(src_iter.get_key()),
2138 position.nxt, stage);
2139 }
2140 }
2141
2142 template <KeyT KT>
2143 static void append_until(StagedIterator& src_iter, StagedAppender<KT>& appender,
2144 position_t& position, match_stage_t stage) {
2145 index_t from_index = src_iter.index();
2146 index_t& to_index = position.index;
2147 assert(from_index <= to_index);
2148 if constexpr (IS_BOTTOM) {
2149 assert(stage == STAGE);
2150 appender.append_until(src_iter, to_index);
2151 } else {
2152 assert(stage <= STAGE);
2153 if (src_iter.index() == to_index) {
2154 _append_into<KT>(src_iter, appender, position, stage);
2155 } else {
2156 if (to_index == INDEX_END) {
2157 assert(stage == STAGE);
2158 } else if (to_index == INDEX_LAST) {
2159 assert(stage < STAGE);
2160 }
2161 _append_range<KT>(src_iter, appender, to_index);
2162 _append_into<KT>(src_iter, appender, position, stage);
2163 }
2164 }
2165 to_index -= from_index;
2166 }
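// e.g. (illustrative) from_index == 2 and to_index == 5 appends items 2..4
// at this stage and rewrites position.index to 3, i.e. the index becomes
// relative to the first appended item.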
2167
2168 template <KeyT KT>
2169 static bool append_insert(
2170 const full_key_t<KT>& key, const value_input_t& value,
2171 StagedIterator& src_iter, StagedAppender<KT>& appender,
2172 bool is_front_insert, match_stage_t& stage, const value_t*& p_value) {
2173 assert(src_iter.valid());
2174 if (stage == STAGE) {
2175 appender.append(key, value, p_value);
2176 if (src_iter.is_end()) {
2177 return true;
2178 } else {
2179 return false;
2180 }
2181 } else {
2182 assert(stage < STAGE);
2183 if constexpr (!IS_BOTTOM) {
2184 auto nxt_is_end = NXT_STAGE_T::template append_insert<KT>(
2185 key, value, src_iter.get_nxt(), appender.get_nxt(),
2186 is_front_insert, stage, p_value);
2187 if (nxt_is_end) {
2188 appender.wrap_nxt();
2189 ++src_iter;
2190 if (is_front_insert) {
2191 stage = STAGE;
2192 }
2193 if (src_iter.is_end()) {
2194 return true;
2195 }
2196 }
2197 return false;
2198 } else {
2199 ceph_abort("impossible path");
2200 }
2201 }
2202 }
2203
2204 /* TrimType:
2205 * BEFORE: remove the entire container, which normally means the
2206 * corresponding higher-stage iterator needs to be trimmed as-a-whole.
2207 * AFTER: retain the entire container, which normally means the trim
2208 * should start from the next iterator at the higher stage.
2209 * AT: trim happens in the current container, and the corresponding
2210 * higher-stage iterator needs to be adjusted by the trimmed size.
2211 */
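/*
 * Illustrative walk: trimming at a bottom-stage index of 0 bubbles
 * TrimType::BEFORE upward, so the containing higher-stage entry is trimmed
 * as-a-whole; a position past the last bottom-stage entry bubbles
 * TrimType::AFTER upward, so trimming starts from the next higher-stage
 * entry; anything in between yields TrimType::AT plus the trimmed size.
 */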
2212 static std::tuple<TrimType, node_offset_t>
2213 recursively_trim(NodeExtentMutable& mut, StagedIterator& trim_at) {
2214 if (!trim_at.valid()) {
2215 return {TrimType::BEFORE, 0u};
2216 }
2217 if (trim_at.is_end()) {
2218 return {TrimType::AFTER, 0u};
2219 }
2220
2221 auto& iter = trim_at.get();
2222 if constexpr (!IS_BOTTOM) {
2223 auto [type, trimmed] = NXT_STAGE_T::recursively_trim(
2224 mut, trim_at.get_nxt());
2225 node_offset_t trim_size;
2226 if (type == TrimType::AFTER) {
2227 if (iter.is_last()) {
2228 return {TrimType::AFTER, 0u};
2229 }
2230 ++trim_at;
2231 trim_size = iter.trim_until(mut);
2232 } else if (type == TrimType::BEFORE) {
2233 if (iter.index() == 0) {
2234 return {TrimType::BEFORE, 0u};
2235 }
2236 trim_size = iter.trim_until(mut);
2237 } else {
2238 trim_size = iter.trim_at(mut, trimmed);
2239 }
2240 return {TrimType::AT, trim_size};
2241 } else {
2242 if (iter.index() == 0) {
2243 return {TrimType::BEFORE, 0u};
2244 } else {
2245 auto trimmed = iter.trim_until(mut);
2246 return {TrimType::AT, trimmed};
2247 }
2248 }
2249 }
2250
2251 static void trim(NodeExtentMutable& mut, StagedIterator& trim_at) {
2252 auto [type, trimmed] = recursively_trim(mut, trim_at);
2253 if (type == TrimType::BEFORE) {
2254 assert(trim_at.valid());
2255 auto& iter = trim_at.get();
2256 iter.trim_until(mut);
2257 }
2258 }
2259
2260 static std::optional<std::tuple<match_stage_t, node_offset_t, bool>>
2261 proceed_erase_recursively(
2262 NodeExtentMutable& mut,
2263 const container_t& container, // IN
2264 const char* p_left_bound, // IN
2265 position_t& pos) { // IN&OUT
2266 auto iter = iterator_t(container);
2267 auto& index = pos.index;
2268 assert(is_valid_index(index));
2269 iter.seek_at(index);
2270 bool is_last = iter.is_last();
2271
2272 if constexpr (!IS_BOTTOM) {
2273 auto nxt_container = iter.get_nxt_container();
2274 auto ret = NXT_STAGE_T::proceed_erase_recursively(
2275 mut, nxt_container, p_left_bound, pos.nxt);
2276 if (ret.has_value()) {
2277 // erased at lower level
2278 auto [r_stage, r_erase_size, r_done] = *ret;
2279 assert(r_erase_size != 0);
2280 iter.update_size(mut, -r_erase_size);
2281 if (r_done) {
2282 // done, the next_pos is calculated
2283 return ret;
2284 } else {
2285 if (is_last) {
2286 // need to find the next pos at upper stage
2287 return ret;
2288 } else {
2289 // done, calculate the next pos
2290 ++index;
2291 pos.nxt = NXT_STAGE_T::position_t::begin();
2292 return {{r_stage, r_erase_size, true}};
2293 }
2294 }
2295 }
2296 // not erased at lower level
2297 }
2298
2299 // not erased yet
2300 if (index == 0 && is_last) {
2301 // need to erase from the upper stage
2302 return std::nullopt;
2303 } else {
2304 auto erase_size = iter.erase(mut, p_left_bound);
2305 assert(erase_size != 0);
2306 if (is_last) {
2307 // need to find the next pos at upper stage
2308 return {{STAGE, erase_size, false}};
2309 } else {
2310 // done, calculate the next pos (should be correct already)
2311 if constexpr (!IS_BOTTOM) {
2312 assert(pos.nxt == NXT_STAGE_T::position_t::begin());
2313 }
2314 return {{STAGE, erase_size, true}};
2315 }
2316 }
2317 }
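// Hedged summary of the return protocol: {stage, erase_size, true} means
// the erase is done and pos already names the next position;
// {stage, erase_size, false} means the erased entry was the last one at its
// stage, so the next position must be located at the upper stage; and
// std::nullopt means this container would become empty, so the erase has to
// be performed by the upper stage instead.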
2318
2319 static match_stage_t erase(
2320 NodeExtentMutable& mut,
2321 const container_t& node_stage, // IN
2322 position_t& erase_pos) { // IN&OUT
2323 auto p_left_bound = node_stage.p_left_bound();
2324 auto ret = proceed_erase_recursively(
2325 mut, node_stage, p_left_bound, erase_pos);
2326 if (ret.has_value()) {
2327 auto [r_stage, r_erase_size, r_done] = *ret;
2328 std::ignore = r_erase_size;
2329 if (r_done) {
2330 assert(!erase_pos.is_end());
2331 return r_stage;
2332 } else {
2333 // erased the last kv
2334 erase_pos = position_t::end();
2335 return r_stage;
2336 }
2337 } else {
2338 assert(node_stage.keys() == 1);
2339 node_stage.erase_at(mut, node_stage, 0, p_left_bound);
2340 erase_pos = position_t::end();
2341 return STAGE;
2342 }
2343 }
2344
2345 static std::tuple<match_stage_t, node_offset_t> evaluate_merge(
2346 const full_key_t<KeyT::VIEW>& left_pivot_index,
2347 const container_t& right_container) {
2348 auto r_iter = iterator_t(right_container);
2349 r_iter.seek_at(0);
2350 node_offset_t compensate = r_iter.header_size();
2351 auto cmp = compare_to<KeyT::VIEW>(left_pivot_index, r_iter.get_key());
2352 if (cmp == MatchKindCMP::EQ) {
2353 if constexpr (!IS_BOTTOM) {
2354 // the index is equal, compensate and look at the lower stage
2355 compensate += r_iter.size_to_nxt();
2356 auto r_nxt_container = r_iter.get_nxt_container();
2357 auto [ret_stage, ret_compensate] = NXT_STAGE_T::evaluate_merge(
2358 left_pivot_index, r_nxt_container);
2359 compensate += ret_compensate;
2360 return {ret_stage, compensate};
2361 } else {
2362 ceph_abort("impossible path: left_pivot_key == right_first_key");
2363 }
2364 } else if (cmp == MatchKindCMP::LT) {
2365 // ok, do merge here
2366 return {STAGE, compensate};
2367 } else {
2368 ceph_abort("impossible path: left_pivot_key > right_first_key");
2369 }
2370 }
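// Reading of the result (hedged): the returned match_stage_t is the first
// stage, scanning from the top, at which left_pivot_index sorts strictly
// below the right node's first key, and the node_offset_t accumulates the
// header_size() and size_to_nxt() overhead compensated while descending to
// that stage.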
2371 };
2372
2373 /**
2374 * Configurations for struct staged
2375 *
2376 * The staged_params_* types assemble different container_t implementations
2377 * (defined by staged::_iterator_t) by STAGE, and construct the final
2378 * multi-stage implementations for different node layouts defined by
2379 * node_extent_t<FieldType, NODE_TYPE>.
2380 *
2381 * The specialized implementations for different layouts are accessible through
2382 * the helper type node_to_stage_t<node_extent_t<FieldType, NODE_TYPE>>.
2383 *
2384 * Specifically, the settings of 8 layouts are:
2385 *
2386 * The layout (N0, LEAF/INTERNAL) has 3 stages:
2387 * - STAGE_LEFT: node_extent_t<node_fields_0_t, LEAF/INTERNAL>
2388 * - STAGE_STRING: item_iterator_t<LEAF/INTERNAL>
2389 * - STAGE_RIGHT: sub_items_t<LEAF/INTERNAL>
2390 *
2391 * The layout (N1, LEAF/INTERNAL) has 3 stages:
2392 * - STAGE_LEFT: node_extent_t<node_fields_1_t, LEAF/INTERNAL>
2393 * - STAGE_STRING: item_iterator_t<LEAF/INTERNAL>
2394 * - STAGE_RIGHT: sub_items_t<LEAF/INTERNAL>
2395 *
2396 * The layout (N2, LEAF/INTERNAL) has 2 stages:
2397 * - STAGE_STRING: node_extent_t<node_fields_2_t, LEAF/INTERNAL>
2398 * - STAGE_RIGHT: sub_items_t<LEAF/INTERNAL>
2399 *
2400 * The layout (N3, LEAF) has 1 stage:
2401 * - STAGE_RIGHT: node_extent_t<leaf_fields_3_t, LEAF>
2402 *
2403 * The layout (N3, INTERNAL) has 1 stage:
2404 * - STAGE_RIGHT: node_extent_t<internal_fields_3_t, INTERNAL>
2405 */
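// For instance (illustrative; the layout types are defined elsewhere):
//   using leaf_n0_stage_t =
//     node_to_stage_t<node_extent_t<node_fields_0_t, node_type_t::LEAF>>;
// resolves to staged<staged_params_node_01<...>>, whose STAGE is STAGE_LEFT
// and whose next_param_t chain descends through STAGE_STRING to STAGE_RIGHT.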
2406
2407 template <node_type_t _NODE_TYPE>
2408 struct staged_params_subitems {
2409 using container_t = sub_items_t<_NODE_TYPE>;
2410 static constexpr auto NODE_TYPE = _NODE_TYPE;
2411 static constexpr auto STAGE = STAGE_RIGHT;
2412
2413 // dummy type in order to make our type system work
2414 // any better solution to get rid of this?
2415 using next_param_t = staged_params_subitems<NODE_TYPE>;
2416 };
2417
2418 template <node_type_t _NODE_TYPE>
2419 struct staged_params_item_iterator {
2420 using container_t = item_iterator_t<_NODE_TYPE>;
2421 static constexpr auto NODE_TYPE = _NODE_TYPE;
2422 static constexpr auto STAGE = STAGE_STRING;
2423
2424 using next_param_t = staged_params_subitems<NODE_TYPE>;
2425 };
2426
2427 template <typename NodeType>
2428 struct staged_params_node_01 {
2429 using container_t = NodeType;
2430 static constexpr auto NODE_TYPE = NodeType::NODE_TYPE;
2431 static constexpr auto STAGE = STAGE_LEFT;
2432
2433 using next_param_t = staged_params_item_iterator<NODE_TYPE>;
2434 };
2435
2436 template <typename NodeType>
2437 struct staged_params_node_2 {
2438 using container_t = NodeType;
2439 static constexpr auto NODE_TYPE = NodeType::NODE_TYPE;
2440 static constexpr auto STAGE = STAGE_STRING;
2441
2442 using next_param_t = staged_params_subitems<NODE_TYPE>;
2443 };
2444
2445 template <typename NodeType>
2446 struct staged_params_node_3 {
2447 using container_t = NodeType;
2448 static constexpr auto NODE_TYPE = NodeType::NODE_TYPE;
2449 static constexpr auto STAGE = STAGE_RIGHT;
2450
2451 // dummy type in order to make our type system work
2452 // any better solution to get rid of this?
2453 using next_param_t = staged_params_node_3<NodeType>;
2454 };
2455
2456 template <typename NodeType, typename Enable = void> struct _node_to_stage_t;
2457 template <typename NodeType>
2458 struct _node_to_stage_t<NodeType,
2459 std::enable_if_t<NodeType::FIELD_TYPE == field_type_t::N0 ||
2460 NodeType::FIELD_TYPE == field_type_t::N1>> {
2461 using type = staged<staged_params_node_01<NodeType>>;
2462 };
2463 template <typename NodeType>
2464 struct _node_to_stage_t<NodeType,
2465 std::enable_if_t<NodeType::FIELD_TYPE == field_type_t::N2>> {
2466 using type = staged<staged_params_node_2<NodeType>>;
2467 };
2468 template <typename NodeType>
2469 struct _node_to_stage_t<NodeType,
2470 std::enable_if_t<NodeType::FIELD_TYPE == field_type_t::N3>> {
2471 using type = staged<staged_params_node_3<NodeType>>;
2472 };
2473 template <typename NodeType>
2474 using node_to_stage_t = typename _node_to_stage_t<NodeType>::type;
2475
2476 }  // namespace crimson::os::seastore::onode