// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <optional>
#include <variant>

#include "crimson/os/seastore/onode_manager/staged-fltree/node_types.h"
#include "key_layout.h"
#include "stage_types.h"

namespace crimson::os::seastore::onode {

class NodeExtentMutable;

struct internal_sub_item_t {
  const snap_gen_t& get_key() const { return key; }
  const laddr_packed_t* get_p_value() const { return &value; }

  snap_gen_t key;
  laddr_packed_t value;
} __attribute__((packed));
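
// Since internal_sub_item_t is packed and fixed-size, an index maps to an
// address by pure arithmetic, with no per-item metadata. A sketch of the
// lookup (illustrative only; the real implementation is
// internal_sub_items_t::operator[] below):
//
//   // sub-item i counts leftwards from the right end of the range:
//   const auto* p_item = reinterpret_cast<const internal_sub_item_t*>(
//       range.p_end - (i + 1) * sizeof(internal_sub_item_t));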

/**
 * internal_sub_items_t
 *
 * The STAGE_RIGHT implementation for internal node N0/N1/N2, implements
 * staged contract as an indexable container to index snap-gen to child node
 * addresses.
 *
 * The layout of the container storing n sub-items:
 *
 * # <--------- container range -----------> #
 * #<~># sub-items [2, n)                    #
 * #   # <- sub-item 1 -> # <- sub-item 0 -> #
 * #...# snap-gen | laddr # snap-gen | laddr #
 */
class internal_sub_items_t {
 public:
  using num_keys_t = index_t;

  internal_sub_items_t(const memory_range_t& range) {
    assert(range.p_start < range.p_end);
    assert((range.p_end - range.p_start) % sizeof(internal_sub_item_t) == 0);
    num_items = (range.p_end - range.p_start) / sizeof(internal_sub_item_t);
    assert(num_items > 0);
    auto _p_first_item = range.p_end - sizeof(internal_sub_item_t);
    p_first_item = reinterpret_cast<const internal_sub_item_t*>(_p_first_item);
  }

  // container type system
  using key_get_type = const snap_gen_t&;
  static constexpr auto CONTAINER_TYPE = ContainerType::INDEXABLE;
  num_keys_t keys() const { return num_items; }
  key_get_type operator[](index_t index) const {
    assert(index < num_items);
    return (p_first_item - index)->get_key();
  }
  node_offset_t size_before(index_t index) const {
    size_t ret = index * sizeof(internal_sub_item_t);
    assert(ret < NODE_BLOCK_SIZE);
    return ret;
  }
  const laddr_packed_t* get_p_value(index_t index) const {
    assert(index < num_items);
    return (p_first_item - index)->get_p_value();
  }
  node_offset_t size_overhead_at(index_t index) const { return 0u; }
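
  // Fixed-size sub-items need no per-item metadata, hence the zero overhead
  // above; contrast leaf_sub_items_t below, which pays one node_offset_t per
  // item to index variable-size onodes.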

  void encode(const char* p_node_start, ceph::bufferlist& encoded) const {
    auto p_end = reinterpret_cast<const char*>(p_first_item) +
                 sizeof(internal_sub_item_t);
    auto p_start = p_end - num_items * sizeof(internal_sub_item_t);
    int start_offset = p_start - p_node_start;
    int end_offset = p_end - p_node_start;
    assert(start_offset > 0 &&
           start_offset < end_offset &&
           end_offset < NODE_BLOCK_SIZE);
    ceph::encode(static_cast<node_offset_t>(start_offset), encoded);
    ceph::encode(static_cast<node_offset_t>(end_offset), encoded);
  }

  static internal_sub_items_t decode(
      const char* p_node_start, ceph::bufferlist::const_iterator& delta) {
    node_offset_t start_offset;
    ceph::decode(start_offset, delta);
    node_offset_t end_offset;
    ceph::decode(end_offset, delta);
    assert(start_offset < end_offset);
    assert(end_offset <= NODE_BLOCK_SIZE);
    return internal_sub_items_t({p_node_start + start_offset,
                                 p_node_start + end_offset});
  }
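
  // encode()/decode() persist only the container's position inside the node
  // block; delta replay rebuilds the same view over the extent. A sketch of
  // the round trip (illustrative, not a normative test):
  //
  //   ceph::bufferlist bl;
  //   items.encode(p_node_start, bl);        // records the two offsets
  //   auto it = bl.cbegin();
  //   auto replayed = internal_sub_items_t::decode(p_node_start, it);
  //   assert(replayed.keys() == items.keys());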

  static node_offset_t header_size() { return 0u; }

  template <KeyT KT>
  static node_offset_t estimate_insert(
      const full_key_t<KT>&, const laddr_packed_t&) {
    return sizeof(internal_sub_item_t);
  }

  template <KeyT KT>
  static const laddr_packed_t* insert_at(
      NodeExtentMutable&, const internal_sub_items_t&,
      const full_key_t<KT>&, const laddr_packed_t&,
      index_t index, node_offset_t size, const char* p_left_bound);

  static node_offset_t trim_until(NodeExtentMutable&, internal_sub_items_t&, index_t);

  template <KeyT KT>
  class Appender;

 private:
  index_t num_items;
  const internal_sub_item_t* p_first_item;
};
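
// The Appender below rebuilds an internal sub-items container at a new
// location, e.g. during node split or merge: it accepts copied index ranges
// from an existing container and/or freshly inserted key/laddr pairs, and
// wrap() returns the final append position. (A summary inferred from the
// interface, not a normative contract.)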

template <KeyT KT>
class internal_sub_items_t::Appender {
 public:
  Appender(NodeExtentMutable* p_mut, char* p_append)
    : p_mut{p_mut}, p_append{p_append} {}
  void append(const internal_sub_items_t& src, index_t from, index_t items);
  void append(const full_key_t<KT>&, const laddr_packed_t&, const laddr_packed_t*&);
  char* wrap() { return p_append; }

 private:
  NodeExtentMutable* p_mut;
  char* p_append;
};

/**
 * leaf_sub_items_t
 *
 * The STAGE_RIGHT implementation for leaf node N0/N1/N2, implements staged
 * contract as an indexable container to index snap-gen to onode_t.
 *
 * The layout of the container storing n sub-items:
 *
 * # <------------------------ container range -------------------------------> #
 * # <---------- sub-items ----------------> # <--- offsets ---------#          #
 * #<~># sub-items [2, n)                    #<~>| offsets [2, n)    #          #
 * #   # <- sub-item 1 -> # <- sub-item 0 -> #   |                   #          #
 * #...# snap-gen | onode # snap-gen | onode #...| offset1 | offset0 # num_keys #
 *                                           ^                       ^          ^
 *                                           |                       |          |
 *                                      p_items_end              p_offsets   p_num_keys
 */
class leaf_sub_items_t {
 public:
  // TODO: decide by NODE_BLOCK_SIZE, sizeof(snap_gen_t),
  //       and the minimal size of onode_t
  using num_keys_t = uint8_t;

  leaf_sub_items_t(const memory_range_t& range) {
    assert(range.p_start < range.p_end);
    auto _p_num_keys = range.p_end - sizeof(num_keys_t);
    assert(range.p_start < _p_num_keys);
    p_num_keys = reinterpret_cast<const num_keys_t*>(_p_num_keys);
    assert(keys());
    auto _p_offsets = _p_num_keys - sizeof(node_offset_t);
    assert(range.p_start < _p_offsets);
    p_offsets = reinterpret_cast<const node_offset_packed_t*>(_p_offsets);
    p_items_end = reinterpret_cast<const char*>(&get_offset(keys() - 1));
    assert(range.p_start < p_items_end);
    assert(range.p_start == p_start());
  }

  bool operator==(const leaf_sub_items_t& x) {
    return (p_num_keys == x.p_num_keys &&
            p_offsets == x.p_offsets &&
            p_items_end == x.p_items_end);
  }

  const char* p_start() const { return get_item_end(keys()); }

  const node_offset_packed_t& get_offset(index_t index) const {
    assert(index < keys());
    return *(p_offsets - index);
  }

  const node_offset_t get_offset_to_end(index_t index) const {
    assert(index <= keys());
    return index == 0 ? 0 : get_offset(index - 1).value;
  }

  const char* get_item_start(index_t index) const {
    return p_items_end - get_offset(index).value;
  }

  const char* get_item_end(index_t index) const {
    return p_items_end - get_offset_to_end(index);
  }
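
  // Worked example of the offset indirection above (values illustrative):
  // with offset0 = 32 and offset1 = 57, item 0 occupies
  // [p_items_end - 32, p_items_end) and item 1 occupies
  // [p_items_end - 57, p_items_end - 32); each offset records the distance
  // from the item's start to p_items_end, so offsets accumulate leftwards.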

  // container type system
  using key_get_type = const snap_gen_t&;
  static constexpr auto CONTAINER_TYPE = ContainerType::INDEXABLE;
  num_keys_t keys() const { return *p_num_keys; }
  key_get_type operator[](index_t index) const {
    assert(index < keys());
    auto pointer = get_item_end(index);
    assert(get_item_start(index) < pointer);
    pointer -= sizeof(snap_gen_t);
    assert(get_item_start(index) < pointer);
    return *reinterpret_cast<const snap_gen_t*>(pointer);
  }
  node_offset_t size_before(index_t index) const {
    assert(index <= keys());
    size_t ret;
    if (index == 0) {
      ret = sizeof(num_keys_t);
    } else {
      --index;
      ret = sizeof(num_keys_t) +
            (index + 1) * sizeof(node_offset_t) +
            get_offset(index).value;
    }
    assert(ret < NODE_BLOCK_SIZE);
    return ret;
  }
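
  // Byte accounting behind size_before() above (illustrative): for index > 0
  // the consumed space is the num_keys_t header, plus `index` node_offset_t
  // slots, plus get_offset(index - 1).value bytes of key/value payload;
  // index == 0 costs only the header.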

  node_offset_t size_overhead_at(index_t index) const { return sizeof(node_offset_t); }
  const onode_t* get_p_value(index_t index) const {
    assert(index < keys());
    auto pointer = get_item_start(index);
    auto value = reinterpret_cast<const onode_t*>(pointer);
    assert(pointer + value->size + sizeof(snap_gen_t) == get_item_end(index));
    return value;
  }

  void encode(const char* p_node_start, ceph::bufferlist& encoded) const {
    auto p_end = reinterpret_cast<const char*>(p_num_keys) +
                 sizeof(num_keys_t);
    int start_offset = p_start() - p_node_start;
    int end_offset = p_end - p_node_start;
    assert(start_offset > 0 &&
           start_offset < end_offset &&
           end_offset < NODE_BLOCK_SIZE);
    ceph::encode(static_cast<node_offset_t>(start_offset), encoded);
    ceph::encode(static_cast<node_offset_t>(end_offset), encoded);
  }

  static leaf_sub_items_t decode(
      const char* p_node_start, ceph::bufferlist::const_iterator& delta) {
    node_offset_t start_offset;
    ceph::decode(start_offset, delta);
    node_offset_t end_offset;
    ceph::decode(end_offset, delta);
    assert(start_offset < end_offset);
    assert(end_offset <= NODE_BLOCK_SIZE);
    return leaf_sub_items_t({p_node_start + start_offset,
                             p_node_start + end_offset});
  }

  static node_offset_t header_size() { return sizeof(num_keys_t); }

  template <KeyT KT>
  static node_offset_t estimate_insert(const full_key_t<KT>&, const onode_t& value) {
    return value.size + sizeof(snap_gen_t) + sizeof(node_offset_t);
  }

  template <KeyT KT>
  static const onode_t* insert_at(
      NodeExtentMutable&, const leaf_sub_items_t&,
      const full_key_t<KT>&, const onode_t&,
      index_t index, node_offset_t size, const char* p_left_bound);

  static node_offset_t trim_until(NodeExtentMutable&, leaf_sub_items_t&, index_t index);

  template <KeyT KT>
  class Appender;

 private:
  // TODO: support unaligned access
  const num_keys_t* p_num_keys;
  const node_offset_packed_t* p_offsets;
  const char* p_items_end;
};

constexpr index_t APPENDER_LIMIT = 3u;
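
// Presumably three slots suffice because one batched rebuild appends at most:
// the items left of an insertion point, the newly inserted key/value, and the
// items to its right. (An inference from usage, not stated in this header.)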

template <KeyT KT>
class leaf_sub_items_t::Appender {
  struct range_items_t {
    index_t from;
    index_t items;
  };
  struct kv_item_t {
    const full_key_t<KT>* p_key;
    const onode_t* p_value;
  };
  using var_t = std::variant<range_items_t, kv_item_t>;

 public:
  Appender(NodeExtentMutable* p_mut, char* p_append)
    : p_mut{p_mut}, p_append{p_append} {}

  void append(const leaf_sub_items_t& src, index_t from, index_t items) {
    assert(cnt <= APPENDER_LIMIT);
    assert(from <= src.keys());
    if (items == 0) {
      return;
    }
    if (op_src) {
      assert(*op_src == src);
    } else {
      op_src = src;
    }
    assert(from < src.keys());
    assert(from + items <= src.keys());
    appends[cnt] = range_items_t{from, items};
    ++cnt;
  }
  void append(const full_key_t<KT>& key,
              const onode_t& value, const onode_t*& p_value) {
    assert(pp_value == nullptr);
    assert(cnt <= APPENDER_LIMIT);
    appends[cnt] = kv_item_t{&key, &value};
    ++cnt;
    pp_value = &p_value;
  }

  char* wrap();

 private:
  std::optional<leaf_sub_items_t> op_src;
  const onode_t** pp_value = nullptr;
  NodeExtentMutable* p_mut;
  char* p_append;
  var_t appends[APPENDER_LIMIT];
  index_t cnt = 0;
};
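
// A usage sketch for the node-type dispatch below (illustrative only):
//
//   static_assert(std::is_same_v<sub_items_t<node_type_t::LEAF>,
//                                leaf_sub_items_t>);
//   static_assert(std::is_same_v<sub_items_t<node_type_t::INTERNAL>,
//                                internal_sub_items_t>);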

template <node_type_t> struct _sub_items_t;
template<> struct _sub_items_t<node_type_t::INTERNAL> { using type = internal_sub_items_t; };
template<> struct _sub_items_t<node_type_t::LEAF> { using type = leaf_sub_items_t; };
template <node_type_t NODE_TYPE>
using sub_items_t = typename _sub_items_t<NODE_TYPE>::type;

}