]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #pragma once | |
5 | ||
6 | #include <variant> | |
7 | ||
8 | #include "crimson/os/seastore/onode_manager/staged-fltree/node_types.h" | |
9 | #include "key_layout.h" | |
10 | #include "stage_types.h" | |
11 | ||
12 | namespace crimson::os::seastore::onode { | |
13 | ||
14 | class NodeExtentMutable; | |
15 | ||
16 | struct internal_sub_item_t { | |
17 | const snap_gen_t& get_key() const { return key; } | |
18 | const laddr_packed_t* get_p_value() const { return &value; } | |
19 | ||
20 | snap_gen_t key; | |
21 | laddr_packed_t value; | |
22 | } __attribute__((packed)); | |
23 | ||
24 | /** | |
25 | * internal_sub_items_t | |
26 | * | |
27 | * The STAGE_RIGHT implementation for internal node N0/N1/N2, implements staged | |
28 | * contract as an indexable container to index snap-gen to child node | |
29 | * addresses. | |
30 | * | |
31 | * The layout of the contaner storing n sub-items: | |
32 | * | |
33 | * # <--------- container range -----------> # | |
34 | * #<~># sub-items [2, n) # | |
35 | * # # <- sub-item 1 -> # <- sub-item 0 -> # | |
36 | * #...# snap-gen | laddr # snap-gen | laddr # | |
37 | * ^ | |
38 | * | | |
39 | * p_first_item + | |
40 | */ | |
41 | class internal_sub_items_t { | |
42 | public: | |
43 | using num_keys_t = index_t; | |
44 | ||
20effc67 TL |
45 | internal_sub_items_t(const container_range_t& _range) |
46 | : node_size{_range.node_size} { | |
47 | assert(is_valid_node_size(node_size)); | |
48 | auto& range = _range.range; | |
f67539c2 TL |
49 | assert(range.p_start < range.p_end); |
50 | assert((range.p_end - range.p_start) % sizeof(internal_sub_item_t) == 0); | |
51 | num_items = (range.p_end - range.p_start) / sizeof(internal_sub_item_t); | |
52 | assert(num_items > 0); | |
53 | auto _p_first_item = range.p_end - sizeof(internal_sub_item_t); | |
54 | p_first_item = reinterpret_cast<const internal_sub_item_t*>(_p_first_item); | |
55 | } | |
56 | ||
57 | // container type system | |
58 | using key_get_type = const snap_gen_t&; | |
59 | static constexpr auto CONTAINER_TYPE = ContainerType::INDEXABLE; | |
60 | num_keys_t keys() const { return num_items; } | |
61 | key_get_type operator[](index_t index) const { | |
62 | assert(index < num_items); | |
63 | return (p_first_item - index)->get_key(); | |
64 | } | |
65 | node_offset_t size_before(index_t index) const { | |
66 | size_t ret = index * sizeof(internal_sub_item_t); | |
20effc67 | 67 | assert(ret < node_size); |
f67539c2 TL |
68 | return ret; |
69 | } | |
70 | const laddr_packed_t* get_p_value(index_t index) const { | |
71 | assert(index < num_items); | |
72 | return (p_first_item - index)->get_p_value(); | |
73 | } | |
74 | node_offset_t size_overhead_at(index_t index) const { return 0u; } | |
75 | void encode(const char* p_node_start, ceph::bufferlist& encoded) const { | |
76 | auto p_end = reinterpret_cast<const char*>(p_first_item) + | |
77 | sizeof(internal_sub_item_t); | |
78 | auto p_start = p_end - num_items * sizeof(internal_sub_item_t); | |
79 | int start_offset = p_start - p_node_start; | |
20effc67 TL |
80 | int stage_size = p_end - p_start; |
81 | assert(start_offset > 0); | |
82 | assert(stage_size > 0); | |
83 | assert(start_offset + stage_size < (int)node_size); | |
f67539c2 | 84 | ceph::encode(static_cast<node_offset_t>(start_offset), encoded); |
20effc67 | 85 | ceph::encode(static_cast<node_offset_t>(stage_size), encoded); |
f67539c2 TL |
86 | } |
87 | ||
88 | static internal_sub_items_t decode( | |
20effc67 TL |
89 | const char* p_node_start, |
90 | extent_len_t node_size, | |
91 | ceph::bufferlist::const_iterator& delta) { | |
f67539c2 TL |
92 | node_offset_t start_offset; |
93 | ceph::decode(start_offset, delta); | |
20effc67 TL |
94 | node_offset_t stage_size; |
95 | ceph::decode(stage_size, delta); | |
96 | assert(start_offset > 0); | |
97 | assert(stage_size > 0); | |
98 | assert((unsigned)start_offset + stage_size < node_size); | |
99 | return internal_sub_items_t({{p_node_start + start_offset, | |
100 | p_node_start + start_offset + stage_size}, | |
101 | node_size}); | |
f67539c2 TL |
102 | } |
103 | ||
104 | static node_offset_t header_size() { return 0u; } | |
105 | ||
106 | template <KeyT KT> | |
107 | static node_offset_t estimate_insert( | |
20effc67 | 108 | const full_key_t<KT>&, const laddr_t&) { |
f67539c2 TL |
109 | return sizeof(internal_sub_item_t); |
110 | } | |
111 | ||
112 | template <KeyT KT> | |
113 | static const laddr_packed_t* insert_at( | |
114 | NodeExtentMutable&, const internal_sub_items_t&, | |
20effc67 | 115 | const full_key_t<KT>&, const laddr_t&, |
f67539c2 TL |
116 | index_t index, node_offset_t size, const char* p_left_bound); |
117 | ||
118 | static node_offset_t trim_until(NodeExtentMutable&, internal_sub_items_t&, index_t); | |
119 | ||
20effc67 TL |
120 | static node_offset_t erase_at( |
121 | NodeExtentMutable&, const internal_sub_items_t&, index_t, const char*); | |
122 | ||
f67539c2 TL |
123 | template <KeyT KT> |
124 | class Appender; | |
125 | ||
126 | private: | |
20effc67 | 127 | extent_len_t node_size; |
f67539c2 TL |
128 | index_t num_items; |
129 | const internal_sub_item_t* p_first_item; | |
130 | }; | |
131 | ||
132 | template <KeyT KT> | |
133 | class internal_sub_items_t::Appender { | |
134 | public: | |
135 | Appender(NodeExtentMutable* p_mut, char* p_append) | |
136 | : p_mut{p_mut}, p_append{p_append} {} | |
20effc67 TL |
137 | Appender(NodeExtentMutable* p_mut, const internal_sub_items_t& sub_items) |
138 | : p_mut{p_mut}, | |
139 | p_append{(char*)(sub_items.p_first_item + 1 - sub_items.keys())} { | |
140 | assert(sub_items.keys()); | |
141 | } | |
f67539c2 | 142 | void append(const internal_sub_items_t& src, index_t from, index_t items); |
20effc67 | 143 | void append(const full_key_t<KT>&, const laddr_t&, const laddr_packed_t*&); |
f67539c2 TL |
144 | char* wrap() { return p_append; } |
145 | private: | |
146 | NodeExtentMutable* p_mut; | |
147 | char* p_append; | |
148 | }; | |
149 | ||
150 | /** | |
151 | * leaf_sub_items_t | |
152 | * | |
153 | * The STAGE_RIGHT implementation for leaf node N0/N1/N2, implements staged | |
20effc67 | 154 | * contract as an indexable container to index snap-gen to value_header_t. |
f67539c2 TL |
155 | * |
156 | * The layout of the contaner storing n sub-items: | |
157 | * | |
158 | * # <------------------------ container range -------------------------------> # | |
159 | * # <---------- sub-items ----------------> # <--- offsets ---------# # | |
160 | * #<~># sub-items [2, n) #<~>| offsets [2, n) # # | |
161 | * # # <- sub-item 1 -> # <- sub-item 0 -> # | # # | |
20effc67 | 162 | * #...# snap-gen | value # snap-gen | value #...| offset1 | offset0 # num_keys # |
f67539c2 TL |
163 | * ^ ^ ^ |
164 | * | | | | |
165 | * p_items_end + p_offsets + | | |
166 | * p_num_keys + | |
167 | */ | |
168 | class leaf_sub_items_t { | |
169 | public: | |
20effc67 TL |
170 | // should be enough to index all keys under 64 KiB node |
171 | using num_keys_t = uint16_t; | |
172 | ||
173 | // TODO: remove if num_keys_t is aligned | |
174 | struct num_keys_packed_t { | |
175 | num_keys_t value; | |
176 | } __attribute__((packed)); | |
177 | ||
178 | leaf_sub_items_t(const container_range_t& _range) | |
179 | : node_size{_range.node_size} { | |
180 | assert(is_valid_node_size(node_size)); | |
181 | auto& range = _range.range; | |
f67539c2 TL |
182 | assert(range.p_start < range.p_end); |
183 | auto _p_num_keys = range.p_end - sizeof(num_keys_t); | |
184 | assert(range.p_start < _p_num_keys); | |
20effc67 | 185 | p_num_keys = reinterpret_cast<const num_keys_packed_t*>(_p_num_keys); |
f67539c2 TL |
186 | assert(keys()); |
187 | auto _p_offsets = _p_num_keys - sizeof(node_offset_t); | |
188 | assert(range.p_start < _p_offsets); | |
189 | p_offsets = reinterpret_cast<const node_offset_packed_t*>(_p_offsets); | |
190 | p_items_end = reinterpret_cast<const char*>(&get_offset(keys() - 1)); | |
191 | assert(range.p_start < p_items_end); | |
192 | assert(range.p_start == p_start()); | |
193 | } | |
194 | ||
195 | bool operator==(const leaf_sub_items_t& x) { | |
196 | return (p_num_keys == x.p_num_keys && | |
197 | p_offsets == x.p_offsets && | |
198 | p_items_end == x.p_items_end); | |
199 | } | |
200 | ||
201 | const char* p_start() const { return get_item_end(keys()); } | |
202 | ||
203 | const node_offset_packed_t& get_offset(index_t index) const { | |
204 | assert(index < keys()); | |
205 | return *(p_offsets - index); | |
206 | } | |
207 | ||
208 | const node_offset_t get_offset_to_end(index_t index) const { | |
209 | assert(index <= keys()); | |
210 | return index == 0 ? 0 : get_offset(index - 1).value; | |
211 | } | |
212 | ||
213 | const char* get_item_start(index_t index) const { | |
214 | return p_items_end - get_offset(index).value; | |
215 | } | |
216 | ||
217 | const char* get_item_end(index_t index) const { | |
218 | return p_items_end - get_offset_to_end(index); | |
219 | } | |
220 | ||
221 | // container type system | |
222 | using key_get_type = const snap_gen_t&; | |
223 | static constexpr auto CONTAINER_TYPE = ContainerType::INDEXABLE; | |
20effc67 | 224 | num_keys_t keys() const { return p_num_keys->value; } |
f67539c2 TL |
225 | key_get_type operator[](index_t index) const { |
226 | assert(index < keys()); | |
227 | auto pointer = get_item_end(index); | |
228 | assert(get_item_start(index) < pointer); | |
229 | pointer -= sizeof(snap_gen_t); | |
230 | assert(get_item_start(index) < pointer); | |
231 | return *reinterpret_cast<const snap_gen_t*>(pointer); | |
232 | } | |
233 | node_offset_t size_before(index_t index) const { | |
234 | assert(index <= keys()); | |
235 | size_t ret; | |
236 | if (index == 0) { | |
237 | ret = sizeof(num_keys_t); | |
238 | } else { | |
239 | --index; | |
240 | ret = sizeof(num_keys_t) + | |
241 | (index + 1) * sizeof(node_offset_t) + | |
242 | get_offset(index).value; | |
243 | } | |
20effc67 | 244 | assert(ret < node_size); |
f67539c2 TL |
245 | return ret; |
246 | } | |
247 | node_offset_t size_overhead_at(index_t index) const { return sizeof(node_offset_t); } | |
20effc67 | 248 | const value_header_t* get_p_value(index_t index) const { |
f67539c2 TL |
249 | assert(index < keys()); |
250 | auto pointer = get_item_start(index); | |
20effc67 TL |
251 | auto value = reinterpret_cast<const value_header_t*>(pointer); |
252 | assert(pointer + value->allocation_size() + sizeof(snap_gen_t) == | |
253 | get_item_end(index)); | |
f67539c2 TL |
254 | return value; |
255 | } | |
256 | void encode(const char* p_node_start, ceph::bufferlist& encoded) const { | |
257 | auto p_end = reinterpret_cast<const char*>(p_num_keys) + | |
258 | sizeof(num_keys_t); | |
259 | int start_offset = p_start() - p_node_start; | |
20effc67 TL |
260 | int stage_size = p_end - p_start(); |
261 | assert(start_offset > 0); | |
262 | assert(stage_size > 0); | |
263 | assert(start_offset + stage_size < (int)node_size); | |
f67539c2 | 264 | ceph::encode(static_cast<node_offset_t>(start_offset), encoded); |
20effc67 | 265 | ceph::encode(static_cast<node_offset_t>(stage_size), encoded); |
f67539c2 TL |
266 | } |
267 | ||
268 | static leaf_sub_items_t decode( | |
20effc67 TL |
269 | const char* p_node_start, |
270 | extent_len_t node_size, | |
271 | ceph::bufferlist::const_iterator& delta) { | |
f67539c2 TL |
272 | node_offset_t start_offset; |
273 | ceph::decode(start_offset, delta); | |
20effc67 TL |
274 | node_offset_t stage_size; |
275 | ceph::decode(stage_size, delta); | |
276 | assert(start_offset > 0); | |
277 | assert(stage_size > 0); | |
278 | assert((unsigned)start_offset + stage_size < node_size); | |
279 | return leaf_sub_items_t({{p_node_start + start_offset, | |
280 | p_node_start + start_offset + stage_size}, | |
281 | node_size}); | |
f67539c2 TL |
282 | } |
283 | ||
284 | static node_offset_t header_size() { return sizeof(num_keys_t); } | |
285 | ||
286 | template <KeyT KT> | |
20effc67 TL |
287 | static node_offset_t estimate_insert( |
288 | const full_key_t<KT>&, const value_config_t& value) { | |
289 | return value.allocation_size() + sizeof(snap_gen_t) + sizeof(node_offset_t); | |
f67539c2 TL |
290 | } |
291 | ||
292 | template <KeyT KT> | |
20effc67 | 293 | static const value_header_t* insert_at( |
f67539c2 | 294 | NodeExtentMutable&, const leaf_sub_items_t&, |
20effc67 | 295 | const full_key_t<KT>&, const value_config_t&, |
f67539c2 TL |
296 | index_t index, node_offset_t size, const char* p_left_bound); |
297 | ||
298 | static node_offset_t trim_until(NodeExtentMutable&, leaf_sub_items_t&, index_t index); | |
299 | ||
20effc67 TL |
300 | static node_offset_t erase_at( |
301 | NodeExtentMutable&, const leaf_sub_items_t&, index_t, const char*); | |
302 | ||
f67539c2 TL |
303 | template <KeyT KT> |
304 | class Appender; | |
305 | ||
306 | private: | |
20effc67 TL |
307 | extent_len_t node_size; |
308 | const num_keys_packed_t* p_num_keys; | |
f67539c2 TL |
309 | const node_offset_packed_t* p_offsets; |
310 | const char* p_items_end; | |
311 | }; | |
312 | ||
313 | constexpr index_t APPENDER_LIMIT = 3u; | |
314 | ||
315 | template <KeyT KT> | |
316 | class leaf_sub_items_t::Appender { | |
317 | struct range_items_t { | |
318 | index_t from; | |
319 | index_t items; | |
320 | }; | |
321 | struct kv_item_t { | |
322 | const full_key_t<KT>* p_key; | |
20effc67 | 323 | value_config_t value_config; |
f67539c2 TL |
324 | }; |
325 | using var_t = std::variant<range_items_t, kv_item_t>; | |
326 | ||
327 | public: | |
328 | Appender(NodeExtentMutable* p_mut, char* p_append) | |
329 | : p_mut{p_mut}, p_append{p_append} { | |
330 | } | |
20effc67 TL |
331 | Appender(NodeExtentMutable* p_mut, const leaf_sub_items_t& sub_items) |
332 | : p_mut{p_mut} , op_dst(sub_items) { | |
333 | assert(sub_items.keys()); | |
f67539c2 | 334 | } |
20effc67 TL |
335 | |
336 | void append(const leaf_sub_items_t& src, index_t from, index_t items); | |
f67539c2 | 337 | void append(const full_key_t<KT>& key, |
20effc67 TL |
338 | const value_config_t& value, const value_header_t*& p_value) { |
339 | // append from empty | |
340 | assert(p_append); | |
f67539c2 TL |
341 | assert(pp_value == nullptr); |
342 | assert(cnt <= APPENDER_LIMIT); | |
20effc67 | 343 | appends[cnt] = kv_item_t{&key, value}; |
f67539c2 TL |
344 | ++cnt; |
345 | pp_value = &p_value; | |
346 | } | |
347 | char* wrap(); | |
348 | ||
349 | private: | |
f67539c2 | 350 | NodeExtentMutable* p_mut; |
20effc67 TL |
351 | // append from empty |
352 | std::optional<leaf_sub_items_t> op_src; | |
353 | const value_header_t** pp_value = nullptr; | |
354 | char* p_append = nullptr; | |
f67539c2 TL |
355 | var_t appends[APPENDER_LIMIT]; |
356 | index_t cnt = 0; | |
20effc67 TL |
357 | // append from existing |
358 | std::optional<leaf_sub_items_t> op_dst; | |
359 | char* p_appended = nullptr; | |
f67539c2 TL |
360 | }; |
361 | ||
362 | template <node_type_t> struct _sub_items_t; | |
363 | template<> struct _sub_items_t<node_type_t::INTERNAL> { using type = internal_sub_items_t; }; | |
364 | template<> struct _sub_items_t<node_type_t::LEAF> { using type = leaf_sub_items_t; }; | |
365 | template <node_type_t NODE_TYPE> | |
366 | using sub_items_t = typename _sub_items_t<NODE_TYPE>::type; | |
367 | ||
368 | } |