// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <optional>
#include <variant>

#include "crimson/os/seastore/onode_manager/staged-fltree/node_types.h"
#include "key_layout.h"
#include "stage_types.h"

namespace crimson::os::seastore::onode {

class NodeExtentMutable;

struct internal_sub_item_t {
  const snap_gen_t& get_key() const { return key; }
  const laddr_packed_t* get_p_value() const { return &value; }

  snap_gen_t key;
  laddr_packed_t value;
} __attribute__((packed));

/**
 * internal_sub_items_t
 *
 * The STAGE_RIGHT implementation for internal node N0/N1/N2 implements the
 * staged contract as an indexable container to index snap-gen to child node
 * addresses.
 *
 * The layout of the container storing n sub-items:
 *
 * # <--------- container range -----------> #
 * #<~># sub-items [2, n)                    #
 * #   # <- sub-item 1 -> # <- sub-item 0 -> #
 * #...# snap-gen | laddr # snap-gen | laddr #
 *                        ^
 *                        |
 *           p_first_item +
 *
 * An illustrative usage sketch follows the class definition below.
 */
class internal_sub_items_t {
 public:
  using num_keys_t = index_t;

  internal_sub_items_t(const container_range_t& _range)
    : node_size{_range.node_size} {
    assert(is_valid_node_size(node_size));
    auto& range = _range.range;
    assert(range.p_start < range.p_end);
    assert((range.p_end - range.p_start) % sizeof(internal_sub_item_t) == 0);
    num_items = (range.p_end - range.p_start) / sizeof(internal_sub_item_t);
    assert(num_items > 0);
    auto _p_first_item = range.p_end - sizeof(internal_sub_item_t);
    p_first_item = reinterpret_cast<const internal_sub_item_t*>(_p_first_item);
  }

  // container type system
  using key_get_type = const snap_gen_t&;
  static constexpr auto CONTAINER_TYPE = ContainerType::INDEXABLE;
  num_keys_t keys() const { return num_items; }
  key_get_type operator[](index_t index) const {
    assert(index < num_items);
    return (p_first_item - index)->get_key();
  }
  node_offset_t size_before(index_t index) const {
    size_t ret = index * sizeof(internal_sub_item_t);
    assert(ret < node_size);
    return ret;
  }
  const laddr_packed_t* get_p_value(index_t index) const {
    assert(index < num_items);
    return (p_first_item - index)->get_p_value();
  }
  node_offset_t size_overhead_at(index_t index) const { return 0u; }
  void encode(const char* p_node_start, ceph::bufferlist& encoded) const {
    auto p_end = reinterpret_cast<const char*>(p_first_item) +
                 sizeof(internal_sub_item_t);
    auto p_start = p_end - num_items * sizeof(internal_sub_item_t);
    int start_offset = p_start - p_node_start;
    int stage_size = p_end - p_start;
    assert(start_offset > 0);
    assert(stage_size > 0);
    assert(start_offset + stage_size < (int)node_size);
    ceph::encode(static_cast<node_offset_t>(start_offset), encoded);
    ceph::encode(static_cast<node_offset_t>(stage_size), encoded);
  }

  static internal_sub_items_t decode(
      const char* p_node_start,
      extent_len_t node_size,
      ceph::bufferlist::const_iterator& delta) {
    node_offset_t start_offset;
    ceph::decode(start_offset, delta);
    node_offset_t stage_size;
    ceph::decode(stage_size, delta);
    assert(start_offset > 0);
    assert(stage_size > 0);
    assert((unsigned)start_offset + stage_size < node_size);
    return internal_sub_items_t({{p_node_start + start_offset,
                                  p_node_start + start_offset + stage_size},
                                 node_size});
  }

  static node_offset_t header_size() { return 0u; }

  template <KeyT KT>
  static node_offset_t estimate_insert(
      const full_key_t<KT>&, const laddr_t&) {
    return sizeof(internal_sub_item_t);
  }

  template <KeyT KT>
  static const laddr_packed_t* insert_at(
      NodeExtentMutable&, const internal_sub_items_t&,
      const full_key_t<KT>&, const laddr_t&,
      index_t index, node_offset_t size, const char* p_left_bound);

  static node_offset_t trim_until(NodeExtentMutable&, internal_sub_items_t&, index_t);

  static node_offset_t erase_at(
      NodeExtentMutable&, const internal_sub_items_t&, index_t, const char*);

  template <KeyT KT>
  class Appender;

 private:
  extent_len_t node_size;
  index_t num_items;
  const internal_sub_item_t* p_first_item;
};
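
/*
 * Illustrative sketch (not part of the upstream interface): one way the
 * indexable-container contract above can be consumed.  collect_children() is
 * a hypothetical helper written only for this comment; it assumes <vector>
 * is available and relies solely on members declared in this header.
 *
 *   std::vector<const laddr_packed_t*> collect_children(
 *       const container_range_t& stage_range) {
 *     internal_sub_items_t sub_items(stage_range);
 *     std::vector<const laddr_packed_t*> children;
 *     // Index 0 maps to the right-most sub-item; the items are laid out
 *     // right-to-left in memory starting at p_first_item (see the diagram
 *     // above), so (p_first_item - index) addresses sub-item `index`.
 *     for (index_t i = 0; i < sub_items.keys(); ++i) {
 *       [[maybe_unused]] const snap_gen_t& key = sub_items[i];
 *       children.push_back(sub_items.get_p_value(i));
 *     }
 *     return children;
 *   }
 */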

template <KeyT KT>
class internal_sub_items_t::Appender {
 public:
  Appender(NodeExtentMutable* p_mut, char* p_append)
    : p_mut{p_mut}, p_append{p_append} {}
  Appender(NodeExtentMutable* p_mut, const internal_sub_items_t& sub_items)
    : p_mut{p_mut},
      p_append{(char*)(sub_items.p_first_item + 1 - sub_items.keys())} {
    assert(sub_items.keys());
  }
  void append(const internal_sub_items_t& src, index_t from, index_t items);
  void append(const full_key_t<KT>&, const laddr_t&, const laddr_packed_t*&);
  char* wrap() { return p_append; }
 private:
  NodeExtentMutable* p_mut;
  char* p_append;
};

/**
 * leaf_sub_items_t
 *
 * The STAGE_RIGHT implementation for leaf node N0/N1/N2 implements the staged
 * contract as an indexable container to index snap-gen to value_header_t.
 *
 * The layout of the container storing n sub-items:
 *
 * # <------------------------ container range -------------------------------> #
 * # <---------- sub-items ----------------> # <--- offsets ---------#          #
 * #<~># sub-items [2, n)                    #<~>| offsets [2, n)    #          #
 * #   # <- sub-item 1 -> # <- sub-item 0 -> #   |                   #          #
 * #...# snap-gen | value # snap-gen | value #...| offset1 | offset0 # num_keys #
 *                                           ^               ^       ^
 *                                           |               |       |
 *                             p_items_end +        p_offsets +      |
 *                                                        p_num_keys +
 *
 * An illustrative sketch of the offset arithmetic follows the class
 * definition below.
 */
class leaf_sub_items_t {
 public:
  // should be enough to index all keys under 64 KiB node
  using num_keys_t = uint16_t;

  // TODO: remove if num_keys_t is aligned
  struct num_keys_packed_t {
    num_keys_t value;
  } __attribute__((packed));

  leaf_sub_items_t(const container_range_t& _range)
    : node_size{_range.node_size} {
    assert(is_valid_node_size(node_size));
    auto& range = _range.range;
    assert(range.p_start < range.p_end);
    auto _p_num_keys = range.p_end - sizeof(num_keys_t);
    assert(range.p_start < _p_num_keys);
    p_num_keys = reinterpret_cast<const num_keys_packed_t*>(_p_num_keys);
    assert(keys());
    auto _p_offsets = _p_num_keys - sizeof(node_offset_t);
    assert(range.p_start < _p_offsets);
    p_offsets = reinterpret_cast<const node_offset_packed_t*>(_p_offsets);
    p_items_end = reinterpret_cast<const char*>(&get_offset(keys() - 1));
    assert(range.p_start < p_items_end);
    assert(range.p_start == p_start());
  }

  bool operator==(const leaf_sub_items_t& x) {
    return (p_num_keys == x.p_num_keys &&
            p_offsets == x.p_offsets &&
            p_items_end == x.p_items_end);
  }

  const char* p_start() const { return get_item_end(keys()); }

  const node_offset_packed_t& get_offset(index_t index) const {
    assert(index < keys());
    return *(p_offsets - index);
  }

  const node_offset_t get_offset_to_end(index_t index) const {
    assert(index <= keys());
    return index == 0 ? 0 : get_offset(index - 1).value;
  }

  const char* get_item_start(index_t index) const {
    return p_items_end - get_offset(index).value;
  }

  const char* get_item_end(index_t index) const {
    return p_items_end - get_offset_to_end(index);
  }

  // container type system
  using key_get_type = const snap_gen_t&;
  static constexpr auto CONTAINER_TYPE = ContainerType::INDEXABLE;
  num_keys_t keys() const { return p_num_keys->value; }
  key_get_type operator[](index_t index) const {
    assert(index < keys());
    auto pointer = get_item_end(index);
    assert(get_item_start(index) < pointer);
    pointer -= sizeof(snap_gen_t);
    assert(get_item_start(index) < pointer);
    return *reinterpret_cast<const snap_gen_t*>(pointer);
  }
  node_offset_t size_before(index_t index) const {
    assert(index <= keys());
    size_t ret;
    if (index == 0) {
      ret = sizeof(num_keys_t);
    } else {
      --index;
      ret = sizeof(num_keys_t) +
            (index + 1) * sizeof(node_offset_t) +
            get_offset(index).value;
    }
    assert(ret < node_size);
    return ret;
  }
  node_offset_t size_overhead_at(index_t index) const { return sizeof(node_offset_t); }
  const value_header_t* get_p_value(index_t index) const {
    assert(index < keys());
    auto pointer = get_item_start(index);
    auto value = reinterpret_cast<const value_header_t*>(pointer);
    assert(pointer + value->allocation_size() + sizeof(snap_gen_t) ==
           get_item_end(index));
    return value;
  }
  void encode(const char* p_node_start, ceph::bufferlist& encoded) const {
    auto p_end = reinterpret_cast<const char*>(p_num_keys) +
                 sizeof(num_keys_t);
    int start_offset = p_start() - p_node_start;
    int stage_size = p_end - p_start();
    assert(start_offset > 0);
    assert(stage_size > 0);
    assert(start_offset + stage_size < (int)node_size);
    ceph::encode(static_cast<node_offset_t>(start_offset), encoded);
    ceph::encode(static_cast<node_offset_t>(stage_size), encoded);
  }

  static leaf_sub_items_t decode(
      const char* p_node_start,
      extent_len_t node_size,
      ceph::bufferlist::const_iterator& delta) {
    node_offset_t start_offset;
    ceph::decode(start_offset, delta);
    node_offset_t stage_size;
    ceph::decode(stage_size, delta);
    assert(start_offset > 0);
    assert(stage_size > 0);
    assert((unsigned)start_offset + stage_size < node_size);
    return leaf_sub_items_t({{p_node_start + start_offset,
                              p_node_start + start_offset + stage_size},
                             node_size});
  }

  static node_offset_t header_size() { return sizeof(num_keys_t); }

  template <KeyT KT>
  static node_offset_t estimate_insert(
      const full_key_t<KT>&, const value_config_t& value) {
    return value.allocation_size() + sizeof(snap_gen_t) + sizeof(node_offset_t);
  }

  template <KeyT KT>
  static const value_header_t* insert_at(
      NodeExtentMutable&, const leaf_sub_items_t&,
      const full_key_t<KT>&, const value_config_t&,
      index_t index, node_offset_t size, const char* p_left_bound);

  static node_offset_t trim_until(NodeExtentMutable&, leaf_sub_items_t&, index_t index);

  static node_offset_t erase_at(
      NodeExtentMutable&, const leaf_sub_items_t&, index_t, const char*);

  template <KeyT KT>
  class Appender;

 private:
  extent_len_t node_size;
  const num_keys_packed_t* p_num_keys;
  const node_offset_packed_t* p_offsets;
  const char* p_items_end;
};
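
/*
 * Illustrative sketch (not part of the upstream code): how the offset table
 * above translates into per-item boundaries.  check_item_extents() is a
 * hypothetical helper written only for this comment; it relies solely on
 * members declared in this header.
 *
 *   void check_item_extents(const container_range_t& stage_range) {
 *     leaf_sub_items_t sub_items(stage_range);
 *     for (index_t i = 0; i < sub_items.keys(); ++i) {
 *       // Both bounds are measured backwards from p_items_end via the
 *       // recorded offsets, so offset0 describes the right-most item
 *       // (index 0) and later offsets reach towards the left-most item.
 *       const char* item_start = sub_items.get_item_start(i);
 *       const char* item_end = sub_items.get_item_end(i);
 *       // Mirrors the assertion in get_p_value(): each item stores its
 *       // value followed by its snap-gen key.
 *       const value_header_t* value = sub_items.get_p_value(i);
 *       assert(static_cast<size_t>(item_end - item_start) ==
 *              value->allocation_size() + sizeof(snap_gen_t));
 *     }
 *   }
 */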

constexpr index_t APPENDER_LIMIT = 3u;

template <KeyT KT>
class leaf_sub_items_t::Appender {
  struct range_items_t {
    index_t from;
    index_t items;
  };
  struct kv_item_t {
    const full_key_t<KT>* p_key;
    value_config_t value_config;
  };
  using var_t = std::variant<range_items_t, kv_item_t>;

 public:
  Appender(NodeExtentMutable* p_mut, char* p_append)
    : p_mut{p_mut}, p_append{p_append} {
  }
  Appender(NodeExtentMutable* p_mut, const leaf_sub_items_t& sub_items)
    : p_mut{p_mut}, op_dst(sub_items) {
    assert(sub_items.keys());
  }

  void append(const leaf_sub_items_t& src, index_t from, index_t items);
  void append(const full_key_t<KT>& key,
              const value_config_t& value, const value_header_t*& p_value) {
    // append from empty
    assert(p_append);
    assert(pp_value == nullptr);
    assert(cnt < APPENDER_LIMIT);
    appends[cnt] = kv_item_t{&key, value};
    ++cnt;
    pp_value = &p_value;
  }
  char* wrap();

 private:
  NodeExtentMutable* p_mut;
  // append from empty
  std::optional<leaf_sub_items_t> op_src;
  const value_header_t** pp_value = nullptr;
  char* p_append = nullptr;
  var_t appends[APPENDER_LIMIT];
  index_t cnt = 0;
  // append from existing
  std::optional<leaf_sub_items_t> op_dst;
  char* p_appended = nullptr;
};

template <node_type_t> struct _sub_items_t;
template<> struct _sub_items_t<node_type_t::INTERNAL> { using type = internal_sub_items_t; };
template<> struct _sub_items_t<node_type_t::LEAF> { using type = leaf_sub_items_t; };
template <node_type_t NODE_TYPE>
using sub_items_t = typename _sub_items_t<NODE_TYPE>::type;

}