// Source: ceph.git — ceph/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.h
// Snapshot from the "update source to Ceph Pacific 16.2.2" commit (via git.proxmox.com mirror).
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #pragma once
5
6 #include <variant>
7
8 #include "crimson/os/seastore/onode_manager/staged-fltree/node_types.h"
9 #include "key_layout.h"
10 #include "stage_types.h"
11
12 namespace crimson::os::seastore::onode {
13
14 class NodeExtentMutable;
15
/**
 * internal_sub_item_t
 *
 * The fixed-size key/value unit stored in an internal node at STAGE_RIGHT:
 * a snap-gen key together with the packed laddr of the referenced child.
 * Packed so that consecutive items can be laid out back-to-back in the node
 * extent and addressed purely by pointer arithmetic.
 */
struct internal_sub_item_t {
  const snap_gen_t& get_key() const { return key; }
  const laddr_packed_t* get_p_value() const { return &value; }

  snap_gen_t key;
  laddr_packed_t value;
} __attribute__((packed));
23
/**
 * internal_sub_items_t
 *
 * The STAGE_RIGHT implementation for internal node N0/N1/N2, implements staged
 * contract as an indexable container to index snap-gen to child node
 * addresses.
 *
 * The layout of the container storing n sub-items:
 *
 * # <--------- container range -----------> #
 * #<~># sub-items [2, n)                    #
 * #   # <- sub-item 1 -> # <- sub-item 0 -> #
 * #...# snap-gen | laddr # snap-gen | laddr #
 *                        ^
 *                        |
 *           p_first_item +
 *
 * Note that items are stored in REVERSE index order: item 0 sits at the end
 * of the range, so item i lives at (p_first_item - i).
 */
class internal_sub_items_t {
 public:
  using num_keys_t = index_t;

  // Interpret [range.p_start, range.p_end) as a non-empty sequence of packed
  // internal_sub_item_t; the range size must divide evenly.
  internal_sub_items_t(const memory_range_t& range) {
    assert(range.p_start < range.p_end);
    assert((range.p_end - range.p_start) % sizeof(internal_sub_item_t) == 0);
    num_items = (range.p_end - range.p_start) / sizeof(internal_sub_item_t);
    assert(num_items > 0);
    // item 0 is the last sizeof(internal_sub_item_t) bytes of the range
    auto _p_first_item = range.p_end - sizeof(internal_sub_item_t);
    p_first_item = reinterpret_cast<const internal_sub_item_t*>(_p_first_item);
  }

  // container type system
  using key_get_type = const snap_gen_t&;
  static constexpr auto CONTAINER_TYPE = ContainerType::INDEXABLE;
  num_keys_t keys() const { return num_items; }
  key_get_type operator[](index_t index) const {
    assert(index < num_items);
    return (p_first_item - index)->get_key();
  }
  // Bytes occupied by the first `index` items (fixed-size items, so linear).
  node_offset_t size_before(index_t index) const {
    size_t ret = index * sizeof(internal_sub_item_t);
    assert(ret < NODE_BLOCK_SIZE);
    return ret;
  }
  const laddr_packed_t* get_p_value(index_t index) const {
    assert(index < num_items);
    return (p_first_item - index)->get_p_value();
  }
  // No per-item metadata beyond the item itself.
  node_offset_t size_overhead_at(index_t index) const { return 0u; }

  // Serialize this container's position as [start_offset, end_offset)
  // relative to p_node_start, for replay via decode().
  void encode(const char* p_node_start, ceph::bufferlist& encoded) const {
    auto p_end = reinterpret_cast<const char*>(p_first_item) +
                 sizeof(internal_sub_item_t);
    auto p_start = p_end - num_items * sizeof(internal_sub_item_t);
    int start_offset = p_start - p_node_start;
    int end_offset = p_end - p_node_start;
    // NOTE(review): encode asserts end_offset < NODE_BLOCK_SIZE while decode
    // accepts end_offset <= NODE_BLOCK_SIZE — asymmetry looks intentional
    // (decode is the permissive side) but worth confirming.
    assert(start_offset > 0 &&
           start_offset < end_offset &&
           end_offset < NODE_BLOCK_SIZE);
    ceph::encode(static_cast<node_offset_t>(start_offset), encoded);
    ceph::encode(static_cast<node_offset_t>(end_offset), encoded);
  }

  // Rebuild a container from offsets previously written by encode().
  static internal_sub_items_t decode(
      const char* p_node_start, ceph::bufferlist::const_iterator& delta) {
    node_offset_t start_offset;
    ceph::decode(start_offset, delta);
    node_offset_t end_offset;
    ceph::decode(end_offset, delta);
    assert(start_offset < end_offset);
    assert(end_offset <= NODE_BLOCK_SIZE);
    return internal_sub_items_t({p_node_start + start_offset,
                                 p_node_start + end_offset});
  }

  static node_offset_t header_size() { return 0u; }

  // Inserting always costs exactly one fixed-size item.
  template <KeyT KT>
  static node_offset_t estimate_insert(
      const full_key_t<KT>&, const laddr_packed_t&) {
    return sizeof(internal_sub_item_t);
  }

  // Defined out-of-line (see sub_items_stage.cc); inserts a key/laddr pair at
  // `index` and returns a pointer to the inserted value.
  template <KeyT KT>
  static const laddr_packed_t* insert_at(
      NodeExtentMutable&, const internal_sub_items_t&,
      const full_key_t<KT>&, const laddr_packed_t&,
      index_t index, node_offset_t size, const char* p_left_bound);

  // Defined out-of-line; drops items [index, keys()) and returns freed size.
  static node_offset_t trim_until(NodeExtentMutable&, internal_sub_items_t&, index_t);

  template <KeyT KT>
  class Appender;

 private:
  index_t num_items;
  const internal_sub_item_t* p_first_item;
};
120
/**
 * internal_sub_items_t::Appender
 *
 * Builds a new internal sub-items container by appending items at p_append
 * through the mutable extent p_mut.  The append() overloads are defined
 * out-of-line (see sub_items_stage.cc).
 */
template <KeyT KT>
class internal_sub_items_t::Appender {
 public:
  Appender(NodeExtentMutable* p_mut, char* p_append)
    : p_mut{p_mut}, p_append{p_append} {}
  // Copy items [from, from + items) of src to the append position.
  void append(const internal_sub_items_t& src, index_t from, index_t items);
  // Append a new key/laddr pair; the out-param is set to the inserted value.
  void append(const full_key_t<KT>&, const laddr_packed_t&, const laddr_packed_t*&);
  // Return the end of the appended content.
  char* wrap() { return p_append; }
 private:
  NodeExtentMutable* p_mut;
  char* p_append;
};
133
134 /**
135 * leaf_sub_items_t
136 *
137 * The STAGE_RIGHT implementation for leaf node N0/N1/N2, implements staged
138 * contract as an indexable container to index snap-gen to onode_t.
139 *
140 * The layout of the contaner storing n sub-items:
141 *
142 * # <------------------------ container range -------------------------------> #
143 * # <---------- sub-items ----------------> # <--- offsets ---------# #
144 * #<~># sub-items [2, n) #<~>| offsets [2, n) # #
145 * # # <- sub-item 1 -> # <- sub-item 0 -> # | # #
146 * #...# snap-gen | onode # snap-gen | onode #...| offset1 | offset0 # num_keys #
147 * ^ ^ ^
148 * | | |
149 * p_items_end + p_offsets + |
150 * p_num_keys +
151 */
152 class leaf_sub_items_t {
153 public:
154 // TODO: decide by NODE_BLOCK_SIZE, sizeof(snap_gen_t),
155 // and the minimal size of onode_t
156 using num_keys_t = uint8_t;
157
158 leaf_sub_items_t(const memory_range_t& range) {
159 assert(range.p_start < range.p_end);
160 auto _p_num_keys = range.p_end - sizeof(num_keys_t);
161 assert(range.p_start < _p_num_keys);
162 p_num_keys = reinterpret_cast<const num_keys_t*>(_p_num_keys);
163 assert(keys());
164 auto _p_offsets = _p_num_keys - sizeof(node_offset_t);
165 assert(range.p_start < _p_offsets);
166 p_offsets = reinterpret_cast<const node_offset_packed_t*>(_p_offsets);
167 p_items_end = reinterpret_cast<const char*>(&get_offset(keys() - 1));
168 assert(range.p_start < p_items_end);
169 assert(range.p_start == p_start());
170 }
171
172 bool operator==(const leaf_sub_items_t& x) {
173 return (p_num_keys == x.p_num_keys &&
174 p_offsets == x.p_offsets &&
175 p_items_end == x.p_items_end);
176 }
177
178 const char* p_start() const { return get_item_end(keys()); }
179
180 const node_offset_packed_t& get_offset(index_t index) const {
181 assert(index < keys());
182 return *(p_offsets - index);
183 }
184
185 const node_offset_t get_offset_to_end(index_t index) const {
186 assert(index <= keys());
187 return index == 0 ? 0 : get_offset(index - 1).value;
188 }
189
190 const char* get_item_start(index_t index) const {
191 return p_items_end - get_offset(index).value;
192 }
193
194 const char* get_item_end(index_t index) const {
195 return p_items_end - get_offset_to_end(index);
196 }
197
198 // container type system
199 using key_get_type = const snap_gen_t&;
200 static constexpr auto CONTAINER_TYPE = ContainerType::INDEXABLE;
201 num_keys_t keys() const { return *p_num_keys; }
202 key_get_type operator[](index_t index) const {
203 assert(index < keys());
204 auto pointer = get_item_end(index);
205 assert(get_item_start(index) < pointer);
206 pointer -= sizeof(snap_gen_t);
207 assert(get_item_start(index) < pointer);
208 return *reinterpret_cast<const snap_gen_t*>(pointer);
209 }
210 node_offset_t size_before(index_t index) const {
211 assert(index <= keys());
212 size_t ret;
213 if (index == 0) {
214 ret = sizeof(num_keys_t);
215 } else {
216 --index;
217 ret = sizeof(num_keys_t) +
218 (index + 1) * sizeof(node_offset_t) +
219 get_offset(index).value;
220 }
221 assert(ret < NODE_BLOCK_SIZE);
222 return ret;
223 }
224 node_offset_t size_overhead_at(index_t index) const { return sizeof(node_offset_t); }
225 const onode_t* get_p_value(index_t index) const {
226 assert(index < keys());
227 auto pointer = get_item_start(index);
228 auto value = reinterpret_cast<const onode_t*>(pointer);
229 assert(pointer + value->size + sizeof(snap_gen_t) == get_item_end(index));
230 return value;
231 }
232 void encode(const char* p_node_start, ceph::bufferlist& encoded) const {
233 auto p_end = reinterpret_cast<const char*>(p_num_keys) +
234 sizeof(num_keys_t);
235 int start_offset = p_start() - p_node_start;
236 int end_offset = p_end - p_node_start;
237 assert(start_offset > 0 &&
238 start_offset < end_offset &&
239 end_offset < NODE_BLOCK_SIZE);
240 ceph::encode(static_cast<node_offset_t>(start_offset), encoded);
241 ceph::encode(static_cast<node_offset_t>(end_offset), encoded);
242 }
243
244 static leaf_sub_items_t decode(
245 const char* p_node_start, ceph::bufferlist::const_iterator& delta) {
246 node_offset_t start_offset;
247 ceph::decode(start_offset, delta);
248 node_offset_t end_offset;
249 ceph::decode(end_offset, delta);
250 assert(start_offset < end_offset);
251 assert(end_offset <= NODE_BLOCK_SIZE);
252 return leaf_sub_items_t({p_node_start + start_offset,
253 p_node_start + end_offset});
254 }
255
256 static node_offset_t header_size() { return sizeof(num_keys_t); }
257
258 template <KeyT KT>
259 static node_offset_t estimate_insert(const full_key_t<KT>&, const onode_t& value) {
260 return value.size + sizeof(snap_gen_t) + sizeof(node_offset_t);
261 }
262
263 template <KeyT KT>
264 static const onode_t* insert_at(
265 NodeExtentMutable&, const leaf_sub_items_t&,
266 const full_key_t<KT>&, const onode_t&,
267 index_t index, node_offset_t size, const char* p_left_bound);
268
269 static node_offset_t trim_until(NodeExtentMutable&, leaf_sub_items_t&, index_t index);
270
271 template <KeyT KT>
272 class Appender;
273
274 private:
275 // TODO: support unaligned access
276 const num_keys_t* p_num_keys;
277 const node_offset_packed_t* p_offsets;
278 const char* p_items_end;
279 };
280
// Maximum number of buffered append operations a leaf Appender can hold
// before wrap() materializes them.
constexpr index_t APPENDER_LIMIT = 3u;
282
283 template <KeyT KT>
284 class leaf_sub_items_t::Appender {
285 struct range_items_t {
286 index_t from;
287 index_t items;
288 };
289 struct kv_item_t {
290 const full_key_t<KT>* p_key;
291 const onode_t* p_value;
292 };
293 using var_t = std::variant<range_items_t, kv_item_t>;
294
295 public:
296 Appender(NodeExtentMutable* p_mut, char* p_append)
297 : p_mut{p_mut}, p_append{p_append} {
298 }
299
300 void append(const leaf_sub_items_t& src, index_t from, index_t items) {
301 assert(cnt <= APPENDER_LIMIT);
302 assert(from <= src.keys());
303 if (items == 0) {
304 return;
305 }
306 if (op_src) {
307 assert(*op_src == src);
308 } else {
309 op_src = src;
310 }
311 assert(from < src.keys());
312 assert(from + items <= src.keys());
313 appends[cnt] = range_items_t{from, items};
314 ++cnt;
315 }
316 void append(const full_key_t<KT>& key,
317 const onode_t& value, const onode_t*& p_value) {
318 assert(pp_value == nullptr);
319 assert(cnt <= APPENDER_LIMIT);
320 appends[cnt] = kv_item_t{&key, &value};
321 ++cnt;
322 pp_value = &p_value;
323 }
324 char* wrap();
325
326 private:
327 std::optional<leaf_sub_items_t> op_src;
328 const onode_t** pp_value = nullptr;
329 NodeExtentMutable* p_mut;
330 char* p_append;
331 var_t appends[APPENDER_LIMIT];
332 index_t cnt = 0;
333 };
334
// Trait selecting the STAGE_RIGHT container implementation by node type:
// internal nodes index child addresses, leaf nodes index onodes.
template <node_type_t> struct _sub_items_t;
template<> struct _sub_items_t<node_type_t::INTERNAL> { using type = internal_sub_items_t; };
template<> struct _sub_items_t<node_type_t::LEAF> { using type = leaf_sub_items_t; };
template <node_type_t NODE_TYPE>
using sub_items_t = typename _sub_items_t<NODE_TYPE>::type;
340
341 }