]>
Commit | Line | Data |
---|---|---|
20effc67 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #pragma once | |
5 | ||
6 | #include <ostream> | |
7 | ||
8 | #include "include/buffer.h" | |
9 | #include "crimson/common/type_helpers.h" | |
10 | ||
11 | #include "fwd.h" | |
12 | #include "node_extent_mutable.h" | |
13 | ||
14 | namespace crimson::os::seastore::onode { | |
15 | ||
// Width of a value size field. A 16-bit unsigned integer can express sizes
// up to 65535 bytes, i.e. just below 64 KiB.
using value_size_t = uint16_t;

/**
 * value_magic_t
 *
 * One-byte tag carried by value_config_t and value_header_t to identify the
 * kind of value. Enumerators after ONODE take consecutive values
 * (0x53, 0x54, 0x55).
 */
enum class value_magic_t : uint8_t {
  ONODE = 0x52,
  TEST_UNBOUND,
  TEST_BOUNDED,
  TEST_EXTENDED,
};

/**
 * Streams a human-readable form of a value magic.
 *
 * Known enumerators are printed by name. Any other value is printed as
 * "UNKNOWN(<n>)" where <n> is the numeric value in decimal.
 *
 * @param os     the output stream
 * @param magic  the magic to print
 * @return       os, to allow chaining
 */
inline std::ostream& operator<<(std::ostream& os, const value_magic_t& magic) {
  switch (magic) {
  case value_magic_t::ONODE:
    return os << "ONODE";
  case value_magic_t::TEST_UNBOUND:
    return os << "TEST_UNBOUND";
  case value_magic_t::TEST_BOUNDED:
    return os << "TEST_BOUNDED";
  case value_magic_t::TEST_EXTENDED:
    return os << "TEST_EXTENDED";
  default:
    // Stream the underlying integer rather than the enum: `os << magic`
    // would pick this very overload again and recurse without end.
    // Widening to unsigned also keeps the uint8_t from printing as a char.
    return os << "UNKNOWN(" << static_cast<unsigned>(magic) << ")";
  }
}
38 | ||
39 | /** | |
40 | * value_config_t | |
41 | * | |
42 | * Parameters to create a value. | |
43 | */ | |
44 | struct value_config_t { | |
45 | value_magic_t magic; | |
46 | value_size_t payload_size; | |
47 | ||
48 | value_size_t allocation_size() const; | |
49 | ||
50 | void encode(ceph::bufferlist& encoded) const { | |
51 | ceph::encode(magic, encoded); | |
52 | ceph::encode(payload_size, encoded); | |
53 | } | |
54 | ||
55 | static value_config_t decode(ceph::bufferlist::const_iterator& delta) { | |
56 | value_magic_t magic; | |
57 | ceph::decode(magic, delta); | |
58 | value_size_t payload_size; | |
59 | ceph::decode(payload_size, delta); | |
60 | return {magic, payload_size}; | |
61 | } | |
62 | }; | |
63 | inline std::ostream& operator<<(std::ostream& os, const value_config_t& conf) { | |
64 | return os << "ValueConf(" << conf.magic | |
65 | << ", " << conf.payload_size << "B)"; | |
66 | } | |
67 | ||
68 | /** | |
69 | * value_header_t | |
70 | * | |
71 | * The header structure in value layout. | |
72 | * | |
73 | * Value layout: | |
74 | * | |
75 | * # <- alloc size -> # | |
76 | * # header | payload # | |
77 | */ | |
78 | struct value_header_t { | |
79 | value_magic_t magic; | |
80 | value_size_t payload_size; | |
81 | ||
82 | bool operator==(const value_header_t& rhs) const { | |
83 | return (magic == rhs.magic && payload_size == rhs.payload_size); | |
84 | } | |
85 | bool operator!=(const value_header_t& rhs) const { | |
86 | return !(*this == rhs); | |
87 | } | |
88 | ||
89 | value_size_t allocation_size() const { | |
90 | return payload_size + sizeof(value_header_t); | |
91 | } | |
92 | ||
93 | const char* get_payload() const { | |
94 | return reinterpret_cast<const char*>(this) + sizeof(value_header_t); | |
95 | } | |
96 | ||
97 | NodeExtentMutable get_payload_mutable(NodeExtentMutable& node) const { | |
98 | return node.get_mutable_absolute(get_payload(), payload_size); | |
99 | } | |
100 | ||
101 | char* get_payload() { | |
102 | return reinterpret_cast<char*>(this) + sizeof(value_header_t); | |
103 | } | |
104 | ||
105 | void initiate(NodeExtentMutable& mut, const value_config_t& config) { | |
106 | value_header_t header{config.magic, config.payload_size}; | |
107 | mut.copy_in_absolute(this, header); | |
108 | mut.set_absolute(get_payload(), 0, config.payload_size); | |
109 | } | |
110 | ||
111 | static value_size_t estimate_allocation_size(value_size_t payload_size) { | |
112 | return payload_size + sizeof(value_header_t); | |
113 | } | |
114 | } __attribute__((packed)); | |
115 | inline std::ostream& operator<<(std::ostream& os, const value_header_t& header) { | |
116 | return os << "Value(" << header.magic | |
117 | << ", " << header.payload_size << "B)"; | |
118 | } | |
119 | ||
120 | inline value_size_t value_config_t::allocation_size() const { | |
121 | return value_header_t::estimate_allocation_size(payload_size); | |
122 | } | |
123 | ||
124 | /** | |
125 | * ValueDeltaRecorder | |
126 | * | |
127 | * An abstracted class to handle user-defined value delta encode, decode and | |
128 | * replay. | |
129 | */ | |
130 | class ValueDeltaRecorder { | |
131 | public: | |
132 | virtual ~ValueDeltaRecorder() = default; | |
133 | ValueDeltaRecorder(const ValueDeltaRecorder&) = delete; | |
134 | ValueDeltaRecorder(ValueDeltaRecorder&&) = delete; | |
135 | ValueDeltaRecorder& operator=(const ValueDeltaRecorder&) = delete; | |
136 | ValueDeltaRecorder& operator=(ValueDeltaRecorder&&) = delete; | |
137 | ||
138 | /// Returns the value header magic for validation purpose. | |
139 | virtual value_magic_t get_header_magic() const = 0; | |
140 | ||
141 | /// Called by DeltaRecorderT to apply user-defined value delta. | |
142 | virtual void apply_value_delta(ceph::bufferlist::const_iterator&, | |
143 | NodeExtentMutable&, | |
144 | laddr_t) = 0; | |
145 | ||
146 | protected: | |
147 | ValueDeltaRecorder(ceph::bufferlist& encoded) : encoded{encoded} {} | |
148 | ||
149 | /// Get the delta buffer to encode user-defined value delta. | |
150 | ceph::bufferlist& get_encoded(NodeExtentMutable&); | |
151 | ||
152 | private: | |
153 | ceph::bufferlist& encoded; | |
154 | }; | |
155 | ||
156 | /** | |
157 | * tree_conf_t | |
158 | * | |
159 | * Hard limits and compile-time configurations. | |
160 | */ | |
161 | struct tree_conf_t { | |
162 | value_magic_t value_magic; | |
163 | string_size_t max_ns_size; | |
164 | string_size_t max_oid_size; | |
165 | value_size_t max_value_payload_size; | |
166 | extent_len_t internal_node_size; | |
167 | extent_len_t leaf_node_size; | |
168 | bool do_split_check = true; | |
169 | }; | |
170 | ||
171 | class tree_cursor_t; | |
172 | /** | |
173 | * Value | |
174 | * | |
175 | * Value is a stateless view of the underlying value header and payload content | |
176 | * stored in a tree leaf node, with the support to implement user-defined value | |
177 | * deltas and to extend and trim the underlying payload data (not implemented | |
178 | * yet). | |
179 | * | |
180 | * In the current implementation, we don't guarantee any alignment for value | |
181 | * payload due to unaligned node layout and the according merge and split | |
182 | * operations. | |
183 | */ | |
184 | class Value { | |
185 | public: | |
186 | virtual ~Value(); | |
187 | Value(const Value&) = default; | |
188 | Value(Value&&) = default; | |
189 | Value& operator=(const Value&) = delete; | |
190 | Value& operator=(Value&&) = delete; | |
191 | ||
192 | /// Returns whether the Value is still tracked in tree. | |
193 | bool is_tracked() const; | |
194 | ||
195 | /// Invalidate the Value before submitting transaction. | |
196 | void invalidate(); | |
197 | ||
198 | /// Returns the value payload size. | |
199 | value_size_t get_payload_size() const { | |
200 | assert(is_tracked()); | |
201 | return read_value_header()->payload_size; | |
202 | } | |
203 | ||
204 | laddr_t get_hint() const; | |
205 | ||
206 | bool operator==(const Value& v) const { return p_cursor == v.p_cursor; } | |
207 | bool operator!=(const Value& v) const { return !(*this == v); } | |
208 | ||
209 | protected: | |
210 | Value(NodeExtentManager&, const ValueBuilder&, Ref<tree_cursor_t>&); | |
211 | ||
212 | /// Extends the payload size. | |
213 | eagain_ifuture<> extend(Transaction&, value_size_t extend_size); | |
214 | ||
215 | /// Trim and shrink the payload. | |
216 | eagain_ifuture<> trim(Transaction&, value_size_t trim_size); | |
217 | ||
218 | /// Get the permission to mutate the payload with the optional value recorder. | |
219 | template <typename PayloadT, typename ValueDeltaRecorderT> | |
220 | std::pair<NodeExtentMutable&, ValueDeltaRecorderT*> | |
221 | prepare_mutate_payload(Transaction& t) { | |
222 | assert(is_tracked()); | |
223 | assert(sizeof(PayloadT) <= get_payload_size()); | |
224 | ||
225 | auto value_mutable = do_prepare_mutate_payload(t); | |
226 | assert(value_mutable.first.get_write() == | |
227 | const_cast<const Value*>(this)->template read_payload<char>()); | |
228 | assert(value_mutable.first.get_length() == get_payload_size()); | |
229 | return {value_mutable.first, | |
230 | static_cast<ValueDeltaRecorderT*>(value_mutable.second)}; | |
231 | } | |
232 | ||
233 | /// Get the latest payload pointer for read. | |
234 | template <typename PayloadT> | |
235 | const PayloadT* read_payload() const { | |
236 | assert(is_tracked()); | |
237 | // see Value documentation | |
238 | static_assert(alignof(PayloadT) == 1); | |
239 | assert(sizeof(PayloadT) <= get_payload_size()); | |
240 | return reinterpret_cast<const PayloadT*>(read_value_header()->get_payload()); | |
241 | } | |
242 | ||
243 | private: | |
244 | const value_header_t* read_value_header() const; | |
245 | context_t get_context(Transaction& t) { | |
246 | return {nm, vb, t}; | |
247 | } | |
248 | ||
249 | std::pair<NodeExtentMutable&, ValueDeltaRecorder*> | |
250 | do_prepare_mutate_payload(Transaction&); | |
251 | ||
252 | NodeExtentManager& nm; | |
253 | const ValueBuilder& vb; | |
254 | Ref<tree_cursor_t> p_cursor; | |
255 | ||
256 | template <typename ValueImpl> | |
257 | friend class Btree; | |
258 | }; | |
259 | ||
260 | /** | |
261 | * ValueBuilder | |
262 | * | |
263 | * For tree nodes to build values without the need to depend on the actual | |
264 | * implementation. | |
265 | */ | |
266 | struct ValueBuilder { | |
267 | virtual value_magic_t get_header_magic() const = 0; | |
268 | virtual string_size_t get_max_ns_size() const = 0; | |
269 | virtual string_size_t get_max_oid_size() const = 0; | |
270 | virtual value_size_t get_max_value_payload_size() const = 0; | |
271 | virtual extent_len_t get_internal_node_size() const = 0; | |
272 | virtual extent_len_t get_leaf_node_size() const = 0; | |
273 | virtual std::unique_ptr<ValueDeltaRecorder> | |
274 | build_value_recorder(ceph::bufferlist&) const = 0; | |
275 | }; | |
276 | ||
277 | /** | |
278 | * ValueBuilderImpl | |
279 | * | |
280 | * The concrete ValueBuilder implementation in Btree. | |
281 | */ | |
282 | template <typename ValueImpl> | |
283 | struct ValueBuilderImpl final : public ValueBuilder { | |
284 | ValueBuilderImpl() { | |
285 | validate_tree_config(ValueImpl::TREE_CONF); | |
286 | } | |
287 | ||
288 | value_magic_t get_header_magic() const { | |
289 | return ValueImpl::TREE_CONF.value_magic; | |
290 | } | |
291 | string_size_t get_max_ns_size() const override { | |
292 | return ValueImpl::TREE_CONF.max_ns_size; | |
293 | } | |
294 | string_size_t get_max_oid_size() const override { | |
295 | return ValueImpl::TREE_CONF.max_oid_size; | |
296 | } | |
297 | value_size_t get_max_value_payload_size() const override { | |
298 | return ValueImpl::TREE_CONF.max_value_payload_size; | |
299 | } | |
300 | extent_len_t get_internal_node_size() const override { | |
301 | return ValueImpl::TREE_CONF.internal_node_size; | |
302 | } | |
303 | extent_len_t get_leaf_node_size() const override { | |
304 | return ValueImpl::TREE_CONF.leaf_node_size; | |
305 | } | |
306 | ||
307 | std::unique_ptr<ValueDeltaRecorder> | |
308 | build_value_recorder(ceph::bufferlist& encoded) const override { | |
309 | std::unique_ptr<ValueDeltaRecorder> ret = | |
310 | std::make_unique<typename ValueImpl::Recorder>(encoded); | |
311 | assert(ret->get_header_magic() == get_header_magic()); | |
312 | return ret; | |
313 | } | |
314 | ||
315 | ValueImpl build_value(NodeExtentManager& nm, | |
316 | const ValueBuilder& vb, | |
317 | Ref<tree_cursor_t>& p_cursor) const { | |
318 | assert(vb.get_header_magic() == get_header_magic()); | |
319 | return ValueImpl(nm, vb, p_cursor); | |
320 | } | |
321 | }; | |
322 | ||
323 | void validate_tree_config(const tree_conf_t& conf); | |
324 | ||
325 | /** | |
326 | * Get the value recorder by type (the magic value) when the ValueBuilder is | |
327 | * unavailable. | |
328 | */ | |
329 | std::unique_ptr<ValueDeltaRecorder> | |
330 | build_value_recorder_by_type(ceph::bufferlist& encoded, const value_magic_t& magic); | |
331 | ||
332 | } |