]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #pragma once | |
5 | ||
6 | #include <limits> | |
20effc67 TL |
7 | #include <numeric> |
8 | #include <optional> | |
f67539c2 | 9 | #include <iostream> |
20effc67 | 10 | #include <vector> |
1e59de90 TL |
11 | #include <boost/core/ignore_unused.hpp> |
12 | ||
13 | #include <seastar/core/lowres_clock.hh> | |
f67539c2 TL |
14 | |
15 | #include "include/byteorder.h" | |
16 | #include "include/denc.h" | |
17 | #include "include/buffer.h" | |
1e59de90 | 18 | #include "include/intarith.h" |
20effc67 | 19 | #include "include/interval_set.h" |
1e59de90 | 20 | #include "include/uuid.h" |
f67539c2 TL |
21 | |
22 | namespace crimson::os::seastore { | |
23 | ||
1e59de90 TL |
/// Name of the special xattr key under which the omap header is stored.
const std::string OMAP_HEADER_XATTR_KEY{"omap_header"};

/// Numeric transaction identifier; TRANS_ID_NULL is defined as zero.
using transaction_id_t = uint64_t;
constexpr transaction_id_t TRANS_ID_NULL{0};
29 | ||
30 | /* | |
31 | * Note: NULL value is usually the default and max value. | |
32 | */ | |
33 | ||
20effc67 TL |
34 | using depth_t = uint32_t; |
35 | using depth_le_t = ceph_le32; | |
36 | ||
37 | inline depth_le_t init_depth_le(uint32_t i) { | |
38 | return ceph_le32(i); | |
39 | } | |
f67539c2 TL |
40 | |
41 | using checksum_t = uint32_t; | |
42 | ||
43 | // Immutable metadata for seastore to set at mkfs time | |
44 | struct seastore_meta_t { | |
45 | uuid_d seastore_id; | |
46 | ||
47 | DENC(seastore_meta_t, v, p) { | |
48 | DENC_START(1, 1, p); | |
49 | denc(v.seastore_id, p); | |
50 | DENC_FINISH(p); | |
51 | } | |
52 | }; | |
53 | ||
20effc67 TL |
54 | std::ostream& operator<<(std::ostream& out, const seastore_meta_t& meta); |
55 | ||
1e59de90 TL |
56 | bool is_aligned(uint64_t offset, uint64_t alignment); |
57 | ||
20effc67 TL |
/// Identifies a specific physical device within seastore.
using device_id_t = uint8_t;

/// Number of value bits in device_id_t.
constexpr auto DEVICE_ID_BITS = std::numeric_limits<device_id_t>::digits;

/// Largest representable id; the null id is defined to be the same value.
constexpr device_id_t DEVICE_ID_MAX = std::numeric_limits<device_id_t>::max();
constexpr device_id_t DEVICE_ID_NULL = DEVICE_ID_MAX;

// Special-purpose ids, counted downwards from DEVICE_ID_MAX.
constexpr device_id_t DEVICE_ID_RECORD_RELATIVE = DEVICE_ID_MAX - 1;
constexpr device_id_t DEVICE_ID_BLOCK_RELATIVE = DEVICE_ID_MAX - 2;
constexpr device_id_t DEVICE_ID_DELAYED = DEVICE_ID_MAX - 3;
// for tests which generate fake paddrs
constexpr device_id_t DEVICE_ID_FAKE = DEVICE_ID_MAX - 4;
constexpr device_id_t DEVICE_ID_ZERO = DEVICE_ID_MAX - 5;
constexpr device_id_t DEVICE_ID_ROOT = DEVICE_ID_MAX - 6;

/// Highest id that is not one of the special-purpose ids above.
constexpr device_id_t DEVICE_ID_MAX_VALID = DEVICE_ID_MAX - 7;

// Ids with the top bit clear span [DEVICE_ID_SEGMENTED_MIN,
// DEVICE_ID_MAX_VALID_SEGMENT]; ids from DEVICE_ID_RANDOM_BLOCK_MIN upwards
// have the top bit set.
constexpr device_id_t DEVICE_ID_MAX_VALID_SEGMENT = DEVICE_ID_MAX >> 1;
constexpr device_id_t DEVICE_ID_SEGMENTED_MIN = 0;
constexpr device_id_t DEVICE_ID_RANDOM_BLOCK_MIN =
  1 << (DEVICE_ID_BITS - 1);
20effc67 | 77 | |
1e59de90 TL |
78 | struct device_id_printer_t { |
79 | device_id_t id; | |
80 | }; | |
20effc67 | 81 | |
1e59de90 TL |
82 | std::ostream &operator<<(std::ostream &out, const device_id_printer_t &id); |
83 | ||
// Address type of a paddr_t.  One bit of the device id tells the two
// absolute kinds (SEGMENT, RANDOM_BLOCK) apart; RESERVED is reported for
// device ids above DEVICE_ID_MAX_VALID.
enum class paddr_types_t {
  SEGMENT = 0,
  RANDOM_BLOCK = 1,
  RESERVED = 2
};
20effc67 | 90 | |
1e59de90 TL |
91 | constexpr paddr_types_t device_id_to_paddr_type(device_id_t id) { |
92 | if (id > DEVICE_ID_MAX_VALID) { | |
93 | return paddr_types_t::RESERVED; | |
94 | } else if ((id & 0x80) == 0) { | |
95 | return paddr_types_t::SEGMENT; | |
96 | } else { | |
97 | return paddr_types_t::RANDOM_BLOCK; | |
98 | } | |
99 | } | |
20effc67 | 100 | |
1e59de90 TL |
101 | constexpr bool has_device_off(device_id_t id) { |
102 | return id == DEVICE_ID_RECORD_RELATIVE || | |
103 | id == DEVICE_ID_BLOCK_RELATIVE || | |
104 | id == DEVICE_ID_DELAYED || | |
105 | id == DEVICE_ID_FAKE || | |
106 | id == DEVICE_ID_ROOT; | |
107 | } | |
20effc67 | 108 | |
1e59de90 TL |
109 | // internal segment id type of segment_id_t below, with the top |
110 | // "DEVICE_ID_BITS" bits representing the device id of the segment. | |
111 | using internal_segment_id_t = uint32_t; | |
112 | constexpr auto SEGMENT_ID_BITS = std::numeric_limits<internal_segment_id_t>::digits; | |
20effc67 | 113 | |
1e59de90 TL |
114 | // segment ids without a device id encapsulated |
115 | using device_segment_id_t = uint32_t; | |
116 | constexpr auto DEVICE_SEGMENT_ID_BITS = SEGMENT_ID_BITS - DEVICE_ID_BITS; | |
117 | constexpr device_segment_id_t DEVICE_SEGMENT_ID_MAX = (1 << DEVICE_SEGMENT_ID_BITS) - 1; | |
118 | ||
119 | // Identifies segment location on disk, see SegmentManager, | |
120 | struct segment_id_t { | |
20effc67 | 121 | public: |
1e59de90 TL |
122 | // segment_id_t() == MAX_SEG_ID == NULL_SEG_ID |
123 | segment_id_t() | |
124 | : segment_id_t(DEVICE_ID_MAX_VALID_SEGMENT, DEVICE_SEGMENT_ID_MAX) {} | |
125 | ||
126 | segment_id_t(device_id_t id, device_segment_id_t _segment) | |
127 | : segment_id_t(make_internal(id, _segment)) {} | |
128 | ||
129 | segment_id_t(internal_segment_id_t _segment) | |
130 | : segment(_segment) { | |
131 | assert(device_id_to_paddr_type(device_id()) == paddr_types_t::SEGMENT); | |
132 | } | |
20effc67 TL |
133 | |
134 | [[gnu::always_inline]] | |
1e59de90 TL |
135 | constexpr device_id_t device_id() const { |
136 | return static_cast<device_id_t>(segment >> DEVICE_SEGMENT_ID_BITS); | |
20effc67 TL |
137 | } |
138 | ||
139 | [[gnu::always_inline]] | |
140 | constexpr device_segment_id_t device_segment_id() const { | |
1e59de90 TL |
141 | constexpr internal_segment_id_t _SEGMENT_ID_MASK = (1u << DEVICE_SEGMENT_ID_BITS) - 1; |
142 | return segment & _SEGMENT_ID_MASK; | |
20effc67 TL |
143 | } |
144 | ||
145 | bool operator==(const segment_id_t& other) const { | |
146 | return segment == other.segment; | |
147 | } | |
148 | bool operator!=(const segment_id_t& other) const { | |
149 | return segment != other.segment; | |
150 | } | |
151 | bool operator<(const segment_id_t& other) const { | |
152 | return segment < other.segment; | |
153 | } | |
154 | bool operator<=(const segment_id_t& other) const { | |
155 | return segment <= other.segment; | |
156 | } | |
157 | bool operator>(const segment_id_t& other) const { | |
158 | return segment > other.segment; | |
159 | } | |
160 | bool operator>=(const segment_id_t& other) const { | |
161 | return segment >= other.segment; | |
162 | } | |
163 | ||
164 | DENC(segment_id_t, v, p) { | |
165 | denc(v.segment, p); | |
166 | } | |
20effc67 | 167 | |
1e59de90 TL |
168 | static constexpr segment_id_t create_const( |
169 | device_id_t id, device_segment_id_t segment) { | |
170 | return segment_id_t(id, segment, const_t{}); | |
20effc67 TL |
171 | } |
172 | ||
1e59de90 TL |
173 | private: |
174 | struct const_t {}; | |
175 | constexpr segment_id_t(device_id_t id, device_segment_id_t _segment, const_t) | |
176 | : segment(make_internal(id, _segment)) {} | |
20effc67 TL |
177 | |
178 | constexpr static inline internal_segment_id_t make_internal( | |
1e59de90 TL |
179 | device_id_t d_id, |
180 | device_segment_id_t s_id) { | |
181 | return static_cast<internal_segment_id_t>(s_id) | | |
182 | (static_cast<internal_segment_id_t>(d_id) << DEVICE_SEGMENT_ID_BITS); | |
20effc67 TL |
183 | } |
184 | ||
1e59de90 TL |
185 | internal_segment_id_t segment; |
186 | ||
20effc67 | 187 | friend struct segment_id_le_t; |
20effc67 | 188 | friend struct paddr_t; |
20effc67 | 189 | }; |
f67539c2 | 190 | |
1e59de90 TL |
191 | std::ostream &operator<<(std::ostream &out, const segment_id_t&); |
192 | ||
20effc67 TL |
193 | // ondisk type of segment_id_t |
194 | struct __attribute((packed)) segment_id_le_t { | |
1e59de90 | 195 | ceph_le32 segment = ceph_le32(segment_id_t().segment); |
20effc67 TL |
196 | |
197 | segment_id_le_t(const segment_id_t id) : | |
198 | segment(ceph_le32(id.segment)) {} | |
199 | ||
200 | operator segment_id_t() const { | |
201 | return segment_id_t(segment); | |
202 | } | |
203 | }; | |
204 | ||
1e59de90 TL |
205 | constexpr segment_id_t MIN_SEG_ID = segment_id_t::create_const(0, 0); |
206 | // segment_id_t() == MAX_SEG_ID == NULL_SEG_ID | |
207 | constexpr segment_id_t MAX_SEG_ID = | |
208 | segment_id_t::create_const(DEVICE_ID_MAX_VALID_SEGMENT, DEVICE_SEGMENT_ID_MAX); | |
209 | constexpr segment_id_t NULL_SEG_ID = MAX_SEG_ID; | |
f67539c2 TL |
210 | |
/* Monotonically increasing segment seq, uniquely identifies
 * the incarnation of a segment */
using segment_seq_t = uint64_t;
// The maximum value is also used as the null sequence.
static constexpr segment_seq_t MAX_SEG_SEQ =
  std::numeric_limits<segment_seq_t>::max();
static constexpr segment_seq_t NULL_SEG_SEQ = MAX_SEG_SEQ;

// Kind of a segment; stored in a single byte.
enum class segment_type_t : uint8_t {
  JOURNAL = 0,
  OOL,
  NULL_SEG,
};

std::ostream& operator<<(std::ostream& out, segment_type_t t);

// Wrapper around a segment seq; argument type of the operator<< overload
// declared right below.
struct segment_seq_printer_t {
  segment_seq_t seq;
};

std::ostream& operator<<(std::ostream& out, segment_seq_printer_t seq);
f67539c2 | 231 | |
20effc67 TL |
232 | /** |
233 | * segment_map_t | |
234 | * | |
235 | * Compact templated mapping from a segment_id_t to a value type. | |
236 | */ | |
237 | template <typename T> | |
238 | class segment_map_t { | |
239 | public: | |
240 | segment_map_t() { | |
241 | // initializes top vector with 0 length vectors to indicate that they | |
242 | // are not yet present | |
243 | device_to_segments.resize(DEVICE_ID_MAX_VALID); | |
244 | } | |
1e59de90 TL |
245 | void add_device(device_id_t device, std::size_t segments, const T& init) { |
246 | ceph_assert(device <= DEVICE_ID_MAX_VALID); | |
247 | ceph_assert(device_to_segments[device].size() == 0); | |
248 | ceph_assert(segments > 0); | |
20effc67 TL |
249 | device_to_segments[device].resize(segments, init); |
250 | total_segments += segments; | |
251 | } | |
252 | void clear() { | |
253 | device_to_segments.clear(); | |
254 | device_to_segments.resize(DEVICE_ID_MAX_VALID); | |
255 | total_segments = 0; | |
256 | } | |
257 | ||
258 | T& operator[](segment_id_t id) { | |
259 | assert(id.device_segment_id() < device_to_segments[id.device_id()].size()); | |
260 | return device_to_segments[id.device_id()][id.device_segment_id()]; | |
261 | } | |
262 | const T& operator[](segment_id_t id) const { | |
263 | assert(id.device_segment_id() < device_to_segments[id.device_id()].size()); | |
264 | return device_to_segments[id.device_id()][id.device_segment_id()]; | |
265 | } | |
266 | ||
267 | bool contains(segment_id_t id) { | |
268 | bool b = id.device_id() < device_to_segments.size(); | |
269 | if (!b) { | |
270 | return b; | |
271 | } | |
272 | b = id.device_segment_id() < device_to_segments[id.device_id()].size(); | |
273 | return b; | |
274 | } | |
275 | ||
276 | auto begin() { | |
277 | return iterator<false>::lower_bound(*this, 0, 0); | |
278 | } | |
279 | auto begin() const { | |
280 | return iterator<true>::lower_bound(*this, 0, 0); | |
281 | } | |
282 | ||
283 | auto end() { | |
284 | return iterator<false>::end_iterator(*this); | |
285 | } | |
286 | auto end() const { | |
287 | return iterator<true>::end_iterator(*this); | |
288 | } | |
289 | ||
290 | auto device_begin(device_id_t id) { | |
291 | auto ret = iterator<false>::lower_bound(*this, id, 0); | |
292 | assert(ret->first.device_id() == id); | |
293 | return ret; | |
294 | } | |
295 | auto device_end(device_id_t id) { | |
296 | return iterator<false>::lower_bound(*this, id + 1, 0); | |
297 | } | |
298 | ||
299 | size_t size() const { | |
300 | return total_segments; | |
301 | } | |
302 | ||
303 | private: | |
304 | template <bool is_const = false> | |
305 | class iterator { | |
306 | /// points at set being iterated over | |
307 | std::conditional_t< | |
308 | is_const, | |
309 | const segment_map_t &, | |
310 | segment_map_t &> parent; | |
311 | ||
312 | /// points at current device, or DEVICE_ID_MAX_VALID if is_end() | |
313 | device_id_t device_id; | |
314 | ||
315 | /// segment at which we are pointing, 0 if is_end() | |
316 | device_segment_id_t device_segment_id; | |
317 | ||
318 | /// holds referent for operator* and operator-> when !is_end() | |
319 | std::optional< | |
320 | std::pair< | |
321 | const segment_id_t, | |
322 | std::conditional_t<is_const, const T&, T&> | |
323 | >> current; | |
324 | ||
325 | bool is_end() const { | |
326 | return device_id == DEVICE_ID_MAX_VALID; | |
327 | } | |
328 | ||
329 | void find_valid() { | |
330 | assert(!is_end()); | |
331 | auto &device_vec = parent.device_to_segments[device_id]; | |
332 | if (device_vec.size() == 0 || | |
333 | device_segment_id == device_vec.size()) { | |
334 | while (++device_id < DEVICE_ID_MAX_VALID && | |
335 | parent.device_to_segments[device_id].size() == 0); | |
336 | device_segment_id = 0; | |
337 | } | |
338 | if (is_end()) { | |
339 | current = std::nullopt; | |
340 | } else { | |
341 | current.emplace( | |
342 | segment_id_t{device_id, device_segment_id}, | |
343 | parent.device_to_segments[device_id][device_segment_id] | |
344 | ); | |
345 | } | |
346 | } | |
347 | ||
348 | iterator( | |
349 | decltype(parent) &parent, | |
350 | device_id_t device_id, | |
351 | device_segment_id_t device_segment_id) | |
352 | : parent(parent), device_id(device_id), | |
353 | device_segment_id(device_segment_id) {} | |
354 | ||
355 | public: | |
356 | static iterator lower_bound( | |
357 | decltype(parent) &parent, | |
358 | device_id_t device_id, | |
359 | device_segment_id_t device_segment_id) { | |
360 | if (device_id == DEVICE_ID_MAX_VALID) { | |
361 | return end_iterator(parent); | |
362 | } else { | |
363 | auto ret = iterator{parent, device_id, device_segment_id}; | |
364 | ret.find_valid(); | |
365 | return ret; | |
366 | } | |
367 | } | |
368 | ||
369 | static iterator end_iterator( | |
370 | decltype(parent) &parent) { | |
371 | return iterator{parent, DEVICE_ID_MAX_VALID, 0}; | |
372 | } | |
373 | ||
374 | iterator<is_const>& operator++() { | |
375 | assert(!is_end()); | |
376 | ++device_segment_id; | |
377 | find_valid(); | |
378 | return *this; | |
379 | } | |
380 | ||
381 | bool operator==(iterator<is_const> rit) { | |
382 | return (device_id == rit.device_id && | |
383 | device_segment_id == rit.device_segment_id); | |
384 | } | |
385 | ||
386 | bool operator!=(iterator<is_const> rit) { | |
387 | return !(*this == rit); | |
388 | } | |
389 | ||
390 | template <bool c = is_const, std::enable_if_t<c, int> = 0> | |
391 | const std::pair<const segment_id_t, const T&> *operator->() { | |
392 | assert(!is_end()); | |
393 | return &*current; | |
394 | } | |
395 | template <bool c = is_const, std::enable_if_t<!c, int> = 0> | |
396 | std::pair<const segment_id_t, T&> *operator->() { | |
397 | assert(!is_end()); | |
398 | return &*current; | |
399 | } | |
1e59de90 TL |
400 | |
401 | using reference = std::conditional_t< | |
402 | is_const, const std::pair<const segment_id_t, const T&>&, | |
403 | std::pair<const segment_id_t, T&>&>; | |
404 | reference operator*() { | |
20effc67 TL |
405 | assert(!is_end()); |
406 | return *current; | |
407 | } | |
408 | }; | |
409 | ||
410 | /** | |
411 | * device_to_segments | |
412 | * | |
413 | * device -> segment -> T mapping. device_to_segments[d].size() > 0 iff | |
414 | * device <d> has been added. | |
415 | */ | |
416 | std::vector<std::vector<T>> device_to_segments; | |
417 | ||
418 | /// total number of added segments | |
419 | size_t total_segments = 0; | |
420 | }; | |
421 | ||
f67539c2 TL |
422 | /** |
423 | * paddr_t | |
424 | * | |
425 | * <segment, offset> offset on disk, see SegmentManager | |
426 | * | |
427 | * May be absolute, record_relative, or block_relative. | |
428 | * | |
429 | * Blocks get read independently of the surrounding record, | |
430 | * so paddrs embedded directly within a block need to refer | |
431 | * to other blocks within the same record by a block_relative | |
432 | * addr relative to the block's own offset. By contrast, | |
433 | * deltas to existing blocks need to use record_relative | |
434 | * addrs relative to the first block of the record. | |
435 | * | |
436 | * Fresh extents during a transaction are referred to by | |
437 | * record_relative paddrs. | |
438 | */ | |
1e59de90 TL |
439 | |
440 | using internal_paddr_t = uint64_t; | |
441 | constexpr auto PADDR_BITS = std::numeric_limits<internal_paddr_t>::digits; | |
442 | ||
443 | /** | |
444 | * device_off_t | |
445 | * | |
446 | * Offset within a device, may be negative for relative offsets. | |
447 | */ | |
448 | using device_off_t = int64_t; | |
449 | using u_device_off_t = uint64_t; | |
450 | constexpr auto DEVICE_OFF_BITS = PADDR_BITS - DEVICE_ID_BITS; | |
451 | constexpr auto DEVICE_OFF_MAX = | |
452 | std::numeric_limits<device_off_t>::max() >> DEVICE_ID_BITS; | |
453 | constexpr auto DEVICE_OFF_MIN = -(DEVICE_OFF_MAX + 1); | |
454 | ||
455 | /** | |
456 | * segment_off_t | |
457 | * | |
458 | * Offset within a segment on disk, may be negative for relative offsets. | |
459 | */ | |
460 | using segment_off_t = int32_t; | |
461 | using u_segment_off_t = uint32_t; | |
462 | constexpr auto SEGMENT_OFF_MAX = std::numeric_limits<segment_off_t>::max(); | |
463 | constexpr auto SEGMENT_OFF_MIN = std::numeric_limits<segment_off_t>::min(); | |
464 | constexpr auto SEGMENT_OFF_BITS = std::numeric_limits<u_segment_off_t>::digits; | |
465 | static_assert(PADDR_BITS == SEGMENT_ID_BITS + SEGMENT_OFF_BITS); | |
466 | ||
467 | constexpr auto DEVICE_ID_MASK = | |
468 | ((internal_paddr_t(1) << DEVICE_ID_BITS) - 1) << DEVICE_OFF_BITS; | |
469 | constexpr auto DEVICE_OFF_MASK = | |
470 | std::numeric_limits<u_device_off_t>::max() >> DEVICE_ID_BITS; | |
471 | constexpr auto SEGMENT_ID_MASK = | |
472 | ((internal_paddr_t(1) << SEGMENT_ID_BITS) - 1) << SEGMENT_OFF_BITS; | |
473 | constexpr auto SEGMENT_OFF_MASK = | |
474 | (internal_paddr_t(1) << SEGMENT_OFF_BITS) - 1; | |
475 | ||
476 | constexpr internal_paddr_t encode_device_off(device_off_t off) { | |
477 | return static_cast<internal_paddr_t>(off) & DEVICE_OFF_MASK; | |
478 | } | |
479 | ||
480 | constexpr device_off_t decode_device_off(internal_paddr_t addr) { | |
481 | if (addr & (1ull << (DEVICE_OFF_BITS - 1))) { | |
482 | return static_cast<device_off_t>(addr | DEVICE_ID_MASK); | |
483 | } else { | |
484 | return static_cast<device_off_t>(addr & DEVICE_OFF_MASK); | |
485 | } | |
486 | } | |
487 | ||
20effc67 | 488 | struct seg_paddr_t; |
1e59de90 TL |
489 | struct blk_paddr_t; |
490 | struct res_paddr_t; | |
aee94f69 | 491 | struct pladdr_t; |
f67539c2 | 492 | struct paddr_t { |
20effc67 | 493 | public: |
1e59de90 TL |
494 | // P_ADDR_MAX == P_ADDR_NULL == paddr_t{} |
495 | paddr_t() : paddr_t(DEVICE_ID_MAX, device_off_t(0)) {} | |
496 | ||
497 | static paddr_t make_seg_paddr( | |
498 | segment_id_t seg, | |
499 | segment_off_t offset) { | |
20effc67 TL |
500 | return paddr_t(seg, offset); |
501 | } | |
1e59de90 TL |
502 | |
503 | static paddr_t make_seg_paddr( | |
20effc67 TL |
504 | device_id_t device, |
505 | device_segment_id_t seg, | |
506 | segment_off_t offset) { | |
507 | return paddr_t(segment_id_t(device, seg), offset); | |
508 | } | |
f67539c2 | 509 | |
1e59de90 TL |
510 | static paddr_t make_blk_paddr( |
511 | device_id_t device, | |
512 | device_off_t offset) { | |
513 | assert(device_id_to_paddr_type(device) == paddr_types_t::RANDOM_BLOCK); | |
514 | return paddr_t(device, offset); | |
515 | } | |
516 | ||
517 | static paddr_t make_res_paddr( | |
518 | device_id_t device, | |
519 | device_off_t offset) { | |
520 | assert(device_id_to_paddr_type(device) == paddr_types_t::RESERVED); | |
521 | return paddr_t(device, offset); | |
522 | } | |
523 | ||
524 | void swap(paddr_t &other) { | |
525 | std::swap(internal_paddr, other.internal_paddr); | |
f67539c2 TL |
526 | } |
527 | ||
20effc67 | 528 | device_id_t get_device_id() const { |
1e59de90 | 529 | return static_cast<device_id_t>(internal_paddr >> DEVICE_OFF_BITS); |
20effc67 | 530 | } |
1e59de90 TL |
531 | |
532 | paddr_types_t get_addr_type() const { | |
533 | return device_id_to_paddr_type(get_device_id()); | |
f67539c2 TL |
534 | } |
535 | ||
1e59de90 TL |
536 | paddr_t add_offset(device_off_t o) const; |
537 | ||
20effc67 | 538 | paddr_t add_relative(paddr_t o) const; |
1e59de90 TL |
539 | |
540 | paddr_t add_block_relative(paddr_t o) const { | |
541 | // special version mainly for documentation purposes | |
542 | assert(o.is_block_relative()); | |
543 | return add_relative(o); | |
544 | } | |
545 | ||
546 | paddr_t add_record_relative(paddr_t o) const { | |
547 | // special version mainly for documentation purposes | |
548 | assert(o.is_record_relative()); | |
549 | return add_relative(o); | |
550 | } | |
551 | ||
552 | /** | |
553 | * maybe_relative_to | |
554 | * | |
555 | * Helper for the case where an in-memory paddr_t may be | |
556 | * either block_relative or absolute (not record_relative). | |
557 | * | |
558 | * base must be either absolute or record_relative. | |
559 | */ | |
560 | paddr_t maybe_relative_to(paddr_t base) const { | |
561 | assert(!base.is_block_relative()); | |
562 | if (is_block_relative()) { | |
563 | return base.add_block_relative(*this); | |
564 | } else { | |
565 | return *this; | |
566 | } | |
567 | } | |
568 | ||
569 | /** | |
570 | * block_relative_to | |
571 | * | |
572 | * Only defined for record_relative paddr_ts. Yields a | |
573 | * block_relative address. | |
574 | */ | |
575 | paddr_t block_relative_to(paddr_t rhs) const; | |
576 | ||
577 | // To be compatible with laddr_t operator+ | |
578 | paddr_t operator+(device_off_t o) const { | |
579 | return add_offset(o); | |
580 | } | |
20effc67 TL |
581 | |
582 | seg_paddr_t& as_seg_paddr(); | |
583 | const seg_paddr_t& as_seg_paddr() const; | |
1e59de90 TL |
584 | blk_paddr_t& as_blk_paddr(); |
585 | const blk_paddr_t& as_blk_paddr() const; | |
586 | res_paddr_t& as_res_paddr(); | |
587 | const res_paddr_t& as_res_paddr() const; | |
20effc67 | 588 | |
1e59de90 TL |
589 | bool is_delayed() const { |
590 | return get_device_id() == DEVICE_ID_DELAYED; | |
591 | } | |
f67539c2 | 592 | bool is_block_relative() const { |
20effc67 TL |
593 | return get_device_id() == DEVICE_ID_BLOCK_RELATIVE; |
594 | } | |
595 | bool is_record_relative() const { | |
596 | return get_device_id() == DEVICE_ID_RECORD_RELATIVE; | |
597 | } | |
598 | bool is_relative() const { | |
599 | return is_block_relative() || is_record_relative(); | |
600 | } | |
601 | /// Denotes special null addr | |
602 | bool is_null() const { | |
603 | return get_device_id() == DEVICE_ID_NULL; | |
604 | } | |
605 | /// Denotes special zero addr | |
606 | bool is_zero() const { | |
607 | return get_device_id() == DEVICE_ID_ZERO; | |
608 | } | |
1e59de90 TL |
609 | /// Denotes the root addr |
610 | bool is_root() const { | |
611 | return get_device_id() == DEVICE_ID_ROOT; | |
612 | } | |
20effc67 TL |
613 | |
614 | /** | |
615 | * is_real | |
616 | * | |
1e59de90 TL |
617 | * indicates whether addr reflects a physical location, absolute, relative, |
618 | * or delayed. FAKE segments also count as real so as to reflect the way in | |
619 | * which unit tests use them. | |
20effc67 TL |
620 | */ |
621 | bool is_real() const { | |
1e59de90 TL |
622 | return !is_zero() && !is_null() && !is_root(); |
623 | } | |
624 | ||
625 | bool is_absolute() const { | |
626 | return get_addr_type() != paddr_types_t::RESERVED; | |
627 | } | |
628 | ||
629 | bool is_fake() const { | |
630 | return get_device_id() == DEVICE_ID_FAKE; | |
20effc67 TL |
631 | } |
632 | ||
1e59de90 TL |
633 | auto operator<=>(const paddr_t &) const = default; |
634 | ||
20effc67 TL |
635 | DENC(paddr_t, v, p) { |
636 | DENC_START(1, 1, p); | |
1e59de90 | 637 | denc(v.internal_paddr, p); |
20effc67 TL |
638 | DENC_FINISH(p); |
639 | } | |
1e59de90 TL |
640 | |
641 | constexpr static paddr_t create_const( | |
642 | device_id_t d_id, device_off_t offset) { | |
643 | return paddr_t(d_id, offset, const_construct_t()); | |
644 | } | |
645 | ||
646 | protected: | |
647 | internal_paddr_t internal_paddr; | |
648 | ||
649 | private: | |
650 | // as seg | |
651 | paddr_t(segment_id_t seg, segment_off_t offset) | |
652 | : paddr_t((static_cast<internal_paddr_t>(seg.segment) << SEGMENT_OFF_BITS) | | |
653 | static_cast<u_segment_off_t>(offset)) {} | |
654 | ||
655 | // as blk or res | |
656 | paddr_t(device_id_t d_id, device_off_t offset) | |
657 | : paddr_t((static_cast<internal_paddr_t>(d_id) << DEVICE_OFF_BITS) | | |
658 | encode_device_off(offset)) { | |
659 | assert(offset >= DEVICE_OFF_MIN); | |
660 | assert(offset <= DEVICE_OFF_MAX); | |
661 | assert(get_addr_type() != paddr_types_t::SEGMENT); | |
662 | } | |
663 | ||
664 | paddr_t(internal_paddr_t val); | |
665 | ||
666 | struct const_construct_t {}; | |
667 | constexpr paddr_t(device_id_t d_id, device_off_t offset, const_construct_t) | |
668 | : internal_paddr((static_cast<internal_paddr_t>(d_id) << DEVICE_OFF_BITS) | | |
669 | static_cast<u_device_off_t>(offset)) {} | |
670 | ||
20effc67 | 671 | friend struct paddr_le_t; |
aee94f69 TL |
672 | friend struct pladdr_le_t; |
673 | ||
20effc67 | 674 | }; |
20effc67 | 675 | |
1e59de90 | 676 | std::ostream &operator<<(std::ostream &out, const paddr_t &rhs); |
20effc67 | 677 | |
1e59de90 | 678 | struct seg_paddr_t : public paddr_t { |
20effc67 TL |
679 | seg_paddr_t(const seg_paddr_t&) = delete; |
680 | seg_paddr_t(seg_paddr_t&) = delete; | |
681 | seg_paddr_t& operator=(const seg_paddr_t&) = delete; | |
682 | seg_paddr_t& operator=(seg_paddr_t&) = delete; | |
1e59de90 | 683 | |
20effc67 | 684 | segment_id_t get_segment_id() const { |
1e59de90 TL |
685 | return segment_id_t(static_cast<internal_segment_id_t>( |
686 | internal_paddr >> SEGMENT_OFF_BITS)); | |
20effc67 | 687 | } |
1e59de90 | 688 | |
20effc67 | 689 | segment_off_t get_segment_off() const { |
1e59de90 | 690 | return segment_off_t(internal_paddr & SEGMENT_OFF_MASK); |
20effc67 | 691 | } |
1e59de90 TL |
692 | |
693 | void set_segment_off(segment_off_t off) { | |
694 | assert(off >= 0); | |
695 | internal_paddr = (internal_paddr & SEGMENT_ID_MASK); | |
696 | internal_paddr |= static_cast<u_segment_off_t>(off); | |
20effc67 | 697 | } |
1e59de90 TL |
698 | |
699 | paddr_t add_offset(device_off_t o) const { | |
700 | device_off_t off = get_segment_off() + o; | |
701 | assert(off >= 0); | |
702 | assert(off <= SEGMENT_OFF_MAX); | |
703 | return paddr_t::make_seg_paddr( | |
704 | get_segment_id(), static_cast<segment_off_t>(off)); | |
f67539c2 | 705 | } |
1e59de90 TL |
706 | }; |
707 | ||
708 | struct blk_paddr_t : public paddr_t { | |
709 | blk_paddr_t(const blk_paddr_t&) = delete; | |
710 | blk_paddr_t(blk_paddr_t&) = delete; | |
711 | blk_paddr_t& operator=(const blk_paddr_t&) = delete; | |
712 | blk_paddr_t& operator=(blk_paddr_t&) = delete; | |
f67539c2 | 713 | |
1e59de90 TL |
714 | device_off_t get_device_off() const { |
715 | return decode_device_off(internal_paddr); | |
f67539c2 TL |
716 | } |
717 | ||
1e59de90 TL |
718 | void set_device_off(device_off_t off) { |
719 | assert(off >= 0); | |
720 | assert(off <= DEVICE_OFF_MAX); | |
721 | internal_paddr = (internal_paddr & DEVICE_ID_MASK); | |
722 | internal_paddr |= encode_device_off(off); | |
f67539c2 TL |
723 | } |
724 | ||
1e59de90 TL |
725 | paddr_t add_offset(device_off_t o) const { |
726 | assert(o >= DEVICE_OFF_MIN); | |
727 | assert(o <= DEVICE_OFF_MAX); | |
728 | auto off = get_device_off() + o; | |
729 | return paddr_t::make_blk_paddr(get_device_id(), off); | |
f67539c2 | 730 | } |
1e59de90 | 731 | }; |
f67539c2 | 732 | |
1e59de90 TL |
733 | struct res_paddr_t : public paddr_t { |
734 | res_paddr_t(const res_paddr_t&) = delete; | |
735 | res_paddr_t(res_paddr_t&) = delete; | |
736 | res_paddr_t& operator=(const res_paddr_t&) = delete; | |
737 | res_paddr_t& operator=(res_paddr_t&) = delete; | |
738 | ||
739 | device_off_t get_device_off() const { | |
740 | return decode_device_off(internal_paddr); | |
f67539c2 TL |
741 | } |
742 | ||
1e59de90 TL |
743 | void set_device_off(device_off_t off) { |
744 | assert(has_device_off(get_device_id())); | |
745 | assert(off >= DEVICE_OFF_MIN); | |
746 | assert(off <= DEVICE_OFF_MAX); | |
747 | internal_paddr = (internal_paddr & DEVICE_ID_MASK); | |
748 | internal_paddr |= encode_device_off(off); | |
f67539c2 TL |
749 | } |
750 | ||
1e59de90 TL |
751 | paddr_t add_offset(device_off_t o) const { |
752 | assert(has_device_off(get_device_id())); | |
753 | assert(o >= DEVICE_OFF_MIN); | |
754 | assert(o <= DEVICE_OFF_MAX); | |
755 | auto off = get_device_off() + o; | |
756 | return paddr_t::make_res_paddr(get_device_id(), off); | |
757 | } | |
758 | ||
759 | paddr_t block_relative_to(const res_paddr_t &rhs) const { | |
760 | assert(rhs.is_record_relative() && is_record_relative()); | |
761 | auto off = get_device_off() - rhs.get_device_off(); | |
762 | return paddr_t::make_res_paddr(DEVICE_ID_BLOCK_RELATIVE, off); | |
f67539c2 | 763 | } |
f67539c2 | 764 | }; |
1e59de90 TL |
765 | |
766 | constexpr paddr_t P_ADDR_MIN = paddr_t::create_const(0, 0); | |
767 | // P_ADDR_MAX == P_ADDR_NULL == paddr_t{} | |
768 | constexpr paddr_t P_ADDR_MAX = paddr_t::create_const(DEVICE_ID_MAX, 0); | |
769 | constexpr paddr_t P_ADDR_NULL = P_ADDR_MAX; | |
770 | constexpr paddr_t P_ADDR_ZERO = paddr_t::create_const(DEVICE_ID_ZERO, 0); | |
771 | constexpr paddr_t P_ADDR_ROOT = paddr_t::create_const(DEVICE_ID_ROOT, 0); | |
772 | ||
773 | inline paddr_t make_record_relative_paddr(device_off_t off) { | |
774 | return paddr_t::make_res_paddr(DEVICE_ID_RECORD_RELATIVE, off); | |
f67539c2 | 775 | } |
1e59de90 TL |
776 | inline paddr_t make_block_relative_paddr(device_off_t off) { |
777 | return paddr_t::make_res_paddr(DEVICE_ID_BLOCK_RELATIVE, off); | |
f67539c2 | 778 | } |
1e59de90 TL |
779 | inline paddr_t make_fake_paddr(device_off_t off) { |
780 | return paddr_t::make_res_paddr(DEVICE_ID_FAKE, off); | |
20effc67 | 781 | } |
1e59de90 TL |
782 | inline paddr_t make_delayed_temp_paddr(device_off_t off) { |
783 | return paddr_t::make_res_paddr(DEVICE_ID_DELAYED, off); | |
784 | } | |
785 | ||
786 | inline const seg_paddr_t& paddr_t::as_seg_paddr() const { | |
787 | assert(get_addr_type() == paddr_types_t::SEGMENT); | |
788 | return *static_cast<const seg_paddr_t*>(this); | |
789 | } | |
790 | ||
791 | inline seg_paddr_t& paddr_t::as_seg_paddr() { | |
792 | assert(get_addr_type() == paddr_types_t::SEGMENT); | |
793 | return *static_cast<seg_paddr_t*>(this); | |
794 | } | |
795 | ||
796 | inline const blk_paddr_t& paddr_t::as_blk_paddr() const { | |
797 | assert(get_addr_type() == paddr_types_t::RANDOM_BLOCK); | |
798 | return *static_cast<const blk_paddr_t*>(this); | |
799 | } | |
800 | ||
801 | inline blk_paddr_t& paddr_t::as_blk_paddr() { | |
802 | assert(get_addr_type() == paddr_types_t::RANDOM_BLOCK); | |
803 | return *static_cast<blk_paddr_t*>(this); | |
804 | } | |
805 | ||
806 | inline const res_paddr_t& paddr_t::as_res_paddr() const { | |
807 | assert(get_addr_type() == paddr_types_t::RESERVED); | |
808 | return *static_cast<const res_paddr_t*>(this); | |
809 | } | |
810 | ||
811 | inline res_paddr_t& paddr_t::as_res_paddr() { | |
812 | assert(get_addr_type() == paddr_types_t::RESERVED); | |
813 | return *static_cast<res_paddr_t*>(this); | |
814 | } | |
815 | ||
816 | inline paddr_t::paddr_t(internal_paddr_t val) : internal_paddr(val) { | |
817 | #ifndef NDEBUG | |
818 | auto type = get_addr_type(); | |
819 | if (type == paddr_types_t::SEGMENT) { | |
820 | assert(as_seg_paddr().get_segment_off() >= 0); | |
821 | } else if (type == paddr_types_t::RANDOM_BLOCK) { | |
822 | assert(as_blk_paddr().get_device_off() >= 0); | |
823 | } else { | |
824 | assert(type == paddr_types_t::RESERVED); | |
825 | if (!has_device_off(get_device_id())) { | |
826 | assert(as_res_paddr().get_device_off() == 0); | |
827 | } | |
828 | } | |
829 | #endif | |
830 | } | |
831 | ||
// Dispatch helper for paddr_t member functions: when this address is of kind
// `addr_kind`, return the result of `call` invoked on `this` viewed as a
// `const derived*`.  Expands to a bare if-statement, so it is written without
// a trailing semicolon and simply falls through when the kind does not match.
#define PADDR_OPERATION(addr_kind, derived, call)	\
  if (get_addr_type() == addr_kind) {			\
    return static_cast<const derived*>(this)->call;	\
  }
836 | ||
837 | inline paddr_t paddr_t::add_offset(device_off_t o) const { | |
838 | PADDR_OPERATION(paddr_types_t::SEGMENT, seg_paddr_t, add_offset(o)) | |
839 | PADDR_OPERATION(paddr_types_t::RANDOM_BLOCK, blk_paddr_t, add_offset(o)) | |
840 | PADDR_OPERATION(paddr_types_t::RESERVED, res_paddr_t, add_offset(o)) | |
841 | ceph_assert(0 == "not supported type"); | |
842 | return P_ADDR_NULL; | |
843 | } | |
844 | ||
845 | inline paddr_t paddr_t::add_relative(paddr_t o) const { | |
846 | assert(o.is_relative()); | |
847 | auto &res_o = o.as_res_paddr(); | |
848 | return add_offset(res_o.get_device_off()); | |
849 | } | |
850 | ||
851 | inline paddr_t paddr_t::block_relative_to(paddr_t rhs) const { | |
852 | return as_res_paddr().block_relative_to(rhs.as_res_paddr()); | |
f67539c2 TL |
853 | } |
854 | ||
20effc67 | 855 | struct __attribute((packed)) paddr_le_t { |
1e59de90 TL |
856 | ceph_le64 internal_paddr = |
857 | ceph_le64(P_ADDR_NULL.internal_paddr); | |
858 | ||
859 | using orig_type = paddr_t; | |
f67539c2 TL |
860 | |
861 | paddr_le_t() = default; | |
1e59de90 | 862 | paddr_le_t(const paddr_t &addr) : internal_paddr(ceph_le64(addr.internal_paddr)) {} |
f67539c2 TL |
863 | |
864 | operator paddr_t() const { | |
1e59de90 | 865 | return paddr_t{internal_paddr}; |
f67539c2 TL |
866 | } |
867 | }; | |
868 | ||
// Object address: 32-bit unsigned; the maximum value doubles as the NULL value.
using objaddr_t = uint32_t;
constexpr objaddr_t OBJ_ADDR_MAX{std::numeric_limits<objaddr_t>::max()};
constexpr objaddr_t OBJ_ADDR_NULL{OBJ_ADDR_MAX};
20effc67 TL |
872 | |
// Placement hint attached to an extent.
enum class placement_hint_t {
  HOT = 0,        // The default user hint that expects mutations or retirement
  COLD = 1,       // Expect no mutations and no retirement in the near future
  REWRITE = 2,    // Hint for the internal rewrites
  NUM_HINTS = 3   // Constant for number of hints or as NULL
};

// The hint count doubles as the "no hint" value.
constexpr auto PLACEMENT_HINT_NULL = placement_hint_t::NUM_HINTS;
881 | ||
882 | std::ostream& operator<<(std::ostream& out, placement_hint_t h); | |
883 | ||
// Kind of device.  The numeric order matters: range comparisons on these
// values are used to pick the default backend for a device type.
enum class device_type_t : uint8_t {
  NONE = 0,
  HDD = 1,
  SSD = 2,
  ZBD = 3,                    // ZNS SSD or SMR HDD
  EPHEMERAL_COLD = 4,
  EPHEMERAL_MAIN = 5,
  RANDOM_BLOCK_SSD = 6,
  RANDOM_BLOCK_EPHEMERAL = 7,
  NUM_TYPES = 8
};
895 | ||
896 | std::ostream& operator<<(std::ostream& out, device_type_t t); | |
897 | ||
898 | bool can_delay_allocation(device_type_t type); | |
899 | device_type_t string_to_device_type(std::string type); | |
f67539c2 | 900 | |
// Storage backend flavour.  The device lists below follow the ranges used by
// get_default_backend_of_device().
enum class backend_type_t {
  SEGMENTED = 0,    // SegmentManager: HDD, SSD, ZBD, EPHEMERAL_COLD, EPHEMERAL_MAIN
  RANDOM_BLOCK = 1  // RBMDevice:      RANDOM_BLOCK_SSD, RANDOM_BLOCK_EPHEMERAL
};
905 | ||
906 | std::ostream& operator<<(std::ostream& out, backend_type_t); | |
907 | using journal_type_t = backend_type_t; | |
908 | ||
909 | constexpr backend_type_t get_default_backend_of_device(device_type_t dtype) { | |
910 | assert(dtype != device_type_t::NONE && | |
911 | dtype != device_type_t::NUM_TYPES); | |
912 | if (dtype >= device_type_t::HDD && | |
913 | dtype <= device_type_t::EPHEMERAL_MAIN) { | |
914 | return backend_type_t::SEGMENTED; | |
915 | } else { | |
916 | return backend_type_t::RANDOM_BLOCK; | |
917 | } | |
918 | } | |
919 | ||
920 | /** | |
921 | * Monotonically increasing identifier for the location of a | |
f67539c2 TL |
922 | * journal_record. |
923 | */ | |
1e59de90 | 924 | // JOURNAL_SEQ_NULL == JOURNAL_SEQ_MAX == journal_seq_t{} |
f67539c2 | 925 | struct journal_seq_t { |
1e59de90 TL |
926 | segment_seq_t segment_seq = NULL_SEG_SEQ; |
927 | paddr_t offset = P_ADDR_NULL; | |
f67539c2 | 928 | |
1e59de90 TL |
929 | void swap(journal_seq_t &other) { |
930 | std::swap(segment_seq, other.segment_seq); | |
931 | std::swap(offset, other.offset); | |
20effc67 TL |
932 | } |
933 | ||
1e59de90 TL |
934 | // produces a pseudo journal_seq_t relative to this by offset |
935 | journal_seq_t add_offset( | |
936 | journal_type_t type, | |
937 | device_off_t off, | |
938 | device_off_t roll_start, | |
939 | device_off_t roll_size) const; | |
940 | ||
941 | device_off_t relative_to( | |
942 | journal_type_t type, | |
943 | const journal_seq_t& r, | |
944 | device_off_t roll_start, | |
945 | device_off_t roll_size) const; | |
946 | ||
f67539c2 TL |
947 | DENC(journal_seq_t, v, p) { |
948 | DENC_START(1, 1, p); | |
949 | denc(v.segment_seq, p); | |
950 | denc(v.offset, p); | |
951 | DENC_FINISH(p); | |
952 | } | |
1e59de90 TL |
953 | |
954 | bool operator==(const journal_seq_t &o) const { return cmp(o) == 0; } | |
955 | bool operator!=(const journal_seq_t &o) const { return cmp(o) != 0; } | |
956 | bool operator<(const journal_seq_t &o) const { return cmp(o) < 0; } | |
957 | bool operator<=(const journal_seq_t &o) const { return cmp(o) <= 0; } | |
958 | bool operator>(const journal_seq_t &o) const { return cmp(o) > 0; } | |
959 | bool operator>=(const journal_seq_t &o) const { return cmp(o) >= 0; } | |
960 | ||
961 | private: | |
962 | int cmp(const journal_seq_t &other) const { | |
963 | if (segment_seq > other.segment_seq) { | |
964 | return 1; | |
965 | } else if (segment_seq < other.segment_seq) { | |
966 | return -1; | |
967 | } | |
968 | using ret_t = std::pair<device_off_t, segment_id_t>; | |
969 | auto to_pair = [](const paddr_t &addr) -> ret_t { | |
970 | if (addr.get_addr_type() == paddr_types_t::SEGMENT) { | |
971 | auto &seg_addr = addr.as_seg_paddr(); | |
972 | return ret_t(seg_addr.get_segment_off(), seg_addr.get_segment_id()); | |
973 | } else if (addr.get_addr_type() == paddr_types_t::RANDOM_BLOCK) { | |
974 | auto &blk_addr = addr.as_blk_paddr(); | |
975 | return ret_t(blk_addr.get_device_off(), MAX_SEG_ID); | |
976 | } else if (addr.get_addr_type() == paddr_types_t::RESERVED) { | |
977 | auto &res_addr = addr.as_res_paddr(); | |
978 | return ret_t(res_addr.get_device_off(), MAX_SEG_ID); | |
979 | } else { | |
980 | assert(0 == "impossible"); | |
981 | return ret_t(0, MAX_SEG_ID); | |
982 | } | |
983 | }; | |
984 | auto left = to_pair(offset); | |
985 | auto right = to_pair(other.offset); | |
986 | if (left > right) { | |
987 | return 1; | |
988 | } else if (left < right) { | |
989 | return -1; | |
990 | } else { | |
991 | return 0; | |
992 | } | |
993 | } | |
f67539c2 | 994 | }; |
1e59de90 TL |
995 | |
996 | std::ostream &operator<<(std::ostream &out, const journal_seq_t &seq); | |
997 | ||
20effc67 TL |
998 | constexpr journal_seq_t JOURNAL_SEQ_MIN{ |
999 | 0, | |
1e59de90 | 1000 | P_ADDR_MIN |
20effc67 TL |
1001 | }; |
1002 | constexpr journal_seq_t JOURNAL_SEQ_MAX{ | |
1003 | MAX_SEG_SEQ, | |
1004 | P_ADDR_MAX | |
1005 | }; | |
1e59de90 TL |
1006 | // JOURNAL_SEQ_NULL == JOURNAL_SEQ_MAX == journal_seq_t{} |
1007 | constexpr journal_seq_t JOURNAL_SEQ_NULL = JOURNAL_SEQ_MAX; | |
f67539c2 TL |
1008 | |
// logical addr, see LBAManager, TransactionManager
using laddr_t = uint64_t;
constexpr laddr_t L_ADDR_MIN{std::numeric_limits<laddr_t>::min()};
constexpr laddr_t L_ADDR_MAX{std::numeric_limits<laddr_t>::max()};
// The three highest values are special: NULL, then ROOT, then LBAT.
constexpr laddr_t L_ADDR_NULL{L_ADDR_MAX};
constexpr laddr_t L_ADDR_ROOT{L_ADDR_MAX - 1};
constexpr laddr_t L_ADDR_LBAT{L_ADDR_MAX - 2};
f67539c2 | 1016 | |
20effc67 TL |
1017 | struct __attribute((packed)) laddr_le_t { |
1018 | ceph_le64 laddr = ceph_le64(L_ADDR_NULL); | |
f67539c2 | 1019 | |
1e59de90 TL |
1020 | using orig_type = laddr_t; |
1021 | ||
f67539c2 TL |
1022 | laddr_le_t() = default; |
1023 | laddr_le_t(const laddr_le_t &) = default; | |
1024 | explicit laddr_le_t(const laddr_t &addr) | |
20effc67 | 1025 | : laddr(ceph_le64(addr)) {} |
f67539c2 TL |
1026 | |
1027 | operator laddr_t() const { | |
1028 | return laddr_t(laddr); | |
1029 | } | |
1030 | laddr_le_t& operator=(laddr_t addr) { | |
1031 | ceph_le64 val; | |
1032 | val = addr; | |
1033 | laddr = val; | |
1034 | return *this; | |
1035 | } | |
1036 | }; | |
1037 | ||
aee94f69 TL |
1038 | constexpr uint64_t PL_ADDR_NULL = std::numeric_limits<uint64_t>::max(); |
1039 | ||
1040 | struct pladdr_t { | |
1041 | std::variant<laddr_t, paddr_t> pladdr; | |
1042 | ||
1043 | pladdr_t() = default; | |
1044 | pladdr_t(const pladdr_t &) = default; | |
1045 | pladdr_t(laddr_t laddr) | |
1046 | : pladdr(laddr) {} | |
1047 | pladdr_t(paddr_t paddr) | |
1048 | : pladdr(paddr) {} | |
1049 | ||
1050 | bool is_laddr() const { | |
1051 | return pladdr.index() == 0; | |
1052 | } | |
1053 | ||
1054 | bool is_paddr() const { | |
1055 | return pladdr.index() == 1; | |
1056 | } | |
1057 | ||
1058 | pladdr_t& operator=(paddr_t paddr) { | |
1059 | pladdr = paddr; | |
1060 | return *this; | |
1061 | } | |
1062 | ||
1063 | pladdr_t& operator=(laddr_t laddr) { | |
1064 | pladdr = laddr; | |
1065 | return *this; | |
1066 | } | |
1067 | ||
1068 | bool operator==(const pladdr_t &) const = default; | |
1069 | ||
1070 | paddr_t get_paddr() const { | |
1071 | assert(pladdr.index() == 1); | |
1072 | return paddr_t(std::get<1>(pladdr)); | |
1073 | } | |
1074 | ||
1075 | laddr_t get_laddr() const { | |
1076 | assert(pladdr.index() == 0); | |
1077 | return laddr_t(std::get<0>(pladdr)); | |
1078 | } | |
1079 | ||
1080 | }; | |
1081 | ||
1082 | std::ostream &operator<<(std::ostream &out, const pladdr_t &pladdr); | |
1083 | ||
// Discriminator stored next to an encoded pladdr value.
enum class addr_type_t : uint8_t {
  PADDR = 0,
  LADDR = 1,
  MAX = 2	// or NONE
};
1089 | ||
1090 | struct __attribute((packed)) pladdr_le_t { | |
1091 | ceph_le64 pladdr = ceph_le64(PL_ADDR_NULL); | |
1092 | addr_type_t addr_type = addr_type_t::MAX; | |
1093 | ||
1094 | pladdr_le_t() = default; | |
1095 | pladdr_le_t(const pladdr_le_t &) = default; | |
1096 | explicit pladdr_le_t(const pladdr_t &addr) | |
1097 | : pladdr( | |
1098 | ceph_le64( | |
1099 | addr.is_laddr() ? | |
1100 | std::get<0>(addr.pladdr) : | |
1101 | std::get<1>(addr.pladdr).internal_paddr)), | |
1102 | addr_type( | |
1103 | addr.is_laddr() ? | |
1104 | addr_type_t::LADDR : | |
1105 | addr_type_t::PADDR) | |
1106 | {} | |
1107 | ||
1108 | operator pladdr_t() const { | |
1109 | if (addr_type == addr_type_t::LADDR) { | |
1110 | return pladdr_t(laddr_t(pladdr)); | |
1111 | } else { | |
1112 | assert(addr_type == addr_type_t::PADDR); | |
1113 | return pladdr_t(paddr_t(pladdr)); | |
1114 | } | |
1115 | } | |
1116 | }; | |
1117 | ||
1118 | template <typename T> | |
1119 | struct min_max_t {}; | |
1120 | ||
1121 | template <> | |
1122 | struct min_max_t<laddr_t> { | |
1123 | static constexpr laddr_t max = L_ADDR_MAX; | |
1124 | static constexpr laddr_t min = L_ADDR_MIN; | |
1125 | static constexpr laddr_t null = L_ADDR_NULL; | |
1126 | }; | |
1127 | ||
1128 | template <> | |
1129 | struct min_max_t<paddr_t> { | |
1130 | static constexpr paddr_t max = P_ADDR_MAX; | |
1131 | static constexpr paddr_t min = P_ADDR_MIN; | |
1132 | static constexpr paddr_t null = P_ADDR_NULL; | |
1133 | }; | |
1134 | ||
f67539c2 TL |
1135 | // logical offset, see LBAManager, TransactionManager |
1136 | using extent_len_t = uint32_t; | |
1137 | constexpr extent_len_t EXTENT_LEN_MAX = | |
1138 | std::numeric_limits<extent_len_t>::max(); | |
1139 | ||
1140 | using extent_len_le_t = ceph_le32; | |
20effc67 TL |
1141 | inline extent_len_le_t init_extent_len_le(extent_len_t len) { |
1142 | return ceph_le32(len); | |
f67539c2 TL |
1143 | } |
1144 | ||
1145 | struct laddr_list_t : std::list<std::pair<laddr_t, extent_len_t>> { | |
1146 | template <typename... T> | |
1147 | laddr_list_t(T&&... args) | |
1148 | : std::list<std::pair<laddr_t, extent_len_t>>(std::forward<T>(args)...) {} | |
1149 | }; | |
1150 | struct paddr_list_t : std::list<std::pair<paddr_t, extent_len_t>> { | |
1151 | template <typename... T> | |
1152 | paddr_list_t(T&&... args) | |
1153 | : std::list<std::pair<paddr_t, extent_len_t>>(std::forward<T>(args)...) {} | |
1154 | }; | |
1155 | ||
1156 | std::ostream &operator<<(std::ostream &out, const laddr_list_t &rhs); | |
1157 | std::ostream &operator<<(std::ostream &out, const paddr_list_t &rhs); | |
1158 | ||
1159 | /* identifies type of extent, used for interpreting deltas, managing | |
1160 | * writeback. | |
1161 | * | |
1162 | * Note that any new extent type needs to be added to | |
1163 | * Cache::get_extent_by_type in cache.cc | |
1164 | */ | |
1165 | enum class extent_types_t : uint8_t { | |
1166 | ROOT = 0, | |
1167 | LADDR_INTERNAL = 1, | |
1168 | LADDR_LEAF = 2, | |
1e59de90 TL |
1169 | DINK_LADDR_LEAF = 3, // should only be used for unit tests |
1170 | OMAP_INNER = 4, | |
1171 | OMAP_LEAF = 5, | |
1172 | ONODE_BLOCK_STAGED = 6, | |
1173 | COLL_BLOCK = 7, | |
1174 | OBJECT_DATA_BLOCK = 8, | |
1175 | RETIRED_PLACEHOLDER = 9, | |
1176 | // the following two types are not extent types, | |
1177 | // they are just used to indicate paddr allocation deltas | |
1178 | ALLOC_INFO = 10, | |
1179 | JOURNAL_TAIL = 11, | |
f67539c2 | 1180 | // Test Block Types |
1e59de90 TL |
1181 | TEST_BLOCK = 12, |
1182 | TEST_BLOCK_PHYSICAL = 13, | |
1183 | BACKREF_INTERNAL = 14, | |
1184 | BACKREF_LEAF = 15, | |
20effc67 | 1185 | // None and the number of valid extent_types_t |
1e59de90 | 1186 | NONE = 16, |
f67539c2 | 1187 | }; |
1e59de90 | 1188 | using extent_types_le_t = uint8_t; |
20effc67 | 1189 | constexpr auto EXTENT_TYPES_MAX = static_cast<uint8_t>(extent_types_t::NONE); |
f67539c2 | 1190 | |
1e59de90 TL |
1191 | constexpr size_t BACKREF_NODE_SIZE = 4096; |
1192 | ||
1193 | std::ostream &operator<<(std::ostream &out, extent_types_t t); | |
1194 | ||
20effc67 | 1195 | constexpr bool is_logical_type(extent_types_t type) { |
f67539c2 TL |
1196 | switch (type) { |
1197 | case extent_types_t::ROOT: | |
1198 | case extent_types_t::LADDR_INTERNAL: | |
1199 | case extent_types_t::LADDR_LEAF: | |
1e59de90 TL |
1200 | case extent_types_t::BACKREF_INTERNAL: |
1201 | case extent_types_t::BACKREF_LEAF: | |
f67539c2 TL |
1202 | return false; |
1203 | default: | |
1204 | return true; | |
1205 | } | |
1206 | } | |
1207 | ||
1e59de90 TL |
1208 | constexpr bool is_retired_placeholder(extent_types_t type) |
1209 | { | |
1210 | return type == extent_types_t::RETIRED_PLACEHOLDER; | |
1211 | } | |
1212 | ||
20effc67 TL |
1213 | constexpr bool is_lba_node(extent_types_t type) |
1214 | { | |
1215 | return type == extent_types_t::LADDR_INTERNAL || | |
1e59de90 TL |
1216 | type == extent_types_t::LADDR_LEAF || |
1217 | type == extent_types_t::DINK_LADDR_LEAF; | |
1218 | } | |
1219 | ||
1220 | constexpr bool is_backref_node(extent_types_t type) | |
1221 | { | |
1222 | return type == extent_types_t::BACKREF_INTERNAL || | |
1223 | type == extent_types_t::BACKREF_LEAF; | |
1224 | } | |
1225 | ||
1226 | constexpr bool is_lba_backref_node(extent_types_t type) | |
1227 | { | |
1228 | return is_lba_node(type) || is_backref_node(type); | |
20effc67 TL |
1229 | } |
1230 | ||
f67539c2 TL |
1231 | std::ostream &operator<<(std::ostream &out, extent_types_t t); |
1232 | ||
1e59de90 TL |
1233 | /** |
1234 | * rewrite_gen_t | |
1235 | * | |
1236 | * The goal is to group the similar aged extents in the same segment for better | |
1237 | * bimodal utilization distribution, and also to the same device tier. For EPM, | |
1238 | * it has the flexibility to make placement decisions by re-assigning the | |
1239 | * generation. And each non-inline generation will be statically mapped to a | |
1240 | * writer in EPM. | |
1241 | * | |
1242 | * All the fresh and dirty extents start with INIT_GENERATION upon allocation, | |
1243 | * and they will be assigned to INLINE/OOL generation by EPM before the initial | |
1244 | * writes. After that, the generation can only be increased upon rewrite. | |
1245 | * | |
1246 | * Note, although EPM can re-assign the generations according to the tiering | |
1247 | * status, it cannot decrease the generation for the correctness of space | |
1248 | * reservation. It may choose to assign a larger generation if the extent is | |
1249 | * hinted cold, or if want to evict extents to the cold tier. And it may choose | |
1250 | * to not increase the generation if want to keep the hot tier as filled as | |
1251 | * possible. | |
1252 | */ | |
using rewrite_gen_t = uint8_t;

// INIT_GENERATION requires EPM decision to INLINE/OOL_GENERATION
constexpr rewrite_gen_t INIT_GENERATION = 0;
// written to the journal
constexpr rewrite_gen_t INLINE_GENERATION = 1;
// first out-of-line generation
constexpr rewrite_gen_t OOL_GENERATION = 2;

// All the rewritten extents start with MIN_REWRITE_GENERATION
constexpr rewrite_gen_t MIN_REWRITE_GENERATION = 3;
// without cold tier, the largest generation is less than MIN_COLD_GENERATION
constexpr rewrite_gen_t MIN_COLD_GENERATION = 5;
// largest valid generation
constexpr rewrite_gen_t MAX_REWRITE_GENERATION = 7;
// number of generations, i.e. one past the largest valid generation
constexpr rewrite_gen_t REWRITE_GENERATIONS = MAX_REWRITE_GENERATION + 1;
// NULL value: the largest value representable by rewrite_gen_t
constexpr rewrite_gen_t NULL_GENERATION =
  std::numeric_limits<rewrite_gen_t>::max();
1268 | ||
1269 | struct rewrite_gen_printer_t { | |
1270 | rewrite_gen_t gen; | |
1271 | }; | |
1272 | ||
1273 | std::ostream &operator<<(std::ostream &out, rewrite_gen_printer_t gen); | |
1274 | ||
1275 | constexpr std::size_t generation_to_writer(rewrite_gen_t gen) { | |
1276 | // caller to assert the gen is in the reasonable range | |
1277 | return gen - OOL_GENERATION; | |
1278 | } | |
1279 | ||
1280 | // before EPM decision | |
1281 | constexpr bool is_target_rewrite_generation(rewrite_gen_t gen) { | |
1282 | return gen == INIT_GENERATION || | |
1283 | (gen >= MIN_REWRITE_GENERATION && | |
1284 | gen <= REWRITE_GENERATIONS); | |
1285 | } | |
1286 | ||
1287 | // after EPM decision | |
1288 | constexpr bool is_rewrite_generation(rewrite_gen_t gen) { | |
1289 | return gen >= INLINE_GENERATION && | |
1290 | gen < REWRITE_GENERATIONS; | |
1291 | } | |
1292 | ||
// Coarse classification of an extent's payload.
enum class data_category_t : uint8_t {
  METADATA = 0,
  DATA = 1,
  NUM = 2	// number of categories
};
1298 | ||
1299 | std::ostream &operator<<(std::ostream &out, data_category_t c); | |
1300 | ||
1301 | constexpr data_category_t get_extent_category(extent_types_t type) { | |
1302 | if (type == extent_types_t::OBJECT_DATA_BLOCK || | |
1303 | type == extent_types_t::TEST_BLOCK) { | |
1304 | return data_category_t::DATA; | |
1305 | } else { | |
1306 | return data_category_t::METADATA; | |
1307 | } | |
1308 | } | |
1309 | ||
1310 | // type for extent modification time, milliseconds since the epoch | |
1311 | using sea_time_point = seastar::lowres_system_clock::time_point; | |
1312 | using sea_duration = seastar::lowres_system_clock::duration; | |
1313 | using mod_time_point_t = int64_t; | |
1314 | ||
1315 | constexpr mod_time_point_t | |
1316 | timepoint_to_mod(const sea_time_point &t) { | |
1317 | return std::chrono::duration_cast<std::chrono::milliseconds>( | |
1318 | t.time_since_epoch()).count(); | |
1319 | } | |
1320 | ||
1321 | constexpr sea_time_point | |
1322 | mod_to_timepoint(mod_time_point_t t) { | |
1323 | return sea_time_point(std::chrono::duration_cast<sea_duration>( | |
1324 | std::chrono::milliseconds(t))); | |
1325 | } | |
1326 | ||
1327 | constexpr auto NULL_TIME = sea_time_point(); | |
1328 | constexpr auto NULL_MOD_TIME = timepoint_to_mod(NULL_TIME); | |
1329 | ||
1330 | struct sea_time_point_printer_t { | |
1331 | sea_time_point tp; | |
1332 | }; | |
1333 | std::ostream &operator<<(std::ostream &out, sea_time_point_printer_t tp); | |
1334 | ||
1335 | struct mod_time_point_printer_t { | |
1336 | mod_time_point_t tp; | |
1337 | }; | |
1338 | std::ostream &operator<<(std::ostream &out, mod_time_point_printer_t tp); | |
1339 | ||
1340 | constexpr sea_time_point | |
1341 | get_average_time(const sea_time_point& t1, std::size_t n1, | |
1342 | const sea_time_point& t2, std::size_t n2) { | |
1343 | assert(t1 != NULL_TIME); | |
1344 | assert(t2 != NULL_TIME); | |
1345 | auto new_size = n1 + n2; | |
1346 | assert(new_size > 0); | |
1347 | auto c1 = t1.time_since_epoch().count(); | |
1348 | auto c2 = t2.time_since_epoch().count(); | |
1349 | auto c_ret = c1 / new_size * n1 + c2 / new_size * n2; | |
1350 | return sea_time_point(sea_duration(c_ret)); | |
1351 | } | |
1352 | ||
f67539c2 TL |
1353 | /* description of a new physical extent */ |
1354 | struct extent_t { | |
1355 | extent_types_t type; ///< type of extent | |
1356 | laddr_t addr; ///< laddr of extent (L_ADDR_NULL for non-logical) | |
1357 | ceph::bufferlist bl; ///< payload, bl.length() == length, aligned | |
1358 | }; | |
1359 | ||
1360 | using extent_version_t = uint32_t; | |
f67539c2 TL |
1361 | |
1362 | /* description of a mutation to a physical extent */ | |
1363 | struct delta_info_t { | |
1364 | extent_types_t type = extent_types_t::NONE; ///< delta type | |
1365 | paddr_t paddr; ///< physical address | |
1366 | laddr_t laddr = L_ADDR_NULL; ///< logical address | |
1367 | uint32_t prev_crc = 0; | |
1368 | uint32_t final_crc = 0; | |
1e59de90 | 1369 | extent_len_t length = 0; ///< extent length |
f67539c2 | 1370 | extent_version_t pversion; ///< prior version |
1e59de90 TL |
1371 | segment_seq_t ext_seq; ///< seq of the extent's segment |
1372 | segment_type_t seg_type; | |
f67539c2 TL |
1373 | ceph::bufferlist bl; ///< payload |
1374 | ||
1375 | DENC(delta_info_t, v, p) { | |
1376 | DENC_START(1, 1, p); | |
1377 | denc(v.type, p); | |
1378 | denc(v.paddr, p); | |
1379 | denc(v.laddr, p); | |
1380 | denc(v.prev_crc, p); | |
1381 | denc(v.final_crc, p); | |
1382 | denc(v.length, p); | |
1383 | denc(v.pversion, p); | |
1e59de90 TL |
1384 | denc(v.ext_seq, p); |
1385 | denc(v.seg_type, p); | |
f67539c2 TL |
1386 | denc(v.bl, p); |
1387 | DENC_FINISH(p); | |
1388 | } | |
1389 | ||
1390 | bool operator==(const delta_info_t &rhs) const { | |
1391 | return ( | |
1392 | type == rhs.type && | |
1393 | paddr == rhs.paddr && | |
1394 | laddr == rhs.laddr && | |
1395 | prev_crc == rhs.prev_crc && | |
1396 | final_crc == rhs.final_crc && | |
1397 | length == rhs.length && | |
1398 | pversion == rhs.pversion && | |
1e59de90 | 1399 | ext_seq == rhs.ext_seq && |
f67539c2 TL |
1400 | bl == rhs.bl |
1401 | ); | |
1402 | } | |
1e59de90 TL |
1403 | }; |
1404 | ||
1405 | std::ostream &operator<<(std::ostream &out, const delta_info_t &delta); | |
1406 | ||
1407 | /* contains the latest journal tail information */ | |
1408 | struct journal_tail_delta_t { | |
1409 | journal_seq_t alloc_tail; | |
1410 | journal_seq_t dirty_tail; | |
f67539c2 | 1411 | |
1e59de90 TL |
1412 | DENC(journal_tail_delta_t, v, p) { |
1413 | DENC_START(1, 1, p); | |
1414 | denc(v.alloc_tail, p); | |
1415 | denc(v.dirty_tail, p); | |
1416 | DENC_FINISH(p); | |
1417 | } | |
f67539c2 TL |
1418 | }; |
1419 | ||
1e59de90 | 1420 | std::ostream &operator<<(std::ostream &out, const journal_tail_delta_t &delta); |
f67539c2 | 1421 | |
20effc67 TL |
1422 | class object_data_t { |
1423 | laddr_t reserved_data_base = L_ADDR_NULL; | |
1424 | extent_len_t reserved_data_len = 0; | |
1425 | ||
1426 | bool dirty = false; | |
1427 | public: | |
1428 | object_data_t( | |
1429 | laddr_t reserved_data_base, | |
1430 | extent_len_t reserved_data_len) | |
1431 | : reserved_data_base(reserved_data_base), | |
1432 | reserved_data_len(reserved_data_len) {} | |
1433 | ||
1434 | laddr_t get_reserved_data_base() const { | |
1435 | return reserved_data_base; | |
1436 | } | |
1437 | ||
1438 | extent_len_t get_reserved_data_len() const { | |
1439 | return reserved_data_len; | |
1440 | } | |
1441 | ||
1442 | bool is_null() const { | |
1443 | return reserved_data_base == L_ADDR_NULL; | |
1444 | } | |
1445 | ||
1446 | bool must_update() const { | |
1447 | return dirty; | |
1448 | } | |
1449 | ||
1450 | void update_reserved( | |
1451 | laddr_t base, | |
1452 | extent_len_t len) { | |
1453 | dirty = true; | |
1454 | reserved_data_base = base; | |
1455 | reserved_data_len = len; | |
1456 | } | |
1457 | ||
1458 | void update_len( | |
1459 | extent_len_t len) { | |
1460 | dirty = true; | |
1461 | reserved_data_len = len; | |
1462 | } | |
1463 | ||
1464 | void clear() { | |
1465 | dirty = true; | |
1466 | reserved_data_base = L_ADDR_NULL; | |
1467 | reserved_data_len = 0; | |
1468 | } | |
1469 | }; | |
1470 | ||
1471 | struct __attribute__((packed)) object_data_le_t { | |
1472 | laddr_le_t reserved_data_base = laddr_le_t(L_ADDR_NULL); | |
1473 | extent_len_le_t reserved_data_len = init_extent_len_le(0); | |
1474 | ||
1475 | void update(const object_data_t &nroot) { | |
1476 | reserved_data_base = nroot.get_reserved_data_base(); | |
1477 | reserved_data_len = init_extent_len_le(nroot.get_reserved_data_len()); | |
1478 | } | |
1479 | ||
1480 | object_data_t get() const { | |
1481 | return object_data_t( | |
1482 | reserved_data_base, | |
1483 | reserved_data_len); | |
1484 | } | |
1485 | }; | |
1486 | ||
1487 | struct omap_root_t { | |
1488 | laddr_t addr = L_ADDR_NULL; | |
1489 | depth_t depth = 0; | |
1490 | laddr_t hint = L_ADDR_MIN; | |
1491 | bool mutated = false; | |
1492 | ||
1493 | omap_root_t() = default; | |
1494 | omap_root_t(laddr_t addr, depth_t depth, laddr_t addr_min) | |
1495 | : addr(addr), | |
1496 | depth(depth), | |
1497 | hint(addr_min) {} | |
1498 | ||
1499 | omap_root_t(const omap_root_t &o) = default; | |
1500 | omap_root_t(omap_root_t &&o) = default; | |
1501 | omap_root_t &operator=(const omap_root_t &o) = default; | |
1502 | omap_root_t &operator=(omap_root_t &&o) = default; | |
1503 | ||
1504 | bool is_null() const { | |
1505 | return addr == L_ADDR_NULL; | |
1506 | } | |
1507 | ||
1508 | bool must_update() const { | |
1509 | return mutated; | |
1510 | } | |
1511 | ||
1512 | void update(laddr_t _addr, depth_t _depth, laddr_t _hint) { | |
1513 | mutated = true; | |
1514 | addr = _addr; | |
1515 | depth = _depth; | |
1516 | hint = _hint; | |
1517 | } | |
1518 | ||
1519 | laddr_t get_location() const { | |
1520 | return addr; | |
1521 | } | |
1522 | ||
1523 | depth_t get_depth() const { | |
1524 | return depth; | |
1525 | } | |
1526 | ||
1527 | laddr_t get_hint() const { | |
1528 | return hint; | |
1529 | } | |
1530 | }; | |
1e59de90 | 1531 | std::ostream &operator<<(std::ostream &out, const omap_root_t &root); |
20effc67 TL |
1532 | |
1533 | class __attribute__((packed)) omap_root_le_t { | |
1534 | laddr_le_t addr = laddr_le_t(L_ADDR_NULL); | |
1535 | depth_le_t depth = init_depth_le(0); | |
1536 | ||
1537 | public: | |
1538 | omap_root_le_t() = default; | |
1539 | ||
1540 | omap_root_le_t(laddr_t addr, depth_t depth) | |
1541 | : addr(addr), depth(init_depth_le(depth)) {} | |
1542 | ||
1543 | omap_root_le_t(const omap_root_le_t &o) = default; | |
1544 | omap_root_le_t(omap_root_le_t &&o) = default; | |
1545 | omap_root_le_t &operator=(const omap_root_le_t &o) = default; | |
1546 | omap_root_le_t &operator=(omap_root_le_t &&o) = default; | |
1547 | ||
1548 | void update(const omap_root_t &nroot) { | |
1549 | addr = nroot.get_location(); | |
1550 | depth = init_depth_le(nroot.get_depth()); | |
1551 | } | |
1552 | ||
1553 | omap_root_t get(laddr_t hint) const { | |
1554 | return omap_root_t(addr, depth, hint); | |
1555 | } | |
1556 | }; | |
1557 | ||
1558 | /** | |
1e59de90 | 1559 | * phy_tree_root_t |
20effc67 | 1560 | */ |
1e59de90 | 1561 | class __attribute__((packed)) phy_tree_root_t { |
20effc67 TL |
1562 | paddr_le_t root_addr; |
1563 | depth_le_t depth = init_extent_len_le(0); | |
1564 | ||
1565 | public: | |
1e59de90 | 1566 | phy_tree_root_t() = default; |
20effc67 | 1567 | |
1e59de90 | 1568 | phy_tree_root_t(paddr_t addr, depth_t depth) |
20effc67 TL |
1569 | : root_addr(addr), depth(init_depth_le(depth)) {} |
1570 | ||
1e59de90 TL |
1571 | phy_tree_root_t(const phy_tree_root_t &o) = default; |
1572 | phy_tree_root_t(phy_tree_root_t &&o) = default; | |
1573 | phy_tree_root_t &operator=(const phy_tree_root_t &o) = default; | |
1574 | phy_tree_root_t &operator=(phy_tree_root_t &&o) = default; | |
20effc67 TL |
1575 | |
1576 | paddr_t get_location() const { | |
1577 | return root_addr; | |
1578 | } | |
1579 | ||
1580 | void set_location(paddr_t location) { | |
1581 | root_addr = location; | |
1582 | } | |
1583 | ||
1584 | depth_t get_depth() const { | |
1585 | return depth; | |
1586 | } | |
1587 | ||
1588 | void set_depth(depth_t ndepth) { | |
1589 | depth = ndepth; | |
1590 | } | |
1591 | ||
1592 | void adjust_addrs_from_base(paddr_t base) { | |
1593 | paddr_t _root_addr = root_addr; | |
1594 | if (_root_addr.is_relative()) { | |
1595 | root_addr = base.add_record_relative(_root_addr); | |
1596 | } | |
1597 | } | |
1598 | }; | |
1599 | ||
1600 | class coll_root_t { | |
1601 | laddr_t addr = L_ADDR_NULL; | |
1602 | extent_len_t size = 0; | |
1603 | ||
1604 | bool mutated = false; | |
1605 | ||
1606 | public: | |
1607 | coll_root_t() = default; | |
1608 | coll_root_t(laddr_t addr, extent_len_t size) : addr(addr), size(size) {} | |
1609 | ||
1610 | coll_root_t(const coll_root_t &o) = default; | |
1611 | coll_root_t(coll_root_t &&o) = default; | |
1612 | coll_root_t &operator=(const coll_root_t &o) = default; | |
1613 | coll_root_t &operator=(coll_root_t &&o) = default; | |
1614 | ||
1615 | bool must_update() const { | |
1616 | return mutated; | |
1617 | } | |
1618 | ||
1619 | void update(laddr_t _addr, extent_len_t _s) { | |
1620 | mutated = true; | |
1621 | addr = _addr; | |
1622 | size = _s; | |
1623 | } | |
1624 | ||
1625 | laddr_t get_location() const { | |
1626 | return addr; | |
1627 | } | |
1628 | ||
1629 | extent_len_t get_size() const { | |
1630 | return size; | |
1631 | } | |
1632 | }; | |
1633 | ||
1634 | /** | |
1635 | * coll_root_le_t | |
1636 | * | |
1637 | * Information for locating CollectionManager information, to be embedded | |
1638 | * in root block. | |
1639 | */ | |
1640 | class __attribute__((packed)) coll_root_le_t { | |
1641 | laddr_le_t addr; | |
1642 | extent_len_le_t size = init_extent_len_le(0); | |
1643 | ||
1644 | public: | |
1645 | coll_root_le_t() = default; | |
1646 | ||
1e59de90 | 1647 | coll_root_le_t(laddr_t laddr, extent_len_t size) |
20effc67 TL |
1648 | : addr(laddr), size(init_extent_len_le(size)) {} |
1649 | ||
1650 | ||
1651 | coll_root_le_t(const coll_root_le_t &o) = default; | |
1652 | coll_root_le_t(coll_root_le_t &&o) = default; | |
1653 | coll_root_le_t &operator=(const coll_root_le_t &o) = default; | |
1654 | coll_root_le_t &operator=(coll_root_le_t &&o) = default; | |
1655 | ||
1656 | void update(const coll_root_t &nroot) { | |
1657 | addr = nroot.get_location(); | |
1658 | size = init_extent_len_le(nroot.get_size()); | |
1659 | } | |
1660 | ||
1661 | coll_root_t get() const { | |
1662 | return coll_root_t(addr, size); | |
1663 | } | |
1664 | }; | |
1665 | ||
1e59de90 TL |
1666 | using lba_root_t = phy_tree_root_t; |
1667 | using backref_root_t = phy_tree_root_t; | |
20effc67 TL |
1668 | |
1669 | /** | |
1670 | * root_t | |
1671 | * | |
1672 | * Contains information required to find metadata roots. | |
1673 | * TODO: generalize this to permit more than one lba_manager implementation | |
1674 | */ | |
// Packed struct: the member order below is the layout. Besides the roots it
// carries a fixed-size buffer holding an encoded string->string map (meta_t).
1675 | struct __attribute__((packed)) root_t { | |
1676 | using meta_t = std::map<std::string, std::string>; | |
1677 | ||
// Size in bytes of the meta buffer; set_meta() asserts that the encoded map
// is strictly shorter than this.
1678 | static constexpr int MAX_META_LENGTH = 1024; | |
1679 | ||
1e59de90 | 1680 | backref_root_t backref_root; |
20effc67 TL |
1681 | lba_root_t lba_root; | |
1682 | laddr_le_t onode_root; | |
1683 | coll_root_le_t collection_root; | |
1684 | ||
// Encoded meta_t, zero-padded up to MAX_META_LENGTH by set_meta().
1685 | char meta[MAX_META_LENGTH]; | |
1686 | ||
// A new root starts with an empty meta map encoded into `meta`.
1687 | root_t() { | |
1688 | set_meta(meta_t{}); | |
1689 | } | |
1690 | ||
// Forwards `base` to the two tree roots only; onode_root and
// collection_root are not touched here.
1691 | void adjust_addrs_from_base(paddr_t base) { | |
1692 | lba_root.adjust_addrs_from_base(base); | |
1e59de90 | 1693 | backref_root.adjust_addrs_from_base(base); |
20effc67 TL |
1694 | } | |
1695 | ||
// Decode and return the meta map from the full MAX_META_LENGTH buffer.
// NOTE(review): create_static presumably wraps `meta` without copying it --
// confirm against the ceph::buffer documentation.
1696 | meta_t get_meta() { | |
1697 | bufferlist bl; | |
1698 | bl.append(ceph::buffer::create_static(MAX_META_LENGTH, meta)); | |
1699 | meta_t ret; | |
1700 | auto iter = bl.cbegin(); | |
1701 | decode(ret, iter); | |
1702 | return ret; | |
1703 | } | |
1704 | ||
// Encode `m` and store it in `meta`. The buffer is zero-filled first and
// then bl.length() bytes are copied from the front buffer.
// NOTE(review): rebuild() is presumably what makes the whole encoding
// reachable through front() as one contiguous buffer -- confirm.
1705 | void set_meta(const meta_t &m) { | |
1706 | ceph::bufferlist bl; | |
1707 | encode(m, bl); | |
1708 | ceph_assert(bl.length() < MAX_META_LENGTH); | |
1709 | bl.rebuild(); | |
1710 | auto &bptr = bl.front(); | |
1711 | ::memset(meta, 0, MAX_META_LENGTH); | |
1712 | ::memcpy(meta, bptr.c_str(), bl.length()); | |
1713 | } | |
1714 | }; | |
1715 | ||
1e59de90 TL |
// One entry of alloc_delta_t::alloc_blk_ranges: a physical address, a logical
// address, a length and an extent type. A default-constructed entry holds
// P_ADDR_NULL, L_ADDR_NULL, 0 and extent_types_t::ROOT.
1716 | struct alloc_blk_t { |
1717 | alloc_blk_t( | |
1718 | paddr_t paddr, | |
1719 | laddr_t laddr, | |
1720 | extent_len_t len, | |
1721 | extent_types_t type) | |
1722 | : paddr(paddr), laddr(laddr), len(len), type(type) | |
1723 | {} | |
1724 | ||
1725 | explicit alloc_blk_t() = default; | |
1726 | ||
1727 | paddr_t paddr = P_ADDR_NULL; | |
1728 | laddr_t laddr = L_ADDR_NULL; | |
1729 | extent_len_t len = 0; | |
1730 | extent_types_t type = extent_types_t::ROOT; | |
// Encoding order is paddr, laddr, len, type.
1731 | DENC(alloc_blk_t, v, p) { | |
1732 | DENC_START(1, 1, p); | |
1733 | denc(v.paddr, p); | |
1734 | denc(v.laddr, p); | |
1735 | denc(v.len, p); | |
1736 | denc(v.type, p); | |
1737 | DENC_FINISH(p); | |
1738 | } | |
1739 | }; | |
20effc67 TL |
1740 | |
// A list of alloc_blk_t entries together with an op_types_t tag; `op`
// defaults to NONE.
// NOTE(review): SET / CLEAR presumably mean "mark the listed blocks as
// allocated / released" -- confirm with the code that consumes this delta.
1741 | // use absolute address | |
1e59de90 | 1742 | struct alloc_delta_t { |
20effc67 TL |
1743 | enum class op_types_t : uint8_t { | |
1744 | NONE = 0, | |
1745 | SET = 1, | |
1746 | CLEAR = 2 | |
1747 | }; | |
1e59de90 | 1748 | std::vector<alloc_blk_t> alloc_blk_ranges; |
20effc67 TL |
1749 | op_types_t op = op_types_t::NONE; | |
1750 | ||
1e59de90 | 1751 | alloc_delta_t() = default; |
20effc67 | 1752 |
// Encoding order is alloc_blk_ranges, then op.
1e59de90 | 1753 | DENC(alloc_delta_t, v, p) { |
20effc67 TL |
1754 | DENC_START(1, 1, p); | |
1755 | denc(v.alloc_blk_ranges, p); | |
1756 | denc(v.op, p); | |
1757 | DENC_FINISH(p); | |
1758 | } | |
1759 | }; | |
1760 | ||
20effc67 TL |
// Type, logical address and length of an extent, without its payload.
// Defaults are extent_types_t::NONE, L_ADDR_NULL and 0.
1761 | struct extent_info_t { |
1762 | extent_types_t type = extent_types_t::NONE; | |
1763 | laddr_t addr = L_ADDR_NULL; | |
1764 | extent_len_t len = 0; | |
1765 | ||
1766 | extent_info_t() = default; | |
// Summarize an extent_t: copies its type and addr, and takes the length of
// its buffer (et.bl.length()) as len.
1767 | extent_info_t(const extent_t &et) | |
1e59de90 TL |
1768 | : type(et.type), addr(et.addr), |
1769 | len(et.bl.length()) | |
1770 | {} | |
20effc67 TL |
1771 | |
// Encoding order is type, addr, len.
1772 | DENC(extent_info_t, v, p) { | |
1773 | DENC_START(1, 1, p); | |
1774 | denc(v.type, p); | |
1775 | denc(v.addr, p); | |
1776 | denc(v.len, p); | |
1777 | DENC_FINISH(p); | |
1778 | } | |
1779 | }; | |
1780 | std::ostream &operator<<(std::ostream &out, const extent_info_t &header); | |
1781 | ||
// Type of the segment_nonce fields in segment_header_t, segment_tail_t and
// record_group_header_t ("nonce of containing segment").
1782 | using segment_nonce_t = uint32_t; | |
1783 | ||
1784 | /** | |
1785 | * Segment header | |
1786 | * | |
1787 | * Every segment contains an encoded segment_header_t in the first block. | |
1788 | * Our strategy for finding the journal replay point is: | |
1789 | * 1) Find the segment with the highest journal_segment_seq | |
1e59de90 TL |
1790 | * 2) Get dirty_tail and alloc_tail from the segment header |
1791 | * 3) Scan forward to update tails from journal_tail_delta_t | |
1792 | * 4) Replay from the latest tails | |
20effc67 TL |
1793 | */ |
// Members are encoded by DENC below in declaration order: segment_seq,
// physical_segment_id, dirty_tail, alloc_tail, segment_nonce, type,
// category, generation. None of the members has a default initializer.
1794 | struct segment_header_t { | |
1e59de90 | 1795 | segment_seq_t segment_seq; |
20effc67 TL |
1796 | segment_id_t physical_segment_id; // debugging |
1797 | ||
1e59de90 TL |
1798 | journal_seq_t dirty_tail; |
1799 | journal_seq_t alloc_tail; | |
20effc67 | 1800 | segment_nonce_t segment_nonce; |
1e59de90 TL |
1801 | |
1802 | segment_type_t type; | |
1803 | ||
1804 | data_category_t category; | |
1805 | rewrite_gen_t generation; | |
1806 | ||
// Accessor with the same name and signature as segment_tail_t::get_type().
1807 | segment_type_t get_type() const { | |
1808 | return type; | |
1809 | } | |
20effc67 TL |
1810 | |
1811 | DENC(segment_header_t, v, p) { | |
1812 | DENC_START(1, 1, p); | |
1e59de90 | 1813 | denc(v.segment_seq, p); |
20effc67 | 1814 | denc(v.physical_segment_id, p); |
1e59de90 TL |
1815 | denc(v.dirty_tail, p); |
1816 | denc(v.alloc_tail, p); | |
20effc67 | 1817 | denc(v.segment_nonce, p); |
1e59de90 TL |
1818 | denc(v.type, p); |
1819 | denc(v.category, p); | |
1820 | denc(v.generation, p); | |
20effc67 TL |
1821 | DENC_FINISH(p); |
1822 | } | |
1823 | }; | |
1824 | std::ostream &operator<<(std::ostream &out, const segment_header_t &header); | |
1825 | ||
1e59de90 TL |
// Counterpart of segment_header_t: repeats segment_seq, physical_segment_id,
// segment_nonce and type, and adds a modify time and an extent count.
// Members are encoded in declaration order.
1826 | struct segment_tail_t { |
1827 | segment_seq_t segment_seq; | |
1828 | segment_id_t physical_segment_id; // debugging | |
1829 | ||
1830 | segment_nonce_t segment_nonce; | |
1831 | ||
1832 | segment_type_t type; | |
1833 | ||
1834 | mod_time_point_t modify_time; | |
// NOTE(review): std::size_t has a platform-dependent width; confirm that
// encoding it directly is what the stored format intends.
1835 | std::size_t num_extents; | |
1836 | ||
1837 | segment_type_t get_type() const { | |
1838 | return type; | |
1839 | } | |
1840 | ||
1841 | DENC(segment_tail_t, v, p) { | |
1842 | DENC_START(1, 1, p); | |
1843 | denc(v.segment_seq, p); | |
1844 | denc(v.physical_segment_id, p); | |
1845 | denc(v.segment_nonce, p); | |
1846 | denc(v.type, p); | |
1847 | denc(v.modify_time, p); | |
1848 | denc(v.num_extents, p); | |
1849 | DENC_FINISH(p); | |
1850 | } | |
1851 | }; | |
1852 | std::ostream &operator<<(std::ostream &out, const segment_tail_t &tail); | |
1853 | ||
/// Kind of a transaction. The enumerator order is significant: the
/// predicates below classify a type by comparing against TRIM_DIRTY and MAX.
enum class transaction_type_t : uint8_t {
  MUTATE = 0,
  READ,          // covers both weak and non-weak read transactions
  TRIM_DIRTY,
  TRIM_ALLOC,
  CLEANER_MAIN,
  CLEANER_COLD,
  MAX            // one past the last real type
};

/// MAX doubles as the "no type" value.
static constexpr auto TRANSACTION_TYPE_NULL = transaction_type_t::MAX;

/// Numeric value of MAX, i.e. the count of real transaction types.
static constexpr auto TRANSACTION_TYPE_MAX =
  static_cast<std::size_t>(transaction_type_t::MAX);

std::ostream &operator<<(std::ostream &os, transaction_type_t type);

/// True for every value strictly below MAX.
constexpr bool is_valid_transaction(transaction_type_t type) {
  return static_cast<std::size_t>(type) < TRANSACTION_TYPE_MAX;
}

/// True for the types from TRIM_DIRTY up to, but excluding, MAX.
constexpr bool is_background_transaction(transaction_type_t type) {
  if (type < transaction_type_t::TRIM_DIRTY) {
    return false;
  }
  return is_valid_transaction(type);
}

/// True only for TRIM_DIRTY and TRIM_ALLOC.
constexpr bool is_trim_transaction(transaction_type_t type) {
  switch (type) {
  case transaction_type_t::TRIM_DIRTY:
  case transaction_type_t::TRIM_ALLOC:
    return true;
  default:
    return false;
  }
}
1884 | ||
20effc67 TL |
1885 | struct record_size_t { |
1886 | extent_len_t plain_mdlength = 0; // mdlength without the record header | |
1887 | extent_len_t dlength = 0; | |
1888 | ||
1889 | extent_len_t get_raw_mdlength() const; | |
1890 | ||
1891 | bool is_empty() const { | |
1892 | return plain_mdlength == 0 && | |
1893 | dlength == 0; | |
1894 | } | |
1895 | ||
1896 | void account_extent(extent_len_t extent_len); | |
1897 | ||
1898 | void account(const extent_t& extent) { | |
1899 | account_extent(extent.bl.length()); | |
1900 | } | |
1901 | ||
1902 | void account(const delta_info_t& delta); | |
1e59de90 TL |
1903 | |
1904 | bool operator==(const record_size_t &) const = default; | |
20effc67 | 1905 | }; |
1e59de90 | 1906 | std::ostream &operator<<(std::ostream&, const record_size_t&); |
20effc67 | 1907 | |
f67539c2 | 1908 | struct record_t { |
1e59de90 | 1909 | transaction_type_t type = TRANSACTION_TYPE_NULL; |
f67539c2 TL |
1910 | std::vector<extent_t> extents; |
1911 | std::vector<delta_info_t> deltas; | |
20effc67 | 1912 | record_size_t size; |
1e59de90 | 1913 | sea_time_point modify_time = NULL_TIME; |
20effc67 | 1914 | |
1e59de90 TL |
1915 | record_t(transaction_type_t type) : type{type} { } |
1916 | ||
1917 | // unit test only | |
1918 | record_t() { | |
1919 | type = transaction_type_t::MUTATE; | |
1920 | } | |
1921 | ||
1922 | // unit test only | |
20effc67 TL |
1923 | record_t(std::vector<extent_t>&& _extents, |
1924 | std::vector<delta_info_t>&& _deltas) { | |
1e59de90 | 1925 | auto modify_time = seastar::lowres_system_clock::now(); |
20effc67 | 1926 | for (auto& e: _extents) { |
1e59de90 | 1927 | push_back(std::move(e), modify_time); |
20effc67 TL |
1928 | } |
1929 | for (auto& d: _deltas) { | |
1930 | push_back(std::move(d)); | |
1931 | } | |
1e59de90 | 1932 | type = transaction_type_t::MUTATE; |
20effc67 TL |
1933 | } |
1934 | ||
1935 | bool is_empty() const { | |
1936 | return extents.size() == 0 && | |
1937 | deltas.size() == 0; | |
1938 | } | |
1939 | ||
1940 | std::size_t get_delta_size() const { | |
1941 | auto delta_size = std::accumulate( | |
1942 | deltas.begin(), deltas.end(), 0, | |
1943 | [](uint64_t sum, auto& delta) { | |
1944 | return sum + delta.bl.length(); | |
1945 | } | |
1946 | ); | |
1947 | return delta_size; | |
1948 | } | |
1949 | ||
1e59de90 TL |
1950 | void push_back(extent_t&& extent, sea_time_point &t) { |
1951 | ceph_assert(t != NULL_TIME); | |
1952 | if (extents.size() == 0) { | |
1953 | assert(modify_time == NULL_TIME); | |
1954 | modify_time = t; | |
1955 | } else { | |
1956 | modify_time = get_average_time(modify_time, extents.size(), t, 1); | |
1957 | } | |
20effc67 TL |
1958 | size.account(extent); |
1959 | extents.push_back(std::move(extent)); | |
1960 | } | |
1961 | ||
1962 | void push_back(delta_info_t&& delta) { | |
1963 | size.account(delta); | |
1964 | deltas.push_back(std::move(delta)); | |
1965 | } | |
f67539c2 | 1966 | }; |
1e59de90 | 1967 | std::ostream &operator<<(std::ostream&, const record_t&); |
f67539c2 | 1968 | |
// Per-record header: the transaction type, the number of deltas and extents,
// and a modify time. Members are encoded in declaration order.
20effc67 | 1969 | struct record_header_t { |
1e59de90 | 1970 | transaction_type_t type; |
20effc67 TL |
1971 | uint32_t deltas; // number of deltas |
1972 | uint32_t extents; // number of extents | |
1e59de90 | 1973 | mod_time_point_t modify_time; |
20effc67 TL |
1974 | |
1975 | DENC(record_header_t, v, p) { | |
1976 | DENC_START(1, 1, p); | |
1e59de90 | 1977 | denc(v.type, p); |
20effc67 TL |
1978 | denc(v.deltas, p); |
1979 | denc(v.extents, p); | |
1e59de90 | 1980 | denc(v.modify_time, p); |
20effc67 TL |
1981 | DENC_FINISH(p); |
1982 | } | |
1983 | }; | |
1e59de90 | 1984 | std::ostream &operator<<(std::ostream&, const record_header_t&); |
20effc67 TL |
1985 | |
// Header of a group of records: the record count, the metadata and data
// lengths, the nonce of the containing segment, the committed_to position and
// a CRC of the data payload. Members are encoded in declaration order.
1986 | struct record_group_header_t { | |
1987 | uint32_t records; | |
1988 | extent_len_t mdlength; // block aligned, length of metadata | |
1989 | extent_len_t dlength; // block aligned, length of data | |
1990 | segment_nonce_t segment_nonce;// nonce of containing segment | |
1991 | journal_seq_t committed_to; // records prior to committed_to have been | |
1992 | // fully written, maybe in another segment. | |
1993 | checksum_t data_crc; // crc of data payload | |
1994 | ||
1995 | ||
1996 | DENC(record_group_header_t, v, p) { | |
1997 | DENC_START(1, 1, p); | |
1998 | denc(v.records, p); | |
1999 | denc(v.mdlength, p); | |
2000 | denc(v.dlength, p); | |
2001 | denc(v.segment_nonce, p); | |
2002 | denc(v.committed_to, p); | |
2003 | denc(v.data_crc, p); | |
2004 | DENC_FINISH(p); | |
2005 | } | |
2006 | }; | |
1e59de90 | 2007 | std::ostream& operator<<(std::ostream&, const record_group_header_t&); |
20effc67 TL |
2008 | |
2009 | struct record_group_size_t { | |
2010 | extent_len_t plain_mdlength = 0; // mdlength without the group header | |
2011 | extent_len_t dlength = 0; | |
2012 | extent_len_t block_size = 0; | |
2013 | ||
2014 | record_group_size_t() = default; | |
2015 | record_group_size_t( | |
2016 | const record_size_t& rsize, | |
2017 | extent_len_t block_size) { | |
2018 | account(rsize, block_size); | |
2019 | } | |
2020 | ||
2021 | extent_len_t get_raw_mdlength() const; | |
2022 | ||
2023 | extent_len_t get_mdlength() const { | |
2024 | assert(block_size > 0); | |
2025 | return p2roundup(get_raw_mdlength(), block_size); | |
2026 | } | |
2027 | ||
2028 | extent_len_t get_encoded_length() const { | |
2029 | assert(block_size > 0); | |
2030 | assert(dlength % block_size == 0); | |
2031 | return get_mdlength() + dlength; | |
2032 | } | |
2033 | ||
2034 | record_group_size_t get_encoded_length_after( | |
2035 | const record_size_t& rsize, | |
2036 | extent_len_t block_size) const { | |
2037 | record_group_size_t tmp = *this; | |
2038 | tmp.account(rsize, block_size); | |
2039 | return tmp; | |
2040 | } | |
2041 | ||
2042 | double get_fullness() const { | |
2043 | assert(block_size > 0); | |
2044 | return ((double)(get_raw_mdlength() + dlength) / | |
2045 | get_encoded_length()); | |
2046 | } | |
2047 | ||
2048 | void account(const record_size_t& rsize, | |
2049 | extent_len_t block_size); | |
1e59de90 TL |
2050 | |
2051 | bool operator==(const record_group_size_t &) const = default; | |
20effc67 | 2052 | }; |
1e59de90 | 2053 | std::ostream& operator<<(std::ostream&, const record_group_size_t&); |
20effc67 TL |
2054 | |
2055 | struct record_group_t { | |
2056 | std::vector<record_t> records; | |
2057 | record_group_size_t size; | |
20effc67 TL |
2058 | |
2059 | record_group_t() = default; | |
2060 | record_group_t( | |
2061 | record_t&& record, | |
2062 | extent_len_t block_size) { | |
2063 | push_back(std::move(record), block_size); | |
2064 | } | |
2065 | ||
2066 | std::size_t get_size() const { | |
2067 | return records.size(); | |
2068 | } | |
2069 | ||
2070 | void push_back( | |
2071 | record_t&& record, | |
2072 | extent_len_t block_size) { | |
2073 | size.account(record.size, block_size); | |
20effc67 | 2074 | records.push_back(std::move(record)); |
1e59de90 | 2075 | assert(size.get_encoded_length() < SEGMENT_OFF_MAX); |
20effc67 TL |
2076 | } |
2077 | ||
2078 | void reserve(std::size_t limit) { | |
2079 | records.reserve(limit); | |
2080 | } | |
2081 | ||
2082 | void clear() { | |
2083 | records.clear(); | |
2084 | size = {}; | |
20effc67 TL |
2085 | } |
2086 | }; | |
1e59de90 | 2087 | std::ostream& operator<<(std::ostream&, const record_group_t&); |
20effc67 TL |
2088 | |
// Encode / decode / validate entry points for records and record groups.
// Only declared here; the definitions are not part of this header.
// NOTE(review): encode_record consumes a single record_t together with a
// block size, while encode_records takes an already built record_group_t by
// non-const reference -- check the definition for what it does to the group.
2089 | ceph::bufferlist encode_record( |
2090 | record_t&& record, | |
2091 | extent_len_t block_size, | |
2092 | const journal_seq_t& committed_to, | |
2093 | segment_nonce_t current_segment_nonce); | |
2094 | ||
2095 | ceph::bufferlist encode_records( | |
2096 | record_group_t& record_group, | |
2097 | const journal_seq_t& committed_to, | |
2098 | segment_nonce_t current_segment_nonce); | |
2099 | ||
// Returns std::nullopt rather than a header when decoding does not succeed.
// NOTE(review): presumably that includes a nonce other than expected_nonce --
// confirm in the definition.
2100 | std::optional<record_group_header_t> | |
2101 | try_decode_records_header( | |
2102 | const ceph::bufferlist& header_bl, | |
2103 | segment_nonce_t expected_nonce); | |
2104 | ||
2105 | bool validate_records_metadata( | |
2106 | const ceph::bufferlist& md_bl); | |
2107 | ||
2108 | bool validate_records_data( | |
2109 | const record_group_header_t& header, | |
2110 | const ceph::bufferlist& data_bl); | |
2111 | ||
// A record header paired with the extent_info_t entries of that record.
2112 | struct record_extent_infos_t { | |
2113 | record_header_t header; | |
2114 | std::vector<extent_info_t> extent_infos; | |
2115 | }; | |
// Both decoders take a group header plus the metadata buffer and return one
// element per record, or std::nullopt. Declared only; defined elsewhere.
2116 | std::optional<std::vector<record_extent_infos_t> > | |
2117 | try_decode_extent_infos( | |
2118 | const record_group_header_t& header, | |
2119 | const ceph::bufferlist& md_bl); | |
1e59de90 TL |
2120 | std::optional<std::vector<record_header_t>> |
2121 | try_decode_record_headers( | |
2122 | const record_group_header_t& header, | |
2123 | const ceph::bufferlist& md_bl); | |
20effc67 TL |
2124 | |
// The deltas of one record, each paired with a sea_time_point, plus the
// record's block base address.
2125 | struct record_deltas_t { |
2126 | paddr_t record_block_base; | |
1e59de90 | 2127 | std::vector<std::pair<sea_time_point, delta_info_t>> deltas; |
20effc67 TL |
2128 | }; |
// Declared only; returns one record_deltas_t per record, or std::nullopt.
2129 | std::optional<std::vector<record_deltas_t> > | |
2130 | try_decode_deltas( | |
2131 | const record_group_header_t& header, | |
2132 | const ceph::bufferlist& md_bl, | |
2133 | paddr_t record_block_base); | |
2134 | ||
2135 | struct write_result_t { | |
2136 | journal_seq_t start_seq; | |
1e59de90 | 2137 | extent_len_t length; |
20effc67 TL |
2138 | |
2139 | journal_seq_t get_end_seq() const { | |
1e59de90 TL |
2140 | return journal_seq_t{ |
2141 | start_seq.segment_seq, | |
2142 | start_seq.offset.add_offset(length)}; | |
20effc67 TL |
2143 | } |
2144 | }; | |
1e59de90 | 2145 | std::ostream& operator<<(std::ostream&, const write_result_t&); |
20effc67 TL |
2146 | |
// Pairs a record's block base address with the write_result_t (start
// sequence and length) of the write.
2147 | struct record_locator_t { |
2148 | paddr_t record_block_base; | |
2149 | write_result_t write_result; | |
2150 | }; | |
1e59de90 | 2151 | std::ostream& operator<<(std::ostream&, const record_locator_t&); |
20effc67 TL |
2152 | |
2153 | /// scan segment for end incrementally | |
2154 | struct scan_valid_records_cursor { | |
2155 | bool last_valid_header_found = false; | |
2156 | journal_seq_t seq; | |
2157 | journal_seq_t last_committed; | |
1e59de90 | 2158 | std::size_t num_consumed_records = 0; |
aee94f69 | 2159 | extent_len_t block_size = 0; |
20effc67 TL |
2160 | |
2161 | struct found_record_group_t { | |
2162 | paddr_t offset; | |
2163 | record_group_header_t header; | |
2164 | bufferlist mdbuffer; | |
2165 | ||
2166 | found_record_group_t( | |
2167 | paddr_t offset, | |
2168 | const record_group_header_t &header, | |
2169 | const bufferlist &mdbuffer) | |
2170 | : offset(offset), header(header), mdbuffer(mdbuffer) {} | |
2171 | }; | |
2172 | std::deque<found_record_group_t> pending_record_groups; | |
2173 | ||
2174 | bool is_complete() const { | |
2175 | return last_valid_header_found && pending_record_groups.empty(); | |
2176 | } | |
2177 | ||
2178 | segment_id_t get_segment_id() const { | |
2179 | return seq.offset.as_seg_paddr().get_segment_id(); | |
2180 | } | |
2181 | ||
2182 | segment_off_t get_segment_offset() const { | |
2183 | return seq.offset.as_seg_paddr().get_segment_off(); | |
2184 | } | |
2185 | ||
aee94f69 TL |
2186 | extent_len_t get_block_size() const { |
2187 | return block_size; | |
2188 | } | |
2189 | ||
1e59de90 | 2190 | void increment_seq(segment_off_t off) { |
aee94f69 | 2191 | seq.offset = seq.offset.add_offset(off); |
20effc67 TL |
2192 | } |
2193 | ||
1e59de90 TL |
2194 | void emplace_record_group(const record_group_header_t&, ceph::bufferlist&&); |
2195 | ||
2196 | void pop_record_group() { | |
2197 | assert(!pending_record_groups.empty()); | |
2198 | ++num_consumed_records; | |
2199 | pending_record_groups.pop_front(); | |
2200 | } | |
2201 | ||
20effc67 TL |
2202 | scan_valid_records_cursor( |
2203 | journal_seq_t seq) | |
2204 | : seq(seq) {} | |
2205 | }; | |
1e59de90 | 2206 | std::ostream& operator<<(std::ostream&, const scan_valid_records_cursor&); |
20effc67 | 2207 | |
f67539c2 TL |
2208 | } |
2209 | ||
// WRITE_CLASS_DENC_BOUNDED invocations for types that provide a DENC body.
// They follow the closing brace of the declarations above, which is why each
// type name is fully qualified with crimson::os::seastore.
2210 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::seastore_meta_t) | |
20effc67 | 2211 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_id_t) |
f67539c2 TL |
2212 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::paddr_t) |
2213 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::journal_seq_t) | |
2214 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::delta_info_t) | |
1e59de90 | 2215 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::journal_tail_delta_t) |
20effc67 TL |
2216 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::record_header_t) |
2217 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::record_group_header_t) | |
2218 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::extent_info_t) | |
2219 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_header_t) | |
1e59de90 TL |
2220 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::alloc_blk_t) |
2221 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::alloc_delta_t) | |
2222 | WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_tail_t) | |
2223 | ||
// When FMT_VERSION >= 90000, give each listed type a fmt::formatter that
// derives from fmt::ostream_formatter, i.e. the type is formatted through its
// operator<<. The list is kept in roughly alphabetical order.
// NOTE(review): presumably needed because fmt 9 stopped picking up operator<<
// implicitly -- confirm against the {fmt} release notes.
2224 | #if FMT_VERSION >= 90000 | |
2225 | template <> struct fmt::formatter<crimson::os::seastore::data_category_t> : fmt::ostream_formatter {}; | |
2226 | template <> struct fmt::formatter<crimson::os::seastore::delta_info_t> : fmt::ostream_formatter {}; | |
2227 | template <> struct fmt::formatter<crimson::os::seastore::device_id_printer_t> : fmt::ostream_formatter {}; | |
2228 | template <> struct fmt::formatter<crimson::os::seastore::extent_types_t> : fmt::ostream_formatter {}; | |
2229 | template <> struct fmt::formatter<crimson::os::seastore::journal_seq_t> : fmt::ostream_formatter {}; | |
2230 | template <> struct fmt::formatter<crimson::os::seastore::journal_tail_delta_t> : fmt::ostream_formatter {}; | |
2231 | template <> struct fmt::formatter<crimson::os::seastore::laddr_list_t> : fmt::ostream_formatter {}; | |
2232 | template <> struct fmt::formatter<crimson::os::seastore::omap_root_t> : fmt::ostream_formatter {}; | |
2233 | template <> struct fmt::formatter<crimson::os::seastore::paddr_list_t> : fmt::ostream_formatter {}; | |
2234 | template <> struct fmt::formatter<crimson::os::seastore::paddr_t> : fmt::ostream_formatter {}; | |
aee94f69 | 2235 | template <> struct fmt::formatter<crimson::os::seastore::pladdr_t> : fmt::ostream_formatter {}; |
1e59de90 TL |
2236 | template <> struct fmt::formatter<crimson::os::seastore::placement_hint_t> : fmt::ostream_formatter {}; |
2237 | template <> struct fmt::formatter<crimson::os::seastore::device_type_t> : fmt::ostream_formatter {}; | |
2238 | template <> struct fmt::formatter<crimson::os::seastore::record_group_header_t> : fmt::ostream_formatter {}; | |
2239 | template <> struct fmt::formatter<crimson::os::seastore::record_group_size_t> : fmt::ostream_formatter {}; | |
2240 | template <> struct fmt::formatter<crimson::os::seastore::record_header_t> : fmt::ostream_formatter {}; | |
2241 | template <> struct fmt::formatter<crimson::os::seastore::record_locator_t> : fmt::ostream_formatter {}; | |
2242 | template <> struct fmt::formatter<crimson::os::seastore::record_t> : fmt::ostream_formatter {}; | |
2243 | template <> struct fmt::formatter<crimson::os::seastore::rewrite_gen_printer_t> : fmt::ostream_formatter {}; | |
2244 | template <> struct fmt::formatter<crimson::os::seastore::scan_valid_records_cursor> : fmt::ostream_formatter {}; | |
2245 | template <> struct fmt::formatter<crimson::os::seastore::sea_time_point_printer_t> : fmt::ostream_formatter {}; | |
2246 | template <> struct fmt::formatter<crimson::os::seastore::segment_header_t> : fmt::ostream_formatter {}; | |
2247 | template <> struct fmt::formatter<crimson::os::seastore::segment_id_t> : fmt::ostream_formatter {}; | |
2248 | template <> struct fmt::formatter<crimson::os::seastore::segment_seq_printer_t> : fmt::ostream_formatter {}; | |
2249 | template <> struct fmt::formatter<crimson::os::seastore::segment_tail_t> : fmt::ostream_formatter {}; | |
2250 | template <> struct fmt::formatter<crimson::os::seastore::segment_type_t> : fmt::ostream_formatter {}; | |
2251 | template <> struct fmt::formatter<crimson::os::seastore::transaction_type_t> : fmt::ostream_formatter {}; | |
2252 | template <> struct fmt::formatter<crimson::os::seastore::write_result_t> : fmt::ostream_formatter {}; | |
2253 | template <> struct fmt::formatter<ceph::buffer::list> : fmt::ostream_formatter {}; | |
2254 | #endif |