]> git.proxmox.com Git - ceph.git/blame - ceph/src/crimson/os/seastore/seastore_types.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / crimson / os / seastore / seastore_types.h
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#pragma once
5
6#include <limits>
20effc67
TL
7#include <numeric>
8#include <optional>
f67539c2 9#include <iostream>
20effc67 10#include <vector>
f67539c2
TL
11
12#include "include/byteorder.h"
13#include "include/denc.h"
14#include "include/buffer.h"
15#include "include/cmp.h"
16#include "include/uuid.h"
20effc67 17#include "include/interval_set.h"
f67539c2
TL
18
19namespace crimson::os::seastore {
20
20effc67
TL
21using depth_t = uint32_t;
22using depth_le_t = ceph_le32;
23
24inline depth_le_t init_depth_le(uint32_t i) {
25 return ceph_le32(i);
26}
f67539c2
TL
27
28using checksum_t = uint32_t;
29
30// Immutable metadata for seastore to set at mkfs time
31struct seastore_meta_t {
32 uuid_d seastore_id;
33
34 DENC(seastore_meta_t, v, p) {
35 DENC_START(1, 1, p);
36 denc(v.seastore_id, p);
37 DENC_FINISH(p);
38 }
39};
40
20effc67
TL
41std::ostream& operator<<(std::ostream& out, const seastore_meta_t& meta);
42
43// identifies a specific physical device within seastore
44using device_id_t = uint8_t;
45
46constexpr uint16_t SEGMENT_ID_LEN_BITS = 24;
47
48// order of device_id_t
49constexpr uint16_t DEVICE_ID_LEN_BITS = 8;
50
51// 1 bit to identify address type
52
53// segment ids without a device id encapsulated
54using device_segment_id_t = uint32_t;
55
56constexpr device_id_t DEVICE_ID_MAX =
57 (std::numeric_limits<device_id_t>::max() >>
58 (std::numeric_limits<device_id_t>::digits - DEVICE_ID_LEN_BITS + 1));
59constexpr device_id_t DEVICE_ID_RECORD_RELATIVE = DEVICE_ID_MAX - 1;
60constexpr device_id_t DEVICE_ID_BLOCK_RELATIVE = DEVICE_ID_MAX - 2;
61constexpr device_id_t DEVICE_ID_DELAYED = DEVICE_ID_MAX - 3;
62constexpr device_id_t DEVICE_ID_NULL = DEVICE_ID_MAX - 4;
63constexpr device_id_t DEVICE_ID_FAKE = DEVICE_ID_MAX - 5;
64constexpr device_id_t DEVICE_ID_ZERO = DEVICE_ID_MAX - 6;
65constexpr device_id_t DEVICE_ID_MAX_VALID = DEVICE_ID_MAX - 7;
66
67constexpr device_segment_id_t DEVICE_SEGMENT_ID_MAX =
68 (1 << SEGMENT_ID_LEN_BITS) - 1;
69
f67539c2 70// Identifies segment location on disk, see SegmentManager
20effc67
TL
71struct segment_id_t {
72private:
73 // internal segment id type of segment_id_t, basically
74 // this is a unsigned int with the top "DEVICE_ID_LEN_BITS"
75 // bits representing the id of the device on which the
76 // segment resides
77 using internal_segment_id_t = uint32_t;
78
79 // mask for segment manager id
80 static constexpr internal_segment_id_t SM_ID_MASK =
81 0xF << (std::numeric_limits<internal_segment_id_t>::digits - DEVICE_ID_LEN_BITS);
82 // default internal segment id
83 static constexpr internal_segment_id_t DEFAULT_INTERNAL_SEG_ID =
84 (std::numeric_limits<internal_segment_id_t>::max() >> 1) - 1;
85
86 internal_segment_id_t segment = DEFAULT_INTERNAL_SEG_ID;
87
88 constexpr segment_id_t(uint32_t encoded) : segment(encoded) {}
89
90public:
91 segment_id_t() = default;
92 constexpr segment_id_t(device_id_t id, device_segment_id_t segment)
93 : segment(make_internal(segment, id)) {}
94
95 [[gnu::always_inline]]
96 device_id_t device_id() const {
97 return internal_to_device(segment);
98 }
99
100 [[gnu::always_inline]]
101 constexpr device_segment_id_t device_segment_id() const {
102 return internal_to_segment(segment);
103 }
104
105 bool operator==(const segment_id_t& other) const {
106 return segment == other.segment;
107 }
108 bool operator!=(const segment_id_t& other) const {
109 return segment != other.segment;
110 }
111 bool operator<(const segment_id_t& other) const {
112 return segment < other.segment;
113 }
114 bool operator<=(const segment_id_t& other) const {
115 return segment <= other.segment;
116 }
117 bool operator>(const segment_id_t& other) const {
118 return segment > other.segment;
119 }
120 bool operator>=(const segment_id_t& other) const {
121 return segment >= other.segment;
122 }
123
124 DENC(segment_id_t, v, p) {
125 denc(v.segment, p);
126 }
127private:
128 static constexpr unsigned segment_bits = (
129 std::numeric_limits<internal_segment_id_t>::digits - DEVICE_ID_LEN_BITS
130 );
131
132 static inline device_id_t internal_to_device(internal_segment_id_t id) {
133 return (static_cast<device_id_t>(id) & SM_ID_MASK) >> segment_bits;
134 }
135
136 constexpr static inline device_segment_id_t internal_to_segment(
137 internal_segment_id_t id) {
138 return id & (~SM_ID_MASK);
139 }
140
141 constexpr static inline internal_segment_id_t make_internal(
142 device_segment_id_t id,
143 device_id_t sm_id) {
144 return static_cast<internal_segment_id_t>(id) |
145 (static_cast<internal_segment_id_t>(sm_id) << segment_bits);
146 }
147
148 friend struct segment_id_le_t;
149 friend struct seg_paddr_t;
150 friend struct paddr_t;
151 friend struct paddr_le_t;
152};
f67539c2 153
20effc67
TL
154// ondisk type of segment_id_t
155struct __attribute((packed)) segment_id_le_t {
156 ceph_le32 segment = ceph_le32(segment_id_t::DEFAULT_INTERNAL_SEG_ID);
157
158 segment_id_le_t(const segment_id_t id) :
159 segment(ceph_le32(id.segment)) {}
160
161 operator segment_id_t() const {
162 return segment_id_t(segment);
163 }
164};
165
166constexpr segment_id_t MAX_SEG_ID = segment_id_t(
167 DEVICE_ID_MAX,
168 DEVICE_SEGMENT_ID_MAX
169);
f67539c2 170// for tests which generate fake paddrs
20effc67
TL
171constexpr segment_id_t NULL_SEG_ID = segment_id_t(DEVICE_ID_NULL, 0);
172constexpr segment_id_t FAKE_SEG_ID = segment_id_t(DEVICE_ID_FAKE, 0);
173
174std::ostream &operator<<(std::ostream &out, const segment_id_t&);
175
f67539c2
TL
176
177std::ostream &segment_to_stream(std::ostream &, const segment_id_t &t);
178
179// Offset within a segment on disk, see SegmentManager
180// may be negative for relative offsets
181using segment_off_t = int32_t;
182constexpr segment_off_t NULL_SEG_OFF =
20effc67
TL
183 std::numeric_limits<segment_off_t>::max();
184constexpr segment_off_t MAX_SEG_OFF =
185 std::numeric_limits<segment_off_t>::max();
f67539c2
TL
186
187std::ostream &offset_to_stream(std::ostream &, const segment_off_t &t);
188
189/* Monotonically increasing segment seq, uniquely identifies
190 * the incarnation of a segment */
191using segment_seq_t = uint32_t;
192static constexpr segment_seq_t NULL_SEG_SEQ =
193 std::numeric_limits<segment_seq_t>::max();
20effc67
TL
194static constexpr segment_seq_t MAX_SEG_SEQ =
195 std::numeric_limits<segment_seq_t>::max();
f67539c2
TL
196
197// Offset of delta within a record
198using record_delta_idx_t = uint32_t;
199constexpr record_delta_idx_t NULL_DELTA_IDX =
200 std::numeric_limits<record_delta_idx_t>::max();
201
20effc67
TL
202/**
203 * segment_map_t
204 *
205 * Compact templated mapping from a segment_id_t to a value type.
206 */
207template <typename T>
208class segment_map_t {
209public:
210 segment_map_t() {
211 // initializes top vector with 0 length vectors to indicate that they
212 // are not yet present
213 device_to_segments.resize(DEVICE_ID_MAX_VALID);
214 }
215 void add_device(device_id_t device, size_t segments, const T& init) {
216 assert(device <= DEVICE_ID_MAX_VALID);
217 assert(device_to_segments[device].size() == 0);
218 device_to_segments[device].resize(segments, init);
219 total_segments += segments;
220 }
221 void clear() {
222 device_to_segments.clear();
223 device_to_segments.resize(DEVICE_ID_MAX_VALID);
224 total_segments = 0;
225 }
226
227 T& operator[](segment_id_t id) {
228 assert(id.device_segment_id() < device_to_segments[id.device_id()].size());
229 return device_to_segments[id.device_id()][id.device_segment_id()];
230 }
231 const T& operator[](segment_id_t id) const {
232 assert(id.device_segment_id() < device_to_segments[id.device_id()].size());
233 return device_to_segments[id.device_id()][id.device_segment_id()];
234 }
235
236 bool contains(segment_id_t id) {
237 bool b = id.device_id() < device_to_segments.size();
238 if (!b) {
239 return b;
240 }
241 b = id.device_segment_id() < device_to_segments[id.device_id()].size();
242 return b;
243 }
244
245 auto begin() {
246 return iterator<false>::lower_bound(*this, 0, 0);
247 }
248 auto begin() const {
249 return iterator<true>::lower_bound(*this, 0, 0);
250 }
251
252 auto end() {
253 return iterator<false>::end_iterator(*this);
254 }
255 auto end() const {
256 return iterator<true>::end_iterator(*this);
257 }
258
259 auto device_begin(device_id_t id) {
260 auto ret = iterator<false>::lower_bound(*this, id, 0);
261 assert(ret->first.device_id() == id);
262 return ret;
263 }
264 auto device_end(device_id_t id) {
265 return iterator<false>::lower_bound(*this, id + 1, 0);
266 }
267
268 size_t size() const {
269 return total_segments;
270 }
271
272private:
273 template <bool is_const = false>
274 class iterator {
275 /// points at set being iterated over
276 std::conditional_t<
277 is_const,
278 const segment_map_t &,
279 segment_map_t &> parent;
280
281 /// points at current device, or DEVICE_ID_MAX_VALID if is_end()
282 device_id_t device_id;
283
284 /// segment at which we are pointing, 0 if is_end()
285 device_segment_id_t device_segment_id;
286
287 /// holds referent for operator* and operator-> when !is_end()
288 std::optional<
289 std::pair<
290 const segment_id_t,
291 std::conditional_t<is_const, const T&, T&>
292 >> current;
293
294 bool is_end() const {
295 return device_id == DEVICE_ID_MAX_VALID;
296 }
297
298 void find_valid() {
299 assert(!is_end());
300 auto &device_vec = parent.device_to_segments[device_id];
301 if (device_vec.size() == 0 ||
302 device_segment_id == device_vec.size()) {
303 while (++device_id < DEVICE_ID_MAX_VALID &&
304 parent.device_to_segments[device_id].size() == 0);
305 device_segment_id = 0;
306 }
307 if (is_end()) {
308 current = std::nullopt;
309 } else {
310 current.emplace(
311 segment_id_t{device_id, device_segment_id},
312 parent.device_to_segments[device_id][device_segment_id]
313 );
314 }
315 }
316
317 iterator(
318 decltype(parent) &parent,
319 device_id_t device_id,
320 device_segment_id_t device_segment_id)
321 : parent(parent), device_id(device_id),
322 device_segment_id(device_segment_id) {}
323
324 public:
325 static iterator lower_bound(
326 decltype(parent) &parent,
327 device_id_t device_id,
328 device_segment_id_t device_segment_id) {
329 if (device_id == DEVICE_ID_MAX_VALID) {
330 return end_iterator(parent);
331 } else {
332 auto ret = iterator{parent, device_id, device_segment_id};
333 ret.find_valid();
334 return ret;
335 }
336 }
337
338 static iterator end_iterator(
339 decltype(parent) &parent) {
340 return iterator{parent, DEVICE_ID_MAX_VALID, 0};
341 }
342
343 iterator<is_const>& operator++() {
344 assert(!is_end());
345 ++device_segment_id;
346 find_valid();
347 return *this;
348 }
349
350 bool operator==(iterator<is_const> rit) {
351 return (device_id == rit.device_id &&
352 device_segment_id == rit.device_segment_id);
353 }
354
355 bool operator!=(iterator<is_const> rit) {
356 return !(*this == rit);
357 }
358
359 template <bool c = is_const, std::enable_if_t<c, int> = 0>
360 const std::pair<const segment_id_t, const T&> *operator->() {
361 assert(!is_end());
362 return &*current;
363 }
364 template <bool c = is_const, std::enable_if_t<!c, int> = 0>
365 std::pair<const segment_id_t, T&> *operator->() {
366 assert(!is_end());
367 return &*current;
368 }
369 template <bool c = is_const, std::enable_if_t<c, int> = 0>
370 const std::pair<const segment_id_t, const T&> &operator*() {
371 assert(!is_end());
372 return *current;
373 }
374 template <bool c = is_const, std::enable_if_t<!c, int> = 0>
375 std::pair<const segment_id_t, T&> &operator*() {
376 assert(!is_end());
377 return *current;
378 }
379 };
380
381 /**
382 * device_to_segments
383 *
384 * device -> segment -> T mapping. device_to_segments[d].size() > 0 iff
385 * device <d> has been added.
386 */
387 std::vector<std::vector<T>> device_to_segments;
388
389 /// total number of added segments
390 size_t total_segments = 0;
391};
392
f67539c2
TL
393/**
394 * paddr_t
395 *
396 * <segment, offset> offset on disk, see SegmentManager
397 *
398 * May be absolute, record_relative, or block_relative.
399 *
400 * Blocks get read independently of the surrounding record,
401 * so paddrs embedded directly within a block need to refer
402 * to other blocks within the same record by a block_relative
403 * addr relative to the block's own offset. By contrast,
404 * deltas to existing blocks need to use record_relative
405 * addrs relative to the first block of the record.
406 *
407 * Fresh extents during a transaction are referred to by
408 * record_relative paddrs.
409 */
20effc67
TL
410constexpr uint16_t DEV_ADDR_LEN_BITS = 64 - DEVICE_ID_LEN_BITS;
411static constexpr uint16_t SEG_OFF_LEN_BITS = 32;
412enum class addr_types_t : uint8_t {
413 SEGMENT = 0,
414 RANDOM_BLOCK = 1
415};
416struct seg_paddr_t;
f67539c2 417struct paddr_t {
20effc67
TL
418protected:
419 using common_addr_t = uint64_t;
420 common_addr_t dev_addr;
421private:
422 constexpr paddr_t(segment_id_t seg, segment_off_t offset)
423 : dev_addr((static_cast<common_addr_t>(seg.segment)
424 << SEG_OFF_LEN_BITS) | static_cast<uint32_t>(offset)) {}
425 constexpr paddr_t(common_addr_t val) : dev_addr(val) {}
426public:
427 static constexpr paddr_t make_seg_paddr(
428 segment_id_t seg, segment_off_t offset) {
429 return paddr_t(seg, offset);
430 }
431 static constexpr paddr_t make_seg_paddr(
432 device_id_t device,
433 device_segment_id_t seg,
434 segment_off_t offset) {
435 return paddr_t(segment_id_t(device, seg), offset);
436 }
437 constexpr paddr_t() : paddr_t(NULL_SEG_ID, 0) {}
f67539c2 438
20effc67
TL
439 // use 1bit in device_id_t for address type
440 void set_device_id(device_id_t id, addr_types_t type = addr_types_t::SEGMENT) {
441 dev_addr &= static_cast<common_addr_t>(
442 std::numeric_limits<device_segment_id_t>::max());
443 dev_addr |= static_cast<common_addr_t>(id & 0x8) << DEV_ADDR_LEN_BITS;
444 dev_addr |= static_cast<common_addr_t>(type)
445 << (std::numeric_limits<common_addr_t>::digits - 1);
f67539c2
TL
446 }
447
20effc67
TL
448 device_id_t get_device_id() const {
449 return static_cast<device_id_t>(dev_addr >> DEV_ADDR_LEN_BITS);
450 }
451 addr_types_t get_addr_type() const {
452 return (addr_types_t)((dev_addr
453 >> (std::numeric_limits<common_addr_t>::digits - 1)) & 1);
f67539c2
TL
454 }
455
20effc67
TL
456 paddr_t add_offset(int32_t o) const;
457 paddr_t add_relative(paddr_t o) const;
458 paddr_t add_block_relative(paddr_t o) const;
459 paddr_t add_record_relative(paddr_t o) const;
460 paddr_t maybe_relative_to(paddr_t base) const;
461
462 seg_paddr_t& as_seg_paddr();
463 const seg_paddr_t& as_seg_paddr() const;
464
465 paddr_t operator-(paddr_t rhs) const;
466
f67539c2 467 bool is_block_relative() const {
20effc67
TL
468 return get_device_id() == DEVICE_ID_BLOCK_RELATIVE;
469 }
470 bool is_record_relative() const {
471 return get_device_id() == DEVICE_ID_RECORD_RELATIVE;
472 }
473 bool is_relative() const {
474 return is_block_relative() || is_record_relative();
475 }
476 /// Denotes special null addr
477 bool is_null() const {
478 return get_device_id() == DEVICE_ID_NULL;
479 }
480 /// Denotes special zero addr
481 bool is_zero() const {
482 return get_device_id() == DEVICE_ID_ZERO;
483 }
484
485 /**
486 * is_real
487 *
488 * indicates whether addr reflects a physical location, absolute
489 * or relative. FAKE segments also count as real so as to reflect
490 * the way in which unit tests use them.
491 */
492 bool is_real() const {
493 return !is_zero() && !is_null();
494 }
495
496 DENC(paddr_t, v, p) {
497 DENC_START(1, 1, p);
498 denc(v.dev_addr, p);
499 DENC_FINISH(p);
500 }
501 friend struct paddr_le_t;
502 friend struct seg_paddr_t;
503
504 friend bool operator==(const paddr_t &, const paddr_t&);
505 friend bool operator!=(const paddr_t &, const paddr_t&);
506 friend bool operator<=(const paddr_t &, const paddr_t&);
507 friend bool operator<(const paddr_t &, const paddr_t&);
508 friend bool operator>=(const paddr_t &, const paddr_t&);
509 friend bool operator>(const paddr_t &, const paddr_t&);
510};
511WRITE_EQ_OPERATORS_1(paddr_t, dev_addr);
512WRITE_CMP_OPERATORS_1(paddr_t, dev_addr);
513
514struct seg_paddr_t : public paddr_t {
515 static constexpr uint64_t SEG_OFF_MASK = std::numeric_limits<uint32_t>::max();
516 // mask for segment manager id
517 static constexpr uint64_t SEG_ID_MASK =
518 static_cast<common_addr_t>(0xFFFFFFFF) << SEG_OFF_LEN_BITS;
519
520 seg_paddr_t(const seg_paddr_t&) = delete;
521 seg_paddr_t(seg_paddr_t&) = delete;
522 seg_paddr_t& operator=(const seg_paddr_t&) = delete;
523 seg_paddr_t& operator=(seg_paddr_t&) = delete;
524 segment_id_t get_segment_id() const {
525 return segment_id_t((dev_addr & SEG_ID_MASK) >> SEG_OFF_LEN_BITS);
526 }
527 segment_off_t get_segment_off() const {
528 return segment_off_t(dev_addr & SEG_OFF_MASK);
529 }
530 void set_segment_id(const segment_id_t id) {
531 dev_addr &= static_cast<common_addr_t>(
532 std::numeric_limits<device_segment_id_t>::max());
533 dev_addr |= static_cast<common_addr_t>(id.segment) << SEG_OFF_LEN_BITS;
534 }
535 void set_segment_off(const segment_off_t off) {
536 dev_addr &= static_cast<common_addr_t>(
537 std::numeric_limits<device_segment_id_t>::max()) << SEG_OFF_LEN_BITS;
538 dev_addr |= (uint32_t)off;
f67539c2
TL
539 }
540
541 paddr_t add_offset(segment_off_t o) const {
20effc67 542 return paddr_t::make_seg_paddr(get_segment_id(), get_segment_off() + o);
f67539c2
TL
543 }
544
545 paddr_t add_relative(paddr_t o) const {
546 assert(o.is_relative());
20effc67
TL
547 seg_paddr_t& s = o.as_seg_paddr();
548 return paddr_t::make_seg_paddr(get_segment_id(),
549 get_segment_off() + s.get_segment_off());
f67539c2
TL
550 }
551
552 paddr_t add_block_relative(paddr_t o) const {
553 // special version mainly for documentation purposes
554 assert(o.is_block_relative());
555 return add_relative(o);
556 }
557
558 paddr_t add_record_relative(paddr_t o) const {
559 // special version mainly for documentation purposes
560 assert(o.is_record_relative());
561 return add_relative(o);
562 }
563
564 /**
565 * paddr_t::operator-
566 *
567 * Only defined for record_relative paddr_ts. Yields a
568 * block_relative address.
569 */
570 paddr_t operator-(paddr_t rhs) const {
20effc67 571 seg_paddr_t& r = rhs.as_seg_paddr();
f67539c2 572 assert(rhs.is_relative() && is_relative());
20effc67
TL
573 assert(r.get_segment_id() == get_segment_id());
574 return paddr_t::make_seg_paddr(
575 segment_id_t{DEVICE_ID_BLOCK_RELATIVE, 0},
576 get_segment_off() - r.get_segment_off()
577 );
f67539c2
TL
578 }
579
580 /**
581 * maybe_relative_to
582 *
583 * Helper for the case where an in-memory paddr_t may be
584 * either block_relative or absolute (not record_relative).
585 *
586 * base must be either absolute or record_relative.
587 */
588 paddr_t maybe_relative_to(paddr_t base) const {
589 assert(!base.is_block_relative());
20effc67 590 seg_paddr_t& s = base.as_seg_paddr();
f67539c2 591 if (is_block_relative())
20effc67 592 return s.add_block_relative(*this);
f67539c2
TL
593 else
594 return *this;
595 }
f67539c2 596};
f67539c2 597constexpr paddr_t P_ADDR_NULL = paddr_t{};
20effc67
TL
598constexpr paddr_t P_ADDR_MIN = paddr_t::make_seg_paddr(segment_id_t(0, 0), 0);
599constexpr paddr_t P_ADDR_MAX = paddr_t::make_seg_paddr(
600 segment_id_t(DEVICE_ID_MAX, DEVICE_SEGMENT_ID_MAX),
601 std::numeric_limits<segment_off_t>::max());
602constexpr paddr_t P_ADDR_ZERO = paddr_t::make_seg_paddr(
603 DEVICE_ID_ZERO, 0, 0);
604
f67539c2 605constexpr paddr_t make_record_relative_paddr(segment_off_t off) {
20effc67
TL
606 return paddr_t::make_seg_paddr(
607 segment_id_t{DEVICE_ID_RECORD_RELATIVE, 0},
608 off);
f67539c2
TL
609}
610constexpr paddr_t make_block_relative_paddr(segment_off_t off) {
20effc67
TL
611 return paddr_t::make_seg_paddr(
612 segment_id_t{DEVICE_ID_BLOCK_RELATIVE, 0},
613 off);
f67539c2
TL
614}
615constexpr paddr_t make_fake_paddr(segment_off_t off) {
20effc67
TL
616 return paddr_t::make_seg_paddr(FAKE_SEG_ID, off);
617}
618constexpr paddr_t delayed_temp_paddr(segment_off_t off) {
619 return paddr_t::make_seg_paddr(
620 segment_id_t{DEVICE_ID_DELAYED, 0},
621 off);
f67539c2
TL
622}
623
20effc67
TL
624struct __attribute((packed)) paddr_le_t {
625 ceph_le64 dev_addr =
626 ceph_le64(P_ADDR_NULL.dev_addr);
f67539c2
TL
627
628 paddr_le_t() = default;
20effc67 629 paddr_le_t(const paddr_t &addr) : dev_addr(ceph_le64(addr.dev_addr)) {}
f67539c2
TL
630
631 operator paddr_t() const {
20effc67 632 return paddr_t{dev_addr};
f67539c2
TL
633 }
634};
635
636std::ostream &operator<<(std::ostream &out, const paddr_t &rhs);
637
638using objaddr_t = uint32_t;
20effc67
TL
639constexpr objaddr_t OBJ_ADDR_MAX = std::numeric_limits<objaddr_t>::max();
640constexpr objaddr_t OBJ_ADDR_NULL = OBJ_ADDR_MAX - 1;
641
enum class placement_hint_t {
  HOT       = 0,  // Most of the metadata
  COLD      = 1,  // Object data
  REWRITE   = 2,  // Cold metadata and data (probably need further splits)
  NUM_HINTS = 3   // Constant for number of hints
};
648
enum class device_type_t {
  NONE         = 0,
  SEGMENTED    = 1,  // i.e. Hard_Disk, SATA_SSD, NAND_NVME
  RANDOM_BLOCK = 2,  // i.e. RANDOM_BD
  PMEM         = 3,  // i.e. NVDIMM, PMEM
  NUM_TYPES    = 4   // number of device types above
};
656
657std::ostream& operator<<(std::ostream& out, device_type_t t);
658
659bool can_delay_allocation(device_type_t type);
660device_type_t string_to_device_type(std::string type);
f67539c2
TL
661
662/* Monotonically increasing identifier for the location of a
663 * journal_record.
664 */
665struct journal_seq_t {
666 segment_seq_t segment_seq = 0;
667 paddr_t offset;
668
20effc67
TL
669 journal_seq_t add_offset(segment_off_t o) const {
670 return {segment_seq, offset.add_offset(o)};
671 }
672
f67539c2
TL
673 DENC(journal_seq_t, v, p) {
674 DENC_START(1, 1, p);
675 denc(v.segment_seq, p);
676 denc(v.offset, p);
677 DENC_FINISH(p);
678 }
679};
680WRITE_CMP_OPERATORS_2(journal_seq_t, segment_seq, offset)
681WRITE_EQ_OPERATORS_2(journal_seq_t, segment_seq, offset)
20effc67
TL
682constexpr journal_seq_t JOURNAL_SEQ_MIN{
683 0,
684 paddr_t::make_seg_paddr(NULL_SEG_ID, 0)
685};
686constexpr journal_seq_t JOURNAL_SEQ_MAX{
687 MAX_SEG_SEQ,
688 P_ADDR_MAX
689};
f67539c2
TL
690
691std::ostream &operator<<(std::ostream &out, const journal_seq_t &seq);
692
693static constexpr journal_seq_t NO_DELTAS = journal_seq_t{
694 NULL_SEG_SEQ,
695 P_ADDR_NULL
696};
697
698// logical addr, see LBAManager, TransactionManager
699using laddr_t = uint64_t;
700constexpr laddr_t L_ADDR_MIN = std::numeric_limits<laddr_t>::min();
701constexpr laddr_t L_ADDR_MAX = std::numeric_limits<laddr_t>::max();
702constexpr laddr_t L_ADDR_NULL = std::numeric_limits<laddr_t>::max();
703constexpr laddr_t L_ADDR_ROOT = std::numeric_limits<laddr_t>::max() - 1;
704constexpr laddr_t L_ADDR_LBAT = std::numeric_limits<laddr_t>::max() - 2;
705
20effc67
TL
706struct __attribute((packed)) laddr_le_t {
707 ceph_le64 laddr = ceph_le64(L_ADDR_NULL);
f67539c2
TL
708
709 laddr_le_t() = default;
710 laddr_le_t(const laddr_le_t &) = default;
711 explicit laddr_le_t(const laddr_t &addr)
20effc67 712 : laddr(ceph_le64(addr)) {}
f67539c2
TL
713
714 operator laddr_t() const {
715 return laddr_t(laddr);
716 }
717 laddr_le_t& operator=(laddr_t addr) {
718 ceph_le64 val;
719 val = addr;
720 laddr = val;
721 return *this;
722 }
723};
724
725// logical offset, see LBAManager, TransactionManager
726using extent_len_t = uint32_t;
727constexpr extent_len_t EXTENT_LEN_MAX =
728 std::numeric_limits<extent_len_t>::max();
729
730using extent_len_le_t = ceph_le32;
20effc67
TL
731inline extent_len_le_t init_extent_len_le(extent_len_t len) {
732 return ceph_le32(len);
f67539c2
TL
733}
734
735struct laddr_list_t : std::list<std::pair<laddr_t, extent_len_t>> {
736 template <typename... T>
737 laddr_list_t(T&&... args)
738 : std::list<std::pair<laddr_t, extent_len_t>>(std::forward<T>(args)...) {}
739};
740struct paddr_list_t : std::list<std::pair<paddr_t, extent_len_t>> {
741 template <typename... T>
742 paddr_list_t(T&&... args)
743 : std::list<std::pair<paddr_t, extent_len_t>>(std::forward<T>(args)...) {}
744};
745
746std::ostream &operator<<(std::ostream &out, const laddr_list_t &rhs);
747std::ostream &operator<<(std::ostream &out, const paddr_list_t &rhs);
748
749/* identifies type of extent, used for interpreting deltas, managing
750 * writeback.
751 *
752 * Note that any new extent type needs to be added to
753 * Cache::get_extent_by_type in cache.cc
754 */
enum class extent_types_t : uint8_t {
  ROOT                = 0,
  // LBA tree nodes
  LADDR_INTERNAL      = 1,
  LADDR_LEAF          = 2,
  // omap tree nodes
  OMAP_INNER          = 3,
  OMAP_LEAF           = 4,
  ONODE_BLOCK_STAGED  = 5,
  COLL_BLOCK          = 6,
  OBJECT_DATA_BLOCK   = 7,
  RETIRED_PLACEHOLDER = 8,
  RBM_ALLOC_INFO      = 9,
  // Test Block Types
  TEST_BLOCK          = 10,
  TEST_BLOCK_PHYSICAL = 11,
  // None and the number of valid extent_types_t
  NONE                = 12,
};
20effc67 772constexpr auto EXTENT_TYPES_MAX = static_cast<uint8_t>(extent_types_t::NONE);
f67539c2 773
20effc67 774constexpr bool is_logical_type(extent_types_t type) {
f67539c2
TL
775 switch (type) {
776 case extent_types_t::ROOT:
777 case extent_types_t::LADDR_INTERNAL:
778 case extent_types_t::LADDR_LEAF:
779 return false;
780 default:
781 return true;
782 }
783}
784
20effc67
TL
785constexpr bool is_lba_node(extent_types_t type)
786{
787 return type == extent_types_t::LADDR_INTERNAL ||
788 type == extent_types_t::LADDR_LEAF;
789}
790
f67539c2
TL
791std::ostream &operator<<(std::ostream &out, extent_types_t t);
792
793/* description of a new physical extent */
794struct extent_t {
795 extent_types_t type; ///< type of extent
796 laddr_t addr; ///< laddr of extent (L_ADDR_NULL for non-logical)
797 ceph::bufferlist bl; ///< payload, bl.length() == length, aligned
798};
799
800using extent_version_t = uint32_t;
801constexpr extent_version_t EXTENT_VERSION_NULL = 0;
802
803/* description of a mutation to a physical extent */
804struct delta_info_t {
805 extent_types_t type = extent_types_t::NONE; ///< delta type
806 paddr_t paddr; ///< physical address
807 laddr_t laddr = L_ADDR_NULL; ///< logical address
808 uint32_t prev_crc = 0;
809 uint32_t final_crc = 0;
810 segment_off_t length = NULL_SEG_OFF; ///< extent length
811 extent_version_t pversion; ///< prior version
812 ceph::bufferlist bl; ///< payload
813
814 DENC(delta_info_t, v, p) {
815 DENC_START(1, 1, p);
816 denc(v.type, p);
817 denc(v.paddr, p);
818 denc(v.laddr, p);
819 denc(v.prev_crc, p);
820 denc(v.final_crc, p);
821 denc(v.length, p);
822 denc(v.pversion, p);
823 denc(v.bl, p);
824 DENC_FINISH(p);
825 }
826
827 bool operator==(const delta_info_t &rhs) const {
828 return (
829 type == rhs.type &&
830 paddr == rhs.paddr &&
831 laddr == rhs.laddr &&
832 prev_crc == rhs.prev_crc &&
833 final_crc == rhs.final_crc &&
834 length == rhs.length &&
835 pversion == rhs.pversion &&
836 bl == rhs.bl
837 );
838 }
839
840 friend std::ostream &operator<<(std::ostream &lhs, const delta_info_t &rhs);
841};
842
843std::ostream &operator<<(std::ostream &lhs, const delta_info_t &rhs);
844
20effc67
TL
845class object_data_t {
846 laddr_t reserved_data_base = L_ADDR_NULL;
847 extent_len_t reserved_data_len = 0;
848
849 bool dirty = false;
850public:
851 object_data_t(
852 laddr_t reserved_data_base,
853 extent_len_t reserved_data_len)
854 : reserved_data_base(reserved_data_base),
855 reserved_data_len(reserved_data_len) {}
856
857 laddr_t get_reserved_data_base() const {
858 return reserved_data_base;
859 }
860
861 extent_len_t get_reserved_data_len() const {
862 return reserved_data_len;
863 }
864
865 bool is_null() const {
866 return reserved_data_base == L_ADDR_NULL;
867 }
868
869 bool must_update() const {
870 return dirty;
871 }
872
873 void update_reserved(
874 laddr_t base,
875 extent_len_t len) {
876 dirty = true;
877 reserved_data_base = base;
878 reserved_data_len = len;
879 }
880
881 void update_len(
882 extent_len_t len) {
883 dirty = true;
884 reserved_data_len = len;
885 }
886
887 void clear() {
888 dirty = true;
889 reserved_data_base = L_ADDR_NULL;
890 reserved_data_len = 0;
891 }
892};
893
894struct __attribute__((packed)) object_data_le_t {
895 laddr_le_t reserved_data_base = laddr_le_t(L_ADDR_NULL);
896 extent_len_le_t reserved_data_len = init_extent_len_le(0);
897
898 void update(const object_data_t &nroot) {
899 reserved_data_base = nroot.get_reserved_data_base();
900 reserved_data_len = init_extent_len_le(nroot.get_reserved_data_len());
901 }
902
903 object_data_t get() const {
904 return object_data_t(
905 reserved_data_base,
906 reserved_data_len);
907 }
908};
909
910struct omap_root_t {
911 laddr_t addr = L_ADDR_NULL;
912 depth_t depth = 0;
913 laddr_t hint = L_ADDR_MIN;
914 bool mutated = false;
915
916 omap_root_t() = default;
917 omap_root_t(laddr_t addr, depth_t depth, laddr_t addr_min)
918 : addr(addr),
919 depth(depth),
920 hint(addr_min) {}
921
922 omap_root_t(const omap_root_t &o) = default;
923 omap_root_t(omap_root_t &&o) = default;
924 omap_root_t &operator=(const omap_root_t &o) = default;
925 omap_root_t &operator=(omap_root_t &&o) = default;
926
927 bool is_null() const {
928 return addr == L_ADDR_NULL;
929 }
930
931 bool must_update() const {
932 return mutated;
933 }
934
935 void update(laddr_t _addr, depth_t _depth, laddr_t _hint) {
936 mutated = true;
937 addr = _addr;
938 depth = _depth;
939 hint = _hint;
940 }
941
942 laddr_t get_location() const {
943 return addr;
944 }
945
946 depth_t get_depth() const {
947 return depth;
948 }
949
950 laddr_t get_hint() const {
951 return hint;
952 }
953};
954
955class __attribute__((packed)) omap_root_le_t {
956 laddr_le_t addr = laddr_le_t(L_ADDR_NULL);
957 depth_le_t depth = init_depth_le(0);
958
959public:
960 omap_root_le_t() = default;
961
962 omap_root_le_t(laddr_t addr, depth_t depth)
963 : addr(addr), depth(init_depth_le(depth)) {}
964
965 omap_root_le_t(const omap_root_le_t &o) = default;
966 omap_root_le_t(omap_root_le_t &&o) = default;
967 omap_root_le_t &operator=(const omap_root_le_t &o) = default;
968 omap_root_le_t &operator=(omap_root_le_t &&o) = default;
969
970 void update(const omap_root_t &nroot) {
971 addr = nroot.get_location();
972 depth = init_depth_le(nroot.get_depth());
973 }
974
975 omap_root_t get(laddr_t hint) const {
976 return omap_root_t(addr, depth, hint);
977 }
978};
979
980/**
981 * lba_root_t
982 */
983class __attribute__((packed)) lba_root_t {
984 paddr_le_t root_addr;
985 depth_le_t depth = init_extent_len_le(0);
986
987public:
988 lba_root_t() = default;
989
990 lba_root_t(paddr_t addr, depth_t depth)
991 : root_addr(addr), depth(init_depth_le(depth)) {}
992
993 lba_root_t(const lba_root_t &o) = default;
994 lba_root_t(lba_root_t &&o) = default;
995 lba_root_t &operator=(const lba_root_t &o) = default;
996 lba_root_t &operator=(lba_root_t &&o) = default;
997
998 paddr_t get_location() const {
999 return root_addr;
1000 }
1001
1002 void set_location(paddr_t location) {
1003 root_addr = location;
1004 }
1005
1006 depth_t get_depth() const {
1007 return depth;
1008 }
1009
1010 void set_depth(depth_t ndepth) {
1011 depth = ndepth;
1012 }
1013
1014 void adjust_addrs_from_base(paddr_t base) {
1015 paddr_t _root_addr = root_addr;
1016 if (_root_addr.is_relative()) {
1017 root_addr = base.add_record_relative(_root_addr);
1018 }
1019 }
1020};
1021
1022class coll_root_t {
1023 laddr_t addr = L_ADDR_NULL;
1024 extent_len_t size = 0;
1025
1026 bool mutated = false;
1027
1028public:
1029 coll_root_t() = default;
1030 coll_root_t(laddr_t addr, extent_len_t size) : addr(addr), size(size) {}
1031
1032 coll_root_t(const coll_root_t &o) = default;
1033 coll_root_t(coll_root_t &&o) = default;
1034 coll_root_t &operator=(const coll_root_t &o) = default;
1035 coll_root_t &operator=(coll_root_t &&o) = default;
1036
1037 bool must_update() const {
1038 return mutated;
1039 }
1040
1041 void update(laddr_t _addr, extent_len_t _s) {
1042 mutated = true;
1043 addr = _addr;
1044 size = _s;
1045 }
1046
1047 laddr_t get_location() const {
1048 return addr;
1049 }
1050
1051 extent_len_t get_size() const {
1052 return size;
1053 }
1054};
1055
1056/**
1057 * coll_root_le_t
1058 *
1059 * Information for locating CollectionManager information, to be embedded
1060 * in root block.
1061 */
1062class __attribute__((packed)) coll_root_le_t {
1063 laddr_le_t addr;
1064 extent_len_le_t size = init_extent_len_le(0);
1065
1066public:
1067 coll_root_le_t() = default;
1068
1069 coll_root_le_t(laddr_t laddr, segment_off_t size)
1070 : addr(laddr), size(init_extent_len_le(size)) {}
1071
1072
1073 coll_root_le_t(const coll_root_le_t &o) = default;
1074 coll_root_le_t(coll_root_le_t &&o) = default;
1075 coll_root_le_t &operator=(const coll_root_le_t &o) = default;
1076 coll_root_le_t &operator=(coll_root_le_t &&o) = default;
1077
1078 void update(const coll_root_t &nroot) {
1079 addr = nroot.get_location();
1080 size = init_extent_len_le(nroot.get_size());
1081 }
1082
1083 coll_root_t get() const {
1084 return coll_root_t(addr, size);
1085 }
1086};
1087
1088
1089/**
1090 * root_t
1091 *
1092 * Contains information required to find metadata roots.
1093 * TODO: generalize this to permit more than one lba_manager implementation
1094 */
1095struct __attribute__((packed)) root_t {
1096 using meta_t = std::map<std::string, std::string>;
1097
1098 static constexpr int MAX_META_LENGTH = 1024;
1099
1100 lba_root_t lba_root;
1101 laddr_le_t onode_root;
1102 coll_root_le_t collection_root;
1103
1104 char meta[MAX_META_LENGTH];
1105
1106 root_t() {
1107 set_meta(meta_t{});
1108 }
1109
1110 void adjust_addrs_from_base(paddr_t base) {
1111 lba_root.adjust_addrs_from_base(base);
1112 }
1113
1114 meta_t get_meta() {
1115 bufferlist bl;
1116 bl.append(ceph::buffer::create_static(MAX_META_LENGTH, meta));
1117 meta_t ret;
1118 auto iter = bl.cbegin();
1119 decode(ret, iter);
1120 return ret;
1121 }
1122
1123 void set_meta(const meta_t &m) {
1124 ceph::bufferlist bl;
1125 encode(m, bl);
1126 ceph_assert(bl.length() < MAX_META_LENGTH);
1127 bl.rebuild();
1128 auto &bptr = bl.front();
1129 ::memset(meta, 0, MAX_META_LENGTH);
1130 ::memcpy(meta, bptr.c_str(), bl.length());
1131 }
1132};
1133
// block identifier; NULL_BLK_ID is the largest representable value
using blk_id_t = uint64_t;
constexpr blk_id_t NULL_BLK_ID = std::numeric_limits<blk_id_t>::max();

// use absolute address
using blk_paddr_t = uint64_t;
1140struct rbm_alloc_delta_t {
1141 enum class op_types_t : uint8_t {
1142 NONE = 0,
1143 SET = 1,
1144 CLEAR = 2
1145 };
1146 std::vector<std::pair<paddr_t, size_t>> alloc_blk_ranges;
1147 op_types_t op = op_types_t::NONE;
1148
1149 rbm_alloc_delta_t() = default;
1150
1151 DENC(rbm_alloc_delta_t, v, p) {
1152 DENC_START(1, 1, p);
1153 denc(v.alloc_blk_ranges, p);
1154 denc(v.op, p);
1155 DENC_FINISH(p);
1156 }
1157};
1158
1159paddr_t convert_blk_paddr_to_paddr(blk_paddr_t addr, size_t block_size,
1160 uint32_t blocks_per_segment, device_id_t d_id);
1161blk_paddr_t convert_paddr_to_blk_paddr(paddr_t addr, size_t block_size,
1162 uint32_t blocks_per_segment);
1163
1164struct extent_info_t {
1165 extent_types_t type = extent_types_t::NONE;
1166 laddr_t addr = L_ADDR_NULL;
1167 extent_len_t len = 0;
1168
1169 extent_info_t() = default;
1170 extent_info_t(const extent_t &et)
1171 : type(et.type), addr(et.addr), len(et.bl.length()) {}
1172
1173 DENC(extent_info_t, v, p) {
1174 DENC_START(1, 1, p);
1175 denc(v.type, p);
1176 denc(v.addr, p);
1177 denc(v.len, p);
1178 DENC_FINISH(p);
1179 }
1180};
1181std::ostream &operator<<(std::ostream &out, const extent_info_t &header);
1182
1183using segment_nonce_t = uint32_t;
1184
1185/**
1186 * Segment header
1187 *
1188 * Every segment contains and encode segment_header_t in the first block.
1189 * Our strategy for finding the journal replay point is:
1190 * 1) Find the segment with the highest journal_segment_seq
1191 * 2) Replay starting at record located at that segment's journal_tail
1192 */
1193struct segment_header_t {
1194 segment_seq_t journal_segment_seq;
1195 segment_id_t physical_segment_id; // debugging
1196
1197 journal_seq_t journal_tail;
1198 segment_nonce_t segment_nonce;
1199 bool out_of_line;
1200
1201 DENC(segment_header_t, v, p) {
1202 DENC_START(1, 1, p);
1203 denc(v.journal_segment_seq, p);
1204 denc(v.physical_segment_id, p);
1205 denc(v.journal_tail, p);
1206 denc(v.segment_nonce, p);
1207 denc(v.out_of_line, p);
1208 DENC_FINISH(p);
1209 }
1210};
1211std::ostream &operator<<(std::ostream &out, const segment_header_t &header);
1212
1213struct record_size_t {
1214 extent_len_t plain_mdlength = 0; // mdlength without the record header
1215 extent_len_t dlength = 0;
1216
1217 extent_len_t get_raw_mdlength() const;
1218
1219 bool is_empty() const {
1220 return plain_mdlength == 0 &&
1221 dlength == 0;
1222 }
1223
1224 void account_extent(extent_len_t extent_len);
1225
1226 void account(const extent_t& extent) {
1227 account_extent(extent.bl.length());
1228 }
1229
1230 void account(const delta_info_t& delta);
1231};
1232WRITE_EQ_OPERATORS_2(record_size_t, plain_mdlength, dlength);
1233
f67539c2
TL
1234struct record_t {
1235 std::vector<extent_t> extents;
1236 std::vector<delta_info_t> deltas;
20effc67
TL
1237 record_size_t size;
1238
1239 record_t() = default;
1240 record_t(std::vector<extent_t>&& _extents,
1241 std::vector<delta_info_t>&& _deltas) {
1242 for (auto& e: _extents) {
1243 push_back(std::move(e));
1244 }
1245 for (auto& d: _deltas) {
1246 push_back(std::move(d));
1247 }
1248 }
1249
1250 bool is_empty() const {
1251 return extents.size() == 0 &&
1252 deltas.size() == 0;
1253 }
1254
1255 std::size_t get_delta_size() const {
1256 auto delta_size = std::accumulate(
1257 deltas.begin(), deltas.end(), 0,
1258 [](uint64_t sum, auto& delta) {
1259 return sum + delta.bl.length();
1260 }
1261 );
1262 return delta_size;
1263 }
1264
1265 void push_back(extent_t&& extent) {
1266 size.account(extent);
1267 extents.push_back(std::move(extent));
1268 }
1269
1270 void push_back(delta_info_t&& delta) {
1271 size.account(delta);
1272 deltas.push_back(std::move(delta));
1273 }
f67539c2
TL
1274};
1275
20effc67
TL
1276struct record_header_t {
1277 uint32_t deltas; // number of deltas
1278 uint32_t extents; // number of extents
1279
1280
1281 DENC(record_header_t, v, p) {
1282 DENC_START(1, 1, p);
1283 denc(v.deltas, p);
1284 denc(v.extents, p);
1285 DENC_FINISH(p);
1286 }
1287};
1288
1289struct record_group_header_t {
1290 uint32_t records;
1291 extent_len_t mdlength; // block aligned, length of metadata
1292 extent_len_t dlength; // block aligned, length of data
1293 segment_nonce_t segment_nonce;// nonce of containing segment
1294 journal_seq_t committed_to; // records prior to committed_to have been
1295 // fully written, maybe in another segment.
1296 checksum_t data_crc; // crc of data payload
1297
1298
1299 DENC(record_group_header_t, v, p) {
1300 DENC_START(1, 1, p);
1301 denc(v.records, p);
1302 denc(v.mdlength, p);
1303 denc(v.dlength, p);
1304 denc(v.segment_nonce, p);
1305 denc(v.committed_to, p);
1306 denc(v.data_crc, p);
1307 DENC_FINISH(p);
1308 }
1309};
1310
1311struct record_group_size_t {
1312 extent_len_t plain_mdlength = 0; // mdlength without the group header
1313 extent_len_t dlength = 0;
1314 extent_len_t block_size = 0;
1315
1316 record_group_size_t() = default;
1317 record_group_size_t(
1318 const record_size_t& rsize,
1319 extent_len_t block_size) {
1320 account(rsize, block_size);
1321 }
1322
1323 extent_len_t get_raw_mdlength() const;
1324
1325 extent_len_t get_mdlength() const {
1326 assert(block_size > 0);
1327 return p2roundup(get_raw_mdlength(), block_size);
1328 }
1329
1330 extent_len_t get_encoded_length() const {
1331 assert(block_size > 0);
1332 assert(dlength % block_size == 0);
1333 return get_mdlength() + dlength;
1334 }
1335
1336 record_group_size_t get_encoded_length_after(
1337 const record_size_t& rsize,
1338 extent_len_t block_size) const {
1339 record_group_size_t tmp = *this;
1340 tmp.account(rsize, block_size);
1341 return tmp;
1342 }
1343
1344 double get_fullness() const {
1345 assert(block_size > 0);
1346 return ((double)(get_raw_mdlength() + dlength) /
1347 get_encoded_length());
1348 }
1349
1350 void account(const record_size_t& rsize,
1351 extent_len_t block_size);
1352};
1353WRITE_EQ_OPERATORS_3(record_group_size_t, plain_mdlength, dlength, block_size);
1354
1355struct record_group_t {
1356 std::vector<record_t> records;
1357 record_group_size_t size;
1358 extent_len_t current_dlength = 0;
1359
1360 record_group_t() = default;
1361 record_group_t(
1362 record_t&& record,
1363 extent_len_t block_size) {
1364 push_back(std::move(record), block_size);
1365 }
1366
1367 std::size_t get_size() const {
1368 return records.size();
1369 }
1370
1371 void push_back(
1372 record_t&& record,
1373 extent_len_t block_size) {
1374 size.account(record.size, block_size);
1375 current_dlength += record.size.dlength;
1376 records.push_back(std::move(record));
1377 assert(size.get_encoded_length() < MAX_SEG_OFF);
1378 }
1379
1380 void reserve(std::size_t limit) {
1381 records.reserve(limit);
1382 }
1383
1384 void clear() {
1385 records.clear();
1386 size = {};
1387 current_dlength = 0;
1388 }
1389};
1390
1391ceph::bufferlist encode_record(
1392 record_t&& record,
1393 extent_len_t block_size,
1394 const journal_seq_t& committed_to,
1395 segment_nonce_t current_segment_nonce);
1396
1397ceph::bufferlist encode_records(
1398 record_group_t& record_group,
1399 const journal_seq_t& committed_to,
1400 segment_nonce_t current_segment_nonce);
1401
1402std::optional<record_group_header_t>
1403try_decode_records_header(
1404 const ceph::bufferlist& header_bl,
1405 segment_nonce_t expected_nonce);
1406
1407bool validate_records_metadata(
1408 const ceph::bufferlist& md_bl);
1409
1410bool validate_records_data(
1411 const record_group_header_t& header,
1412 const ceph::bufferlist& data_bl);
1413
1414struct record_extent_infos_t {
1415 record_header_t header;
1416 std::vector<extent_info_t> extent_infos;
1417};
1418std::optional<std::vector<record_extent_infos_t> >
1419try_decode_extent_infos(
1420 const record_group_header_t& header,
1421 const ceph::bufferlist& md_bl);
1422
1423struct record_deltas_t {
1424 paddr_t record_block_base;
1425 std::vector<delta_info_t> deltas;
1426};
1427std::optional<std::vector<record_deltas_t> >
1428try_decode_deltas(
1429 const record_group_header_t& header,
1430 const ceph::bufferlist& md_bl,
1431 paddr_t record_block_base);
1432
1433struct write_result_t {
1434 journal_seq_t start_seq;
1435 segment_off_t length;
1436
1437 journal_seq_t get_end_seq() const {
1438 return start_seq.add_offset(length);
1439 }
1440};
1441
1442struct record_locator_t {
1443 paddr_t record_block_base;
1444 write_result_t write_result;
1445};
1446
1447/// scan segment for end incrementally
1448struct scan_valid_records_cursor {
1449 bool last_valid_header_found = false;
1450 journal_seq_t seq;
1451 journal_seq_t last_committed;
1452
1453 struct found_record_group_t {
1454 paddr_t offset;
1455 record_group_header_t header;
1456 bufferlist mdbuffer;
1457
1458 found_record_group_t(
1459 paddr_t offset,
1460 const record_group_header_t &header,
1461 const bufferlist &mdbuffer)
1462 : offset(offset), header(header), mdbuffer(mdbuffer) {}
1463 };
1464 std::deque<found_record_group_t> pending_record_groups;
1465
1466 bool is_complete() const {
1467 return last_valid_header_found && pending_record_groups.empty();
1468 }
1469
1470 segment_id_t get_segment_id() const {
1471 return seq.offset.as_seg_paddr().get_segment_id();
1472 }
1473
1474 segment_off_t get_segment_offset() const {
1475 return seq.offset.as_seg_paddr().get_segment_off();
1476 }
1477
1478 void increment(segment_off_t off) {
1479 auto& seg_addr = seq.offset.as_seg_paddr();
1480 seg_addr.set_segment_off(
1481 seg_addr.get_segment_off() + off);
1482 }
1483
1484 scan_valid_records_cursor(
1485 journal_seq_t seq)
1486 : seq(seq) {}
1487};
1488
1489inline const seg_paddr_t& paddr_t::as_seg_paddr() const {
1490 assert(get_addr_type() == addr_types_t::SEGMENT);
1491 return *static_cast<const seg_paddr_t*>(this);
1492}
1493
1494inline seg_paddr_t& paddr_t::as_seg_paddr() {
1495 assert(get_addr_type() == addr_types_t::SEGMENT);
1496 return *static_cast<seg_paddr_t*>(this);
1497}
1498
1499inline paddr_t paddr_t::operator-(paddr_t rhs) const {
1500 if (get_addr_type() == addr_types_t::SEGMENT) {
1501 auto& seg_addr = as_seg_paddr();
1502 return seg_addr - rhs;
1503 }
1504 ceph_assert(0 == "not supported type");
1505 return paddr_t{};
1506}
1507
1508#define PADDR_OPERATION(a_type, base, func) \
1509 if (get_addr_type() == a_type) { \
1510 return static_cast<const base*>(this)->func; \
1511 }
1512
1513inline paddr_t paddr_t::add_offset(int32_t o) const {
1514 PADDR_OPERATION(addr_types_t::SEGMENT, seg_paddr_t, add_offset(o))
1515 ceph_assert(0 == "not supported type");
1516 return paddr_t{};
1517}
1518
1519inline paddr_t paddr_t::add_relative(paddr_t o) const {
1520 PADDR_OPERATION(addr_types_t::SEGMENT, seg_paddr_t, add_relative(o))
1521 ceph_assert(0 == "not supported type");
1522 return paddr_t{};
1523}
1524
1525inline paddr_t paddr_t::add_block_relative(paddr_t o) const {
1526 PADDR_OPERATION(addr_types_t::SEGMENT, seg_paddr_t, add_block_relative(o))
1527 ceph_assert(0 == "not supported type");
1528 return paddr_t{};
1529}
1530
1531inline paddr_t paddr_t::add_record_relative(paddr_t o) const {
1532 PADDR_OPERATION(addr_types_t::SEGMENT, seg_paddr_t, add_record_relative(o))
1533 ceph_assert(0 == "not supported type");
1534 return paddr_t{};
1535}
1536
1537inline paddr_t paddr_t::maybe_relative_to(paddr_t o) const {
1538 PADDR_OPERATION(addr_types_t::SEGMENT, seg_paddr_t, maybe_relative_to(o))
1539 ceph_assert(0 == "not supported type");
1540 return paddr_t{};
1541}
1542
f67539c2
TL
1543}
1544
1545WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::seastore_meta_t)
20effc67 1546WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_id_t)
f67539c2
TL
1547WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::paddr_t)
1548WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::journal_seq_t)
1549WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::delta_info_t)
20effc67
TL
1550WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::record_header_t)
1551WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::record_group_header_t)
1552WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::extent_info_t)
1553WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_header_t)
1554WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::rbm_alloc_delta_t)
1555
1556template<>
1557struct denc_traits<crimson::os::seastore::device_type_t> {
1558 static constexpr bool supported = true;
1559 static constexpr bool featured = false;
1560 static constexpr bool bounded = true;
1561 static constexpr bool need_contiguous = false;
1562
1563 static void bound_encode(
1564 const crimson::os::seastore::device_type_t &o,
1565 size_t& p,
1566 uint64_t f=0) {
1567 p += sizeof(crimson::os::seastore::device_type_t);
1568 }
1569 template<class It>
1570 static std::enable_if_t<!is_const_iterator_v<It>>
1571 encode(
1572 const crimson::os::seastore::device_type_t &o,
1573 It& p,
1574 uint64_t f=0) {
1575 get_pos_add<crimson::os::seastore::device_type_t>(p) = o;
1576 }
1577 template<class It>
1578 static std::enable_if_t<is_const_iterator_v<It>>
1579 decode(
1580 crimson::os::seastore::device_type_t& o,
1581 It& p,
1582 uint64_t f=0) {
1583 o = get_pos_add<crimson::os::seastore::device_type_t>(p);
1584 }
1585 static void decode(
1586 crimson::os::seastore::device_type_t& o,
1587 ceph::buffer::list::const_iterator &p) {
1588 p.copy(sizeof(crimson::os::seastore::device_type_t),
1589 reinterpret_cast<char*>(&o));
1590 }
1591};