]> git.proxmox.com Git - ceph.git/blame - ceph/src/crimson/os/seastore/seastore_types.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / crimson / os / seastore / seastore_types.cc
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "crimson/os/seastore/seastore_types.h"
20effc67
TL
5#include "crimson/common/log.h"
6
7namespace {
8
1e59de90
TL
9seastar::logger& journal_logger() {
10 return crimson::get_logger(ceph_subsys_seastore_journal);
20effc67
TL
11}
12
13}
f67539c2
TL
14
15namespace crimson::os::seastore {
16
1e59de90
TL
/// Returns true when `offset` is an exact multiple of `alignment`.
/// `alignment` must be non-zero (it is used as a divisor).
bool is_aligned(uint64_t offset, uint64_t alignment)
{
  const uint64_t remainder = offset % alignment;
  return remainder == 0;
}
21
22std::ostream& operator<<(std::ostream &out, const omap_root_t &root)
23{
24 return out << "omap_root{addr=" << root.addr
25 << ", depth=" << root.depth
26 << ", hint=" << root.hint
27 << ", mutated=" << root.mutated
28 << "}";
29}
30
20effc67
TL
31std::ostream& operator<<(std::ostream& out, const seastore_meta_t& meta)
32{
33 return out << meta.seastore_id;
34}
35
1e59de90 36std::ostream &operator<<(std::ostream &out, const device_id_printer_t &id)
f67539c2 37{
1e59de90
TL
38 auto _id = id.id;
39 if (_id == DEVICE_ID_NULL) {
40 return out << "Dev(NULL)";
41 } else if (_id == DEVICE_ID_RECORD_RELATIVE) {
42 return out << "Dev(RR)";
43 } else if (_id == DEVICE_ID_BLOCK_RELATIVE) {
44 return out << "Dev(BR)";
45 } else if (_id == DEVICE_ID_DELAYED) {
46 return out << "Dev(DELAYED)";
47 } else if (_id == DEVICE_ID_FAKE) {
48 return out << "Dev(FAKE)";
49 } else if (_id == DEVICE_ID_ZERO) {
50 return out << "Dev(ZERO)";
51 } else if (_id == DEVICE_ID_ROOT) {
52 return out << "Dev(ROOT)";
53 } else {
54 return out << "Dev(" << (unsigned)_id << ")";
55 }
56}
57
58std::ostream &operator<<(std::ostream &out, const segment_id_t &segment)
59{
60 if (segment == NULL_SEG_ID) {
61 return out << "Seg[NULL]";
62 } else {
63 return out << "Seg[" << device_id_printer_t{segment.device_id()}
64 << "," << segment.device_segment_id()
65 << "]";
66 }
f67539c2
TL
67}
68
1e59de90 69std::ostream& operator<<(std::ostream& out, segment_type_t t)
f67539c2 70{
1e59de90
TL
71 switch(t) {
72 case segment_type_t::JOURNAL:
73 return out << "JOURNAL";
74 case segment_type_t::OOL:
75 return out << "OOL";
76 case segment_type_t::NULL_SEG:
77 return out << "NULL_SEG";
78 default:
79 return out << "INVALID_SEGMENT_TYPE!";
80 }
f67539c2
TL
81}
82
1e59de90 83std::ostream& operator<<(std::ostream& out, segment_seq_printer_t seq)
20effc67 84{
1e59de90
TL
85 if (seq.seq == NULL_SEG_SEQ) {
86 return out << "sseq(NULL)";
87 } else {
88 return out << "sseq(" << seq.seq << ")";
89 }
20effc67
TL
90}
91
f67539c2
TL
92std::ostream &operator<<(std::ostream &out, const paddr_t &rhs)
93{
1e59de90
TL
94 auto id = rhs.get_device_id();
95 out << "paddr<";
20effc67 96 if (rhs == P_ADDR_NULL) {
1e59de90 97 out << "NULL";
20effc67 98 } else if (rhs == P_ADDR_MIN) {
1e59de90
TL
99 out << "MIN";
100 } else if (rhs == P_ADDR_ZERO) {
101 out << "ZERO";
102 } else if (has_device_off(id)) {
103 auto &s = rhs.as_res_paddr();
104 out << device_id_printer_t{id}
105 << ","
106 << s.get_device_off();
107 } else if (rhs.get_addr_type() == paddr_types_t::SEGMENT) {
108 auto &s = rhs.as_seg_paddr();
109 out << s.get_segment_id()
110 << ","
111 << s.get_segment_off();
112 } else if (rhs.get_addr_type() == paddr_types_t::RANDOM_BLOCK) {
113 auto &s = rhs.as_blk_paddr();
114 out << device_id_printer_t{s.get_device_id()}
115 << ","
116 << s.get_device_off();
20effc67 117 } else {
1e59de90 118 out << "INVALID!";
20effc67 119 }
f67539c2
TL
120 return out << ">";
121}
122
1e59de90
TL
123journal_seq_t journal_seq_t::add_offset(
124 journal_type_t type,
125 device_off_t off,
126 device_off_t roll_start,
127 device_off_t roll_size) const
128{
129 assert(offset.is_absolute());
130 assert(off <= DEVICE_OFF_MAX && off >= DEVICE_OFF_MIN);
131 assert(roll_start >= 0);
132 assert(roll_size > 0);
133
134 segment_seq_t jseq = segment_seq;
135 device_off_t joff;
136 if (type == journal_type_t::SEGMENTED) {
137 joff = offset.as_seg_paddr().get_segment_off();
138 } else {
139 assert(type == journal_type_t::RANDOM_BLOCK);
140 auto boff = offset.as_blk_paddr().get_device_off();
141 joff = boff;
142 }
143 auto roll_end = roll_start + roll_size;
144 assert(joff >= roll_start);
145 assert(joff <= roll_end);
146
147 if (off >= 0) {
148 device_off_t new_jseq = jseq + (off / roll_size);
149 joff += (off % roll_size);
150 if (joff >= roll_end) {
151 ++new_jseq;
152 joff -= roll_size;
153 }
154 assert(new_jseq < MAX_SEG_SEQ);
155 jseq = static_cast<segment_seq_t>(new_jseq);
156 } else {
157 device_off_t mod = (-off) / roll_size;
158 joff -= ((-off) % roll_size);
159 if (joff < roll_start) {
160 ++mod;
161 joff += roll_size;
162 }
163 if (jseq >= mod) {
164 jseq -= mod;
165 } else {
166 return JOURNAL_SEQ_MIN;
167 }
168 }
169 assert(joff >= roll_start);
170 assert(joff < roll_end);
171 return journal_seq_t{jseq, make_block_relative_paddr(joff)};
172}
173
174device_off_t journal_seq_t::relative_to(
175 journal_type_t type,
176 const journal_seq_t& r,
177 device_off_t roll_start,
178 device_off_t roll_size) const
179{
180 assert(offset.is_absolute());
181 assert(r.offset.is_absolute());
182 assert(roll_start >= 0);
183 assert(roll_size > 0);
184
185 device_off_t ret = static_cast<device_off_t>(segment_seq) - r.segment_seq;
186 ret *= roll_size;
187 if (type == journal_type_t::SEGMENTED) {
188 ret += (static_cast<device_off_t>(offset.as_seg_paddr().get_segment_off()) -
189 static_cast<device_off_t>(r.offset.as_seg_paddr().get_segment_off()));
190 } else {
191 assert(type == journal_type_t::RANDOM_BLOCK);
192 ret += offset.as_blk_paddr().get_device_off() -
193 r.offset.as_blk_paddr().get_device_off();
194 }
195 assert(ret <= DEVICE_OFF_MAX && ret >= DEVICE_OFF_MIN);
196 return ret;
197}
198
f67539c2
TL
199std::ostream &operator<<(std::ostream &out, const journal_seq_t &seq)
200{
1e59de90
TL
201 if (seq == JOURNAL_SEQ_NULL) {
202 return out << "JOURNAL_SEQ_NULL";
203 } else if (seq == JOURNAL_SEQ_MIN) {
204 return out << "JOURNAL_SEQ_MIN";
205 } else {
206 return out << "jseq("
207 << segment_seq_printer_t{seq.segment_seq}
208 << ", " << seq.offset
209 << ")";
210 }
f67539c2
TL
211}
212
213std::ostream &operator<<(std::ostream &out, extent_types_t t)
214{
215 switch (t) {
216 case extent_types_t::ROOT:
217 return out << "ROOT";
218 case extent_types_t::LADDR_INTERNAL:
219 return out << "LADDR_INTERNAL";
220 case extent_types_t::LADDR_LEAF:
221 return out << "LADDR_LEAF";
1e59de90
TL
222 case extent_types_t::DINK_LADDR_LEAF:
223 return out << "LADDR_LEAF";
f67539c2
TL
224 case extent_types_t::ONODE_BLOCK_STAGED:
225 return out << "ONODE_BLOCK_STAGED";
20effc67
TL
226 case extent_types_t::OMAP_INNER:
227 return out << "OMAP_INNER";
228 case extent_types_t::OMAP_LEAF:
229 return out << "OMAP_LEAF";
230 case extent_types_t::COLL_BLOCK:
231 return out << "COLL_BLOCK";
232 case extent_types_t::OBJECT_DATA_BLOCK:
233 return out << "OBJECT_DATA_BLOCK";
234 case extent_types_t::RETIRED_PLACEHOLDER:
235 return out << "RETIRED_PLACEHOLDER";
f67539c2
TL
236 case extent_types_t::TEST_BLOCK:
237 return out << "TEST_BLOCK";
238 case extent_types_t::TEST_BLOCK_PHYSICAL:
239 return out << "TEST_BLOCK_PHYSICAL";
1e59de90
TL
240 case extent_types_t::BACKREF_INTERNAL:
241 return out << "BACKREF_INTERNAL";
242 case extent_types_t::BACKREF_LEAF:
243 return out << "BACKREF_LEAF";
f67539c2
TL
244 case extent_types_t::NONE:
245 return out << "NONE";
246 default:
247 return out << "UNKNOWN";
248 }
249}
250
1e59de90
TL
251std::ostream &operator<<(std::ostream &out, rewrite_gen_printer_t gen)
252{
253 if (gen.gen == NULL_GENERATION) {
254 return out << "GEN_NULL";
255 } else if (gen.gen == INIT_GENERATION) {
256 return out << "GEN_INIT";
257 } else if (gen.gen == INLINE_GENERATION) {
258 return out << "GEN_INL";
259 } else if (gen.gen == OOL_GENERATION) {
260 return out << "GEN_OOL";
261 } else if (gen.gen > REWRITE_GENERATIONS) {
262 return out << "GEN_INVALID(" << (unsigned)gen.gen << ")!";
263 } else {
264 return out << "GEN(" << (unsigned)gen.gen << ")";
265 }
266}
267
268std::ostream &operator<<(std::ostream &out, data_category_t c)
269{
270 switch (c) {
271 case data_category_t::METADATA:
272 return out << "MD";
273 case data_category_t::DATA:
274 return out << "DATA";
275 default:
276 return out << "INVALID_CATEGORY!";
277 }
278}
279
280std::ostream &operator<<(std::ostream &out, sea_time_point_printer_t tp)
281{
282 if (tp.tp == NULL_TIME) {
283 return out << "tp(NULL)";
284 }
285 auto time = seastar::lowres_system_clock::to_time_t(tp.tp);
286 char buf[32];
287 std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&time));
288 return out << "tp(" << buf << ")";
289}
290
291std::ostream &operator<<(std::ostream &out, mod_time_point_printer_t tp) {
292 auto time = mod_to_timepoint(tp.tp);
293 return out << "mod_" << sea_time_point_printer_t{time};
294}
295
f67539c2
TL
296std::ostream &operator<<(std::ostream &out, const laddr_list_t &rhs)
297{
298 bool first = false;
299 for (auto &i: rhs) {
300 out << (first ? '[' : ',') << '(' << i.first << ',' << i.second << ')';
301 first = true;
302 }
303 return out << ']';
304}
305std::ostream &operator<<(std::ostream &out, const paddr_list_t &rhs)
306{
307 bool first = false;
308 for (auto &i: rhs) {
309 out << (first ? '[' : ',') << '(' << i.first << ',' << i.second << ')';
310 first = true;
311 }
312 return out << ']';
313}
314
1e59de90 315std::ostream &operator<<(std::ostream &out, const delta_info_t &delta)
f67539c2 316{
1e59de90
TL
317 return out << "delta_info_t("
318 << "type: " << delta.type
319 << ", paddr: " << delta.paddr
320 << ", laddr: " << delta.laddr
321 << ", prev_crc: " << delta.prev_crc
322 << ", final_crc: " << delta.final_crc
323 << ", length: " << delta.length
324 << ", pversion: " << delta.pversion
325 << ", ext_seq: " << delta.ext_seq
326 << ", seg_type: " << delta.seg_type
f67539c2
TL
327 << ")";
328}
329
1e59de90
TL
330std::ostream &operator<<(std::ostream &out, const journal_tail_delta_t &delta)
331{
332 return out << "journal_tail_delta_t("
333 << "alloc_tail=" << delta.alloc_tail
334 << ", dirty_tail=" << delta.dirty_tail
335 << ")";
336}
337
20effc67
TL
338std::ostream &operator<<(std::ostream &out, const extent_info_t &info)
339{
340 return out << "extent_info_t("
341 << "type: " << info.type
342 << ", addr: " << info.addr
343 << ", len: " << info.len
344 << ")";
345}
346
347std::ostream &operator<<(std::ostream &out, const segment_header_t &header)
348{
349 return out << "segment_header_t("
1e59de90
TL
350 << header.physical_segment_id
351 << " " << header.type
352 << " " << segment_seq_printer_t{header.segment_seq}
353 << " " << header.category
354 << " " << rewrite_gen_printer_t{header.generation}
355 << ", dirty_tail=" << header.dirty_tail
356 << ", alloc_tail=" << header.alloc_tail
357 << ", segment_nonce=" << header.segment_nonce
358 << ")";
359}
360
361std::ostream &operator<<(std::ostream &out, const segment_tail_t &tail)
362{
363 return out << "segment_tail_t("
364 << tail.physical_segment_id
365 << " " << tail.type
366 << " " << segment_seq_printer_t{tail.segment_seq}
367 << ", segment_nonce=" << tail.segment_nonce
368 << ", modify_time=" << mod_time_point_printer_t{tail.modify_time}
369 << ", num_extents=" << tail.num_extents
370 << ")";
20effc67
TL
371}
372
373extent_len_t record_size_t::get_raw_mdlength() const
374{
375 // empty record is allowed to submit
376 return plain_mdlength +
377 ceph::encoded_sizeof_bounded<record_header_t>();
378}
379
380void record_size_t::account_extent(extent_len_t extent_len)
381{
382 assert(extent_len);
383 plain_mdlength += ceph::encoded_sizeof_bounded<extent_info_t>();
384 dlength += extent_len;
385}
386
387void record_size_t::account(const delta_info_t& delta)
388{
389 assert(delta.bl.length());
390 plain_mdlength += ceph::encoded_sizeof(delta);
391}
392
1e59de90
TL
393std::ostream &operator<<(std::ostream &os, transaction_type_t type)
394{
395 switch (type) {
396 case transaction_type_t::MUTATE:
397 return os << "MUTATE";
398 case transaction_type_t::READ:
399 return os << "READ";
400 case transaction_type_t::TRIM_DIRTY:
401 return os << "TRIM_DIRTY";
402 case transaction_type_t::TRIM_ALLOC:
403 return os << "TRIM_ALLOC";
404 case transaction_type_t::CLEANER_MAIN:
405 return os << "CLEANER_MAIN";
406 case transaction_type_t::CLEANER_COLD:
407 return os << "CLEANER_COLD";
408 case transaction_type_t::MAX:
409 return os << "TRANS_TYPE_NULL";
410 default:
411 return os << "INVALID_TRANS_TYPE("
412 << static_cast<std::size_t>(type)
413 << ")";
414 }
415}
416
417std::ostream &operator<<(std::ostream& out, const record_size_t& rsize)
418{
419 return out << "record_size_t("
420 << "raw_md=" << rsize.get_raw_mdlength()
421 << ", data=" << rsize.dlength
422 << ")";
423}
424
425std::ostream &operator<<(std::ostream& out, const record_t& r)
426{
427 return out << "record_t("
428 << "type=" << r.type
429 << ", num_extents=" << r.extents.size()
430 << ", num_deltas=" << r.deltas.size()
431 << ", modify_time=" << sea_time_point_printer_t{r.modify_time}
432 << ")";
433}
434
435std::ostream &operator<<(std::ostream& out, const record_header_t& r)
436{
437 return out << "record_header_t("
438 << "type=" << r.type
439 << ", num_extents=" << r.extents
440 << ", num_deltas=" << r.deltas
441 << ", modify_time=" << mod_time_point_printer_t{r.modify_time}
442 << ")";
443}
444
445std::ostream& operator<<(std::ostream& out, const record_group_header_t& h)
446{
447 return out << "record_group_header_t("
448 << "num_records=" << h.records
449 << ", mdlength=" << h.mdlength
450 << ", dlength=" << h.dlength
451 << ", nonce=" << h.segment_nonce
452 << ", committed_to=" << h.committed_to
453 << ", data_crc=" << h.data_crc
454 << ")";
455}
456
20effc67
TL
457extent_len_t record_group_size_t::get_raw_mdlength() const
458{
459 return plain_mdlength +
460 sizeof(checksum_t) +
461 ceph::encoded_sizeof_bounded<record_group_header_t>();
462}
463
464void record_group_size_t::account(
465 const record_size_t& rsize,
466 extent_len_t _block_size)
467{
468 // empty record is allowed to submit
469 assert(_block_size > 0);
470 assert(rsize.dlength % _block_size == 0);
471 assert(block_size == 0 || block_size == _block_size);
472 plain_mdlength += rsize.get_raw_mdlength();
473 dlength += rsize.dlength;
474 block_size = _block_size;
475}
476
1e59de90
TL
477std::ostream& operator<<(std::ostream& out, const record_group_size_t& size)
478{
479 return out << "record_group_size_t("
480 << "raw_md=" << size.get_raw_mdlength()
481 << ", data=" << size.dlength
482 << ", block_size=" << size.block_size
483 << ", fullness=" << size.get_fullness()
484 << ")";
485}
486
487std::ostream& operator<<(std::ostream& out, const record_group_t& rg)
488{
489 return out << "record_group_t("
490 << "num_records=" << rg.records.size()
491 << ", " << rg.size
492 << ")";
493}
494
20effc67
TL
495ceph::bufferlist encode_record(
496 record_t&& record,
497 extent_len_t block_size,
498 const journal_seq_t& committed_to,
499 segment_nonce_t current_segment_nonce)
500{
501 record_group_t record_group(std::move(record), block_size);
502 return encode_records(
503 record_group,
504 committed_to,
505 current_segment_nonce);
506}
507
508ceph::bufferlist encode_records(
509 record_group_t& record_group,
510 const journal_seq_t& committed_to,
511 segment_nonce_t current_segment_nonce)
512{
513 assert(record_group.size.block_size > 0);
514 assert(record_group.records.size() > 0);
515
516 bufferlist data_bl;
517 for (auto& r: record_group.records) {
518 for (auto& i: r.extents) {
519 assert(i.bl.length());
520 data_bl.append(i.bl);
521 }
522 }
523
524 bufferlist bl;
525 record_group_header_t header{
526 static_cast<extent_len_t>(record_group.records.size()),
527 record_group.size.get_mdlength(),
528 record_group.size.dlength,
529 current_segment_nonce,
530 committed_to,
531 data_bl.crc32c(-1)
532 };
533 encode(header, bl);
534
535 auto metadata_crc_filler = bl.append_hole(sizeof(checksum_t));
536
537 for (auto& r: record_group.records) {
538 record_header_t rheader{
1e59de90 539 r.type,
20effc67
TL
540 (extent_len_t)r.deltas.size(),
541 (extent_len_t)r.extents.size(),
1e59de90 542 timepoint_to_mod(r.modify_time)
20effc67
TL
543 };
544 encode(rheader, bl);
545 }
546 for (auto& r: record_group.records) {
547 for (const auto& i: r.extents) {
548 encode(extent_info_t(i), bl);
549 }
550 }
551 for (auto& r: record_group.records) {
552 for (const auto& i: r.deltas) {
553 encode(i, bl);
554 }
555 }
556 ceph_assert(bl.length() == record_group.size.get_raw_mdlength());
557
558 auto aligned_mdlength = record_group.size.get_mdlength();
559 if (bl.length() != aligned_mdlength) {
560 assert(bl.length() < aligned_mdlength);
561 bl.append_zero(aligned_mdlength - bl.length());
562 }
563
564 auto bliter = bl.cbegin();
565 auto metadata_crc = bliter.crc32c(
566 ceph::encoded_sizeof_bounded<record_group_header_t>(),
567 -1);
568 bliter += sizeof(checksum_t); /* metadata crc hole */
569 metadata_crc = bliter.crc32c(
570 bliter.get_remaining(),
571 metadata_crc);
572 ceph_le32 metadata_crc_le;
573 metadata_crc_le = metadata_crc;
574 metadata_crc_filler.copy_in(
575 sizeof(checksum_t),
576 reinterpret_cast<const char *>(&metadata_crc_le));
577
578 bl.claim_append(data_bl);
579 ceph_assert(bl.length() == record_group.size.get_encoded_length());
580
581 record_group.clear();
582 return bl;
583}
584
585std::optional<record_group_header_t>
586try_decode_records_header(
587 const ceph::bufferlist& header_bl,
588 segment_nonce_t expected_nonce)
589{
590 auto bp = header_bl.cbegin();
591 record_group_header_t header;
592 try {
593 decode(header, bp);
594 } catch (ceph::buffer::error &e) {
1e59de90 595 journal_logger().debug(
20effc67
TL
596 "try_decode_records_header: failed, "
597 "cannot decode record_group_header_t, got {}.",
1e59de90 598 e.what());
20effc67
TL
599 return std::nullopt;
600 }
601 if (header.segment_nonce != expected_nonce) {
1e59de90 602 journal_logger().debug(
20effc67
TL
603 "try_decode_records_header: failed, record_group_header nonce mismatch, "
604 "read {}, expected {}!",
605 header.segment_nonce,
606 expected_nonce);
607 return std::nullopt;
608 }
609 return header;
610}
611
612bool validate_records_metadata(
613 const ceph::bufferlist& md_bl)
614{
615 auto bliter = md_bl.cbegin();
616 auto test_crc = bliter.crc32c(
617 ceph::encoded_sizeof_bounded<record_group_header_t>(),
618 -1);
619 ceph_le32 recorded_crc_le;
620 decode(recorded_crc_le, bliter);
621 uint32_t recorded_crc = recorded_crc_le;
622 test_crc = bliter.crc32c(
623 bliter.get_remaining(),
624 test_crc);
625 bool success = (test_crc == recorded_crc);
626 if (!success) {
1e59de90
TL
627 journal_logger().debug(
628 "validate_records_metadata: failed, metadata crc mismatch.");
20effc67
TL
629 }
630 return success;
631}
632
633bool validate_records_data(
634 const record_group_header_t& header,
635 const ceph::bufferlist& data_bl)
636{
637 bool success = (data_bl.crc32c(-1) == header.data_crc);
638 if (!success) {
1e59de90
TL
639 journal_logger().debug(
640 "validate_records_data: failed, data crc mismatch!");
20effc67
TL
641 }
642 return success;
643}
644
20effc67
TL
645std::optional<std::vector<record_header_t>>
646try_decode_record_headers(
647 const record_group_header_t& header,
648 const ceph::bufferlist& md_bl)
649{
650 auto bliter = md_bl.cbegin();
651 bliter += ceph::encoded_sizeof_bounded<record_group_header_t>();
652 bliter += sizeof(checksum_t); /* metadata crc hole */
653 std::vector<record_header_t> record_headers(header.records);
654 for (auto &&i: record_headers) {
655 try {
656 decode(i, bliter);
657 } catch (ceph::buffer::error &e) {
1e59de90 658 journal_logger().debug(
20effc67
TL
659 "try_decode_record_headers: failed, "
660 "cannot decode record_header_t, got {}.",
1e59de90 661 e.what());
20effc67
TL
662 return std::nullopt;
663 }
664 }
665 return record_headers;
666}
667
20effc67
TL
668std::optional<std::vector<record_extent_infos_t> >
669try_decode_extent_infos(
670 const record_group_header_t& header,
671 const ceph::bufferlist& md_bl)
672{
673 auto maybe_headers = try_decode_record_headers(header, md_bl);
674 if (!maybe_headers) {
20effc67
TL
675 return std::nullopt;
676 }
677
678 auto bliter = md_bl.cbegin();
679 bliter += ceph::encoded_sizeof_bounded<record_group_header_t>();
680 bliter += sizeof(checksum_t); /* metadata crc hole */
681 bliter += (ceph::encoded_sizeof_bounded<record_header_t>() *
682 maybe_headers->size());
683
684 std::vector<record_extent_infos_t> record_extent_infos(
685 maybe_headers->size());
686 auto result_iter = record_extent_infos.begin();
687 for (auto& h: *maybe_headers) {
688 result_iter->header = h;
689 result_iter->extent_infos.resize(h.extents);
690 for (auto& i: result_iter->extent_infos) {
691 try {
692 decode(i, bliter);
693 } catch (ceph::buffer::error &e) {
1e59de90 694 journal_logger().debug(
20effc67
TL
695 "try_decode_extent_infos: failed, "
696 "cannot decode extent_info_t, got {}.",
1e59de90 697 e.what());
20effc67
TL
698 return std::nullopt;
699 }
700 }
701 ++result_iter;
702 }
703 return record_extent_infos;
704}
705
706std::optional<std::vector<record_deltas_t> >
707try_decode_deltas(
708 const record_group_header_t& header,
709 const ceph::bufferlist& md_bl,
710 paddr_t record_block_base)
711{
712 auto maybe_record_extent_infos = try_decode_extent_infos(header, md_bl);
713 if (!maybe_record_extent_infos) {
20effc67
TL
714 return std::nullopt;
715 }
716
717 auto bliter = md_bl.cbegin();
718 bliter += ceph::encoded_sizeof_bounded<record_group_header_t>();
719 bliter += sizeof(checksum_t); /* metadata crc hole */
720 bliter += (ceph::encoded_sizeof_bounded<record_header_t>() *
721 maybe_record_extent_infos->size());
722 for (auto& r: *maybe_record_extent_infos) {
723 bliter += (ceph::encoded_sizeof_bounded<extent_info_t>() *
724 r.extent_infos.size());
725 }
726
727 std::vector<record_deltas_t> record_deltas(
728 maybe_record_extent_infos->size());
729 auto result_iter = record_deltas.begin();
730 for (auto& r: *maybe_record_extent_infos) {
731 result_iter->record_block_base = record_block_base;
732 result_iter->deltas.resize(r.header.deltas);
733 for (auto& i: result_iter->deltas) {
734 try {
1e59de90
TL
735 decode(i.second, bliter);
736 i.first = mod_to_timepoint(r.header.modify_time);
20effc67 737 } catch (ceph::buffer::error &e) {
1e59de90 738 journal_logger().debug(
20effc67
TL
739 "try_decode_deltas: failed, "
740 "cannot decode delta_info_t, got {}.",
1e59de90 741 e.what());
20effc67
TL
742 return std::nullopt;
743 }
744 }
745 for (auto& i: r.extent_infos) {
1e59de90 746 record_block_base = record_block_base.add_offset(i.len);
20effc67
TL
747 }
748 ++result_iter;
749 }
750 return record_deltas;
751}
752
1e59de90
TL
753std::ostream& operator<<(std::ostream& out, placement_hint_t h)
754{
755 switch (h) {
756 case placement_hint_t::HOT:
757 return out << "Hint(HOT)";
758 case placement_hint_t::COLD:
759 return out << "Hint(COLD)";
760 case placement_hint_t::REWRITE:
761 return out << "Hint(REWRITE)";
762 case PLACEMENT_HINT_NULL:
763 return out << "Hint(NULL)";
764 default:
765 return out << "INVALID_PLACEMENT_HINT_TYPE!";
766 }
767}
768
20effc67
TL
769bool can_delay_allocation(device_type_t type) {
770 // Some types of device may not support delayed allocation, for example PMEM.
1e59de90
TL
771 // All types of device currently support delayed allocation.
772 return true;
20effc67
TL
773}
774
775device_type_t string_to_device_type(std::string type) {
1e59de90
TL
776 if (type == "HDD") {
777 return device_type_t::HDD;
20effc67 778 }
1e59de90
TL
779 if (type == "SSD") {
780 return device_type_t::SSD;
20effc67 781 }
1e59de90
TL
782 if (type == "ZNS") {
783 return device_type_t::ZNS;
784 }
785 if (type == "RANDOM_BLOCK_SSD") {
786 return device_type_t::RANDOM_BLOCK_SSD;
20effc67
TL
787 }
788 return device_type_t::NONE;
789}
790
791std::ostream& operator<<(std::ostream& out, device_type_t t)
792{
793 switch (t) {
794 case device_type_t::NONE:
795 return out << "NONE";
1e59de90
TL
796 case device_type_t::HDD:
797 return out << "HDD";
798 case device_type_t::SSD:
799 return out << "SSD";
800 case device_type_t::ZNS:
801 return out << "ZNS";
802 case device_type_t::EPHEMERAL_COLD:
803 return out << "EPHEMERAL_COLD";
804 case device_type_t::EPHEMERAL_MAIN:
805 return out << "EPHEMERAL_MAIN";
806 case device_type_t::RANDOM_BLOCK_SSD:
807 return out << "RANDOM_BLOCK_SSD";
808 case device_type_t::RANDOM_BLOCK_EPHEMERAL:
809 return out << "RANDOM_BLOCK_EPHEMERAL";
20effc67
TL
810 default:
811 return out << "INVALID_DEVICE_TYPE!";
812 }
813}
814
1e59de90
TL
815std::ostream& operator<<(std::ostream& out, backend_type_t btype) {
816 if (btype == backend_type_t::SEGMENTED) {
817 return out << "SEGMENTED";
818 } else {
819 return out << "RANDOM_BLOCK";
820 }
821}
822
823std::ostream& operator<<(std::ostream& out, const write_result_t& w)
20effc67 824{
1e59de90
TL
825 return out << "write_result_t("
826 << "start=" << w.start_seq
827 << ", length=" << w.length
828 << ")";
20effc67
TL
829}
830
1e59de90 831std::ostream& operator<<(std::ostream& out, const record_locator_t& l)
20effc67 832{
1e59de90
TL
833 return out << "record_locator_t("
834 << "block_base=" << l.record_block_base
835 << ", " << l.write_result
836 << ")";
20effc67
TL
837}
838
1e59de90
TL
839void scan_valid_records_cursor::emplace_record_group(
840 const record_group_header_t& header, ceph::bufferlist&& md_bl)
841{
842 auto new_committed_to = header.committed_to;
843 ceph_assert(last_committed == JOURNAL_SEQ_NULL ||
844 last_committed <= new_committed_to);
845 last_committed = new_committed_to;
846 pending_record_groups.emplace_back(
847 seq.offset,
848 header,
849 std::move(md_bl));
850 increment_seq(header.dlength + header.mdlength);
851 ceph_assert(new_committed_to == JOURNAL_SEQ_NULL ||
852 new_committed_to < seq);
853}
854
855std::ostream& operator<<(std::ostream& out, const scan_valid_records_cursor& c)
856{
857 return out << "cursor(last_valid_header_found=" << c.last_valid_header_found
858 << ", seq=" << c.seq
859 << ", last_committed=" << c.last_committed
860 << ", pending_record_groups=" << c.pending_record_groups.size()
861 << ", num_consumed_records=" << c.num_consumed_records
862 << ")";
863}
20effc67 864
f67539c2 865}