]> git.proxmox.com Git - ceph.git/blame - ceph/src/crimson/os/seastore/seastore_types.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / crimson / os / seastore / seastore_types.cc
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "crimson/os/seastore/seastore_types.h"
20effc67
TL
5#include "crimson/common/log.h"
6
7namespace {
8
9seastar::logger& logger() {
10 return crimson::get_logger(ceph_subsys_seastore_tm);
11}
12
13}
f67539c2
TL
14
15namespace crimson::os::seastore {
16
20effc67
TL
17std::ostream& operator<<(std::ostream& out, const seastore_meta_t& meta)
18{
19 return out << meta.seastore_id;
20}
21
f67539c2
TL
22std::ostream &segment_to_stream(std::ostream &out, const segment_id_t &t)
23{
24 if (t == NULL_SEG_ID)
25 return out << "NULL_SEG";
f67539c2
TL
26 else if (t == FAKE_SEG_ID)
27 return out << "FAKE_SEG";
28 else
29 return out << t;
30}
31
32std::ostream &offset_to_stream(std::ostream &out, const segment_off_t &t)
33{
34 if (t == NULL_SEG_OFF)
35 return out << "NULL_OFF";
36 else
37 return out << t;
38}
39
20effc67
TL
40std::ostream &operator<<(std::ostream &out, const segment_id_t& segment)
41{
42 return out << "[" << (uint64_t)segment.device_id() << ","
43 << segment.device_segment_id() << "]";
44}
45
f67539c2
TL
46std::ostream &operator<<(std::ostream &out, const paddr_t &rhs)
47{
48 out << "paddr_t<";
20effc67
TL
49 if (rhs == P_ADDR_NULL) {
50 out << "NULL_PADDR";
51 } else if (rhs == P_ADDR_MIN) {
52 out << "MIN_PADDR";
53 } else if (rhs.is_block_relative()) {
54 out << "BLOCK_REG";
55 } else if (rhs.is_record_relative()) {
56 out << "RECORD_REG";
57 } else if (rhs.get_device_id() == DEVICE_ID_DELAYED) {
58 out << "DELAYED_TEMP";
59 } else if (rhs.get_addr_type() == addr_types_t::SEGMENT) {
60 const seg_paddr_t& s = rhs.as_seg_paddr();
61 segment_to_stream(out, s.get_segment_id());
62 out << ", ";
63 offset_to_stream(out, s.get_segment_off());
64 } else {
65 out << "INVALID";
66 }
f67539c2
TL
67 return out << ">";
68}
69
70std::ostream &operator<<(std::ostream &out, const journal_seq_t &seq)
71{
72 return out << "journal_seq_t(segment_seq="
73 << seq.segment_seq << ", offset="
74 << seq.offset
75 << ")";
76}
77
78std::ostream &operator<<(std::ostream &out, extent_types_t t)
79{
80 switch (t) {
81 case extent_types_t::ROOT:
82 return out << "ROOT";
83 case extent_types_t::LADDR_INTERNAL:
84 return out << "LADDR_INTERNAL";
85 case extent_types_t::LADDR_LEAF:
86 return out << "LADDR_LEAF";
f67539c2
TL
87 case extent_types_t::ONODE_BLOCK_STAGED:
88 return out << "ONODE_BLOCK_STAGED";
20effc67
TL
89 case extent_types_t::OMAP_INNER:
90 return out << "OMAP_INNER";
91 case extent_types_t::OMAP_LEAF:
92 return out << "OMAP_LEAF";
93 case extent_types_t::COLL_BLOCK:
94 return out << "COLL_BLOCK";
95 case extent_types_t::OBJECT_DATA_BLOCK:
96 return out << "OBJECT_DATA_BLOCK";
97 case extent_types_t::RETIRED_PLACEHOLDER:
98 return out << "RETIRED_PLACEHOLDER";
f67539c2
TL
99 case extent_types_t::TEST_BLOCK:
100 return out << "TEST_BLOCK";
101 case extent_types_t::TEST_BLOCK_PHYSICAL:
102 return out << "TEST_BLOCK_PHYSICAL";
103 case extent_types_t::NONE:
104 return out << "NONE";
105 default:
106 return out << "UNKNOWN";
107 }
108}
109
110std::ostream &operator<<(std::ostream &out, const laddr_list_t &rhs)
111{
112 bool first = false;
113 for (auto &i: rhs) {
114 out << (first ? '[' : ',') << '(' << i.first << ',' << i.second << ')';
115 first = true;
116 }
117 return out << ']';
118}
119std::ostream &operator<<(std::ostream &out, const paddr_list_t &rhs)
120{
121 bool first = false;
122 for (auto &i: rhs) {
123 out << (first ? '[' : ',') << '(' << i.first << ',' << i.second << ')';
124 first = true;
125 }
126 return out << ']';
127}
128
129std::ostream &operator<<(std::ostream &lhs, const delta_info_t &rhs)
130{
131 return lhs << "delta_info_t("
132 << "type: " << rhs.type
133 << ", paddr: " << rhs.paddr
134 << ", laddr: " << rhs.laddr
135 << ", prev_crc: " << rhs.prev_crc
136 << ", final_crc: " << rhs.final_crc
137 << ", length: " << rhs.length
138 << ", pversion: " << rhs.pversion
139 << ")";
140}
141
20effc67
TL
142std::ostream &operator<<(std::ostream &out, const extent_info_t &info)
143{
144 return out << "extent_info_t("
145 << "type: " << info.type
146 << ", addr: " << info.addr
147 << ", len: " << info.len
148 << ")";
149}
150
151std::ostream &operator<<(std::ostream &out, const segment_header_t &header)
152{
153 return out << "segment_header_t("
154 << "segment_seq=" << header.journal_segment_seq
155 << ", physical_segment_id=" << header.physical_segment_id
156 << ", journal_tail=" << header.journal_tail
157 << ", segment_nonce=" << header.segment_nonce
158 << ", out-of-line=" << header.out_of_line
159 << ")";
160}
161
162extent_len_t record_size_t::get_raw_mdlength() const
163{
164 // empty record is allowed to submit
165 return plain_mdlength +
166 ceph::encoded_sizeof_bounded<record_header_t>();
167}
168
169void record_size_t::account_extent(extent_len_t extent_len)
170{
171 assert(extent_len);
172 plain_mdlength += ceph::encoded_sizeof_bounded<extent_info_t>();
173 dlength += extent_len;
174}
175
176void record_size_t::account(const delta_info_t& delta)
177{
178 assert(delta.bl.length());
179 plain_mdlength += ceph::encoded_sizeof(delta);
180}
181
182extent_len_t record_group_size_t::get_raw_mdlength() const
183{
184 return plain_mdlength +
185 sizeof(checksum_t) +
186 ceph::encoded_sizeof_bounded<record_group_header_t>();
187}
188
189void record_group_size_t::account(
190 const record_size_t& rsize,
191 extent_len_t _block_size)
192{
193 // empty record is allowed to submit
194 assert(_block_size > 0);
195 assert(rsize.dlength % _block_size == 0);
196 assert(block_size == 0 || block_size == _block_size);
197 plain_mdlength += rsize.get_raw_mdlength();
198 dlength += rsize.dlength;
199 block_size = _block_size;
200}
201
202ceph::bufferlist encode_record(
203 record_t&& record,
204 extent_len_t block_size,
205 const journal_seq_t& committed_to,
206 segment_nonce_t current_segment_nonce)
207{
208 record_group_t record_group(std::move(record), block_size);
209 return encode_records(
210 record_group,
211 committed_to,
212 current_segment_nonce);
213}
214
215ceph::bufferlist encode_records(
216 record_group_t& record_group,
217 const journal_seq_t& committed_to,
218 segment_nonce_t current_segment_nonce)
219{
220 assert(record_group.size.block_size > 0);
221 assert(record_group.records.size() > 0);
222
223 bufferlist data_bl;
224 for (auto& r: record_group.records) {
225 for (auto& i: r.extents) {
226 assert(i.bl.length());
227 data_bl.append(i.bl);
228 }
229 }
230
231 bufferlist bl;
232 record_group_header_t header{
233 static_cast<extent_len_t>(record_group.records.size()),
234 record_group.size.get_mdlength(),
235 record_group.size.dlength,
236 current_segment_nonce,
237 committed_to,
238 data_bl.crc32c(-1)
239 };
240 encode(header, bl);
241
242 auto metadata_crc_filler = bl.append_hole(sizeof(checksum_t));
243
244 for (auto& r: record_group.records) {
245 record_header_t rheader{
246 (extent_len_t)r.deltas.size(),
247 (extent_len_t)r.extents.size(),
248 };
249 encode(rheader, bl);
250 }
251 for (auto& r: record_group.records) {
252 for (const auto& i: r.extents) {
253 encode(extent_info_t(i), bl);
254 }
255 }
256 for (auto& r: record_group.records) {
257 for (const auto& i: r.deltas) {
258 encode(i, bl);
259 }
260 }
261 ceph_assert(bl.length() == record_group.size.get_raw_mdlength());
262
263 auto aligned_mdlength = record_group.size.get_mdlength();
264 if (bl.length() != aligned_mdlength) {
265 assert(bl.length() < aligned_mdlength);
266 bl.append_zero(aligned_mdlength - bl.length());
267 }
268
269 auto bliter = bl.cbegin();
270 auto metadata_crc = bliter.crc32c(
271 ceph::encoded_sizeof_bounded<record_group_header_t>(),
272 -1);
273 bliter += sizeof(checksum_t); /* metadata crc hole */
274 metadata_crc = bliter.crc32c(
275 bliter.get_remaining(),
276 metadata_crc);
277 ceph_le32 metadata_crc_le;
278 metadata_crc_le = metadata_crc;
279 metadata_crc_filler.copy_in(
280 sizeof(checksum_t),
281 reinterpret_cast<const char *>(&metadata_crc_le));
282
283 bl.claim_append(data_bl);
284 ceph_assert(bl.length() == record_group.size.get_encoded_length());
285
286 record_group.clear();
287 return bl;
288}
289
290std::optional<record_group_header_t>
291try_decode_records_header(
292 const ceph::bufferlist& header_bl,
293 segment_nonce_t expected_nonce)
294{
295 auto bp = header_bl.cbegin();
296 record_group_header_t header;
297 try {
298 decode(header, bp);
299 } catch (ceph::buffer::error &e) {
300 logger().debug(
301 "try_decode_records_header: failed, "
302 "cannot decode record_group_header_t, got {}.",
303 e);
304 return std::nullopt;
305 }
306 if (header.segment_nonce != expected_nonce) {
307 logger().debug(
308 "try_decode_records_header: failed, record_group_header nonce mismatch, "
309 "read {}, expected {}!",
310 header.segment_nonce,
311 expected_nonce);
312 return std::nullopt;
313 }
314 return header;
315}
316
317bool validate_records_metadata(
318 const ceph::bufferlist& md_bl)
319{
320 auto bliter = md_bl.cbegin();
321 auto test_crc = bliter.crc32c(
322 ceph::encoded_sizeof_bounded<record_group_header_t>(),
323 -1);
324 ceph_le32 recorded_crc_le;
325 decode(recorded_crc_le, bliter);
326 uint32_t recorded_crc = recorded_crc_le;
327 test_crc = bliter.crc32c(
328 bliter.get_remaining(),
329 test_crc);
330 bool success = (test_crc == recorded_crc);
331 if (!success) {
332 logger().debug("validate_records_metadata: failed, metadata crc mismatch.");
333 }
334 return success;
335}
336
337bool validate_records_data(
338 const record_group_header_t& header,
339 const ceph::bufferlist& data_bl)
340{
341 bool success = (data_bl.crc32c(-1) == header.data_crc);
342 if (!success) {
343 logger().debug("validate_records_data: failed, data crc mismatch!");
344 }
345 return success;
346}
347
348namespace {
349
350std::optional<std::vector<record_header_t>>
351try_decode_record_headers(
352 const record_group_header_t& header,
353 const ceph::bufferlist& md_bl)
354{
355 auto bliter = md_bl.cbegin();
356 bliter += ceph::encoded_sizeof_bounded<record_group_header_t>();
357 bliter += sizeof(checksum_t); /* metadata crc hole */
358 std::vector<record_header_t> record_headers(header.records);
359 for (auto &&i: record_headers) {
360 try {
361 decode(i, bliter);
362 } catch (ceph::buffer::error &e) {
363 logger().debug(
364 "try_decode_record_headers: failed, "
365 "cannot decode record_header_t, got {}.",
366 e);
367 return std::nullopt;
368 }
369 }
370 return record_headers;
371}
372
373}
374
375std::optional<std::vector<record_extent_infos_t> >
376try_decode_extent_infos(
377 const record_group_header_t& header,
378 const ceph::bufferlist& md_bl)
379{
380 auto maybe_headers = try_decode_record_headers(header, md_bl);
381 if (!maybe_headers) {
382 logger().debug(
383 "try_decode_extent_infos: failed, cannot decode record headers.");
384 return std::nullopt;
385 }
386
387 auto bliter = md_bl.cbegin();
388 bliter += ceph::encoded_sizeof_bounded<record_group_header_t>();
389 bliter += sizeof(checksum_t); /* metadata crc hole */
390 bliter += (ceph::encoded_sizeof_bounded<record_header_t>() *
391 maybe_headers->size());
392
393 std::vector<record_extent_infos_t> record_extent_infos(
394 maybe_headers->size());
395 auto result_iter = record_extent_infos.begin();
396 for (auto& h: *maybe_headers) {
397 result_iter->header = h;
398 result_iter->extent_infos.resize(h.extents);
399 for (auto& i: result_iter->extent_infos) {
400 try {
401 decode(i, bliter);
402 } catch (ceph::buffer::error &e) {
403 logger().debug(
404 "try_decode_extent_infos: failed, "
405 "cannot decode extent_info_t, got {}.",
406 e);
407 return std::nullopt;
408 }
409 }
410 ++result_iter;
411 }
412 return record_extent_infos;
413}
414
415std::optional<std::vector<record_deltas_t> >
416try_decode_deltas(
417 const record_group_header_t& header,
418 const ceph::bufferlist& md_bl,
419 paddr_t record_block_base)
420{
421 auto maybe_record_extent_infos = try_decode_extent_infos(header, md_bl);
422 if (!maybe_record_extent_infos) {
423 logger().debug(
424 "try_decode_deltas: failed, cannot decode extent_infos.");
425 return std::nullopt;
426 }
427
428 auto bliter = md_bl.cbegin();
429 bliter += ceph::encoded_sizeof_bounded<record_group_header_t>();
430 bliter += sizeof(checksum_t); /* metadata crc hole */
431 bliter += (ceph::encoded_sizeof_bounded<record_header_t>() *
432 maybe_record_extent_infos->size());
433 for (auto& r: *maybe_record_extent_infos) {
434 bliter += (ceph::encoded_sizeof_bounded<extent_info_t>() *
435 r.extent_infos.size());
436 }
437
438 std::vector<record_deltas_t> record_deltas(
439 maybe_record_extent_infos->size());
440 auto result_iter = record_deltas.begin();
441 for (auto& r: *maybe_record_extent_infos) {
442 result_iter->record_block_base = record_block_base;
443 result_iter->deltas.resize(r.header.deltas);
444 for (auto& i: result_iter->deltas) {
445 try {
446 decode(i, bliter);
447 } catch (ceph::buffer::error &e) {
448 logger().debug(
449 "try_decode_deltas: failed, "
450 "cannot decode delta_info_t, got {}.",
451 e);
452 return std::nullopt;
453 }
454 }
455 for (auto& i: r.extent_infos) {
456 auto& seg_addr = record_block_base.as_seg_paddr();
457 seg_addr.set_segment_off(seg_addr.get_segment_off() + i.len);
458 }
459 ++result_iter;
460 }
461 return record_deltas;
462}
463
464bool can_delay_allocation(device_type_t type) {
465 // Some types of device may not support delayed allocation, for example PMEM.
466 return type <= device_type_t::RANDOM_BLOCK;
467}
468
469device_type_t string_to_device_type(std::string type) {
470 if (type == "segmented") {
471 return device_type_t::SEGMENTED;
472 }
473 if (type == "random_block") {
474 return device_type_t::RANDOM_BLOCK;
475 }
476 if (type == "pmem") {
477 return device_type_t::PMEM;
478 }
479 return device_type_t::NONE;
480}
481
482std::ostream& operator<<(std::ostream& out, device_type_t t)
483{
484 switch (t) {
485 case device_type_t::NONE:
486 return out << "NONE";
487 case device_type_t::SEGMENTED:
488 return out << "SEGMENTED";
489 case device_type_t::RANDOM_BLOCK:
490 return out << "RANDOM_BLOCK";
491 case device_type_t::PMEM:
492 return out << "PMEM";
493 default:
494 return out << "INVALID_DEVICE_TYPE!";
495 }
496}
497
498paddr_t convert_blk_paddr_to_paddr(blk_paddr_t addr, size_t block_size,
499 uint32_t blocks_per_segment, device_id_t d_id)
500{
501 segment_id_t id = segment_id_t {
502 d_id,
503 (device_segment_id_t)(addr / (block_size * blocks_per_segment))
504 };
505 segment_off_t off = addr % (block_size * blocks_per_segment);
506 return paddr_t::make_seg_paddr(id, off);
507}
508
509blk_paddr_t convert_paddr_to_blk_paddr(paddr_t addr, size_t block_size,
510 uint32_t blocks_per_segment)
511{
512 seg_paddr_t& s = addr.as_seg_paddr();
513 return (blk_paddr_t)(s.get_segment_id().device_segment_id() *
514 (block_size * blocks_per_segment) + s.get_segment_off());
515}
516
517
f67539c2 518}