]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "crimson/os/seastore/seastore_types.h" | |
20effc67 TL |
5 | #include "crimson/common/log.h" |
6 | ||
7 | namespace { | |
8 | ||
9 | seastar::logger& logger() { | |
10 | return crimson::get_logger(ceph_subsys_seastore_tm); | |
11 | } | |
12 | ||
13 | } | |
f67539c2 TL |
14 | |
15 | namespace crimson::os::seastore { | |
16 | ||
20effc67 TL |
17 | std::ostream& operator<<(std::ostream& out, const seastore_meta_t& meta) |
18 | { | |
19 | return out << meta.seastore_id; | |
20 | } | |
21 | ||
f67539c2 TL |
22 | std::ostream &segment_to_stream(std::ostream &out, const segment_id_t &t) |
23 | { | |
24 | if (t == NULL_SEG_ID) | |
25 | return out << "NULL_SEG"; | |
f67539c2 TL |
26 | else if (t == FAKE_SEG_ID) |
27 | return out << "FAKE_SEG"; | |
28 | else | |
29 | return out << t; | |
30 | } | |
31 | ||
32 | std::ostream &offset_to_stream(std::ostream &out, const segment_off_t &t) | |
33 | { | |
34 | if (t == NULL_SEG_OFF) | |
35 | return out << "NULL_OFF"; | |
36 | else | |
37 | return out << t; | |
38 | } | |
39 | ||
20effc67 TL |
40 | std::ostream &operator<<(std::ostream &out, const segment_id_t& segment) |
41 | { | |
42 | return out << "[" << (uint64_t)segment.device_id() << "," | |
43 | << segment.device_segment_id() << "]"; | |
44 | } | |
45 | ||
f67539c2 TL |
46 | std::ostream &operator<<(std::ostream &out, const paddr_t &rhs) |
47 | { | |
48 | out << "paddr_t<"; | |
20effc67 TL |
49 | if (rhs == P_ADDR_NULL) { |
50 | out << "NULL_PADDR"; | |
51 | } else if (rhs == P_ADDR_MIN) { | |
52 | out << "MIN_PADDR"; | |
53 | } else if (rhs.is_block_relative()) { | |
54 | out << "BLOCK_REG"; | |
55 | } else if (rhs.is_record_relative()) { | |
56 | out << "RECORD_REG"; | |
57 | } else if (rhs.get_device_id() == DEVICE_ID_DELAYED) { | |
58 | out << "DELAYED_TEMP"; | |
59 | } else if (rhs.get_addr_type() == addr_types_t::SEGMENT) { | |
60 | const seg_paddr_t& s = rhs.as_seg_paddr(); | |
61 | segment_to_stream(out, s.get_segment_id()); | |
62 | out << ", "; | |
63 | offset_to_stream(out, s.get_segment_off()); | |
64 | } else { | |
65 | out << "INVALID"; | |
66 | } | |
f67539c2 TL |
67 | return out << ">"; |
68 | } | |
69 | ||
70 | std::ostream &operator<<(std::ostream &out, const journal_seq_t &seq) | |
71 | { | |
72 | return out << "journal_seq_t(segment_seq=" | |
73 | << seq.segment_seq << ", offset=" | |
74 | << seq.offset | |
75 | << ")"; | |
76 | } | |
77 | ||
78 | std::ostream &operator<<(std::ostream &out, extent_types_t t) | |
79 | { | |
80 | switch (t) { | |
81 | case extent_types_t::ROOT: | |
82 | return out << "ROOT"; | |
83 | case extent_types_t::LADDR_INTERNAL: | |
84 | return out << "LADDR_INTERNAL"; | |
85 | case extent_types_t::LADDR_LEAF: | |
86 | return out << "LADDR_LEAF"; | |
f67539c2 TL |
87 | case extent_types_t::ONODE_BLOCK_STAGED: |
88 | return out << "ONODE_BLOCK_STAGED"; | |
20effc67 TL |
89 | case extent_types_t::OMAP_INNER: |
90 | return out << "OMAP_INNER"; | |
91 | case extent_types_t::OMAP_LEAF: | |
92 | return out << "OMAP_LEAF"; | |
93 | case extent_types_t::COLL_BLOCK: | |
94 | return out << "COLL_BLOCK"; | |
95 | case extent_types_t::OBJECT_DATA_BLOCK: | |
96 | return out << "OBJECT_DATA_BLOCK"; | |
97 | case extent_types_t::RETIRED_PLACEHOLDER: | |
98 | return out << "RETIRED_PLACEHOLDER"; | |
f67539c2 TL |
99 | case extent_types_t::TEST_BLOCK: |
100 | return out << "TEST_BLOCK"; | |
101 | case extent_types_t::TEST_BLOCK_PHYSICAL: | |
102 | return out << "TEST_BLOCK_PHYSICAL"; | |
103 | case extent_types_t::NONE: | |
104 | return out << "NONE"; | |
105 | default: | |
106 | return out << "UNKNOWN"; | |
107 | } | |
108 | } | |
109 | ||
110 | std::ostream &operator<<(std::ostream &out, const laddr_list_t &rhs) | |
111 | { | |
112 | bool first = false; | |
113 | for (auto &i: rhs) { | |
114 | out << (first ? '[' : ',') << '(' << i.first << ',' << i.second << ')'; | |
115 | first = true; | |
116 | } | |
117 | return out << ']'; | |
118 | } | |
119 | std::ostream &operator<<(std::ostream &out, const paddr_list_t &rhs) | |
120 | { | |
121 | bool first = false; | |
122 | for (auto &i: rhs) { | |
123 | out << (first ? '[' : ',') << '(' << i.first << ',' << i.second << ')'; | |
124 | first = true; | |
125 | } | |
126 | return out << ']'; | |
127 | } | |
128 | ||
129 | std::ostream &operator<<(std::ostream &lhs, const delta_info_t &rhs) | |
130 | { | |
131 | return lhs << "delta_info_t(" | |
132 | << "type: " << rhs.type | |
133 | << ", paddr: " << rhs.paddr | |
134 | << ", laddr: " << rhs.laddr | |
135 | << ", prev_crc: " << rhs.prev_crc | |
136 | << ", final_crc: " << rhs.final_crc | |
137 | << ", length: " << rhs.length | |
138 | << ", pversion: " << rhs.pversion | |
139 | << ")"; | |
140 | } | |
141 | ||
20effc67 TL |
142 | std::ostream &operator<<(std::ostream &out, const extent_info_t &info) |
143 | { | |
144 | return out << "extent_info_t(" | |
145 | << "type: " << info.type | |
146 | << ", addr: " << info.addr | |
147 | << ", len: " << info.len | |
148 | << ")"; | |
149 | } | |
150 | ||
151 | std::ostream &operator<<(std::ostream &out, const segment_header_t &header) | |
152 | { | |
153 | return out << "segment_header_t(" | |
154 | << "segment_seq=" << header.journal_segment_seq | |
155 | << ", physical_segment_id=" << header.physical_segment_id | |
156 | << ", journal_tail=" << header.journal_tail | |
157 | << ", segment_nonce=" << header.segment_nonce | |
158 | << ", out-of-line=" << header.out_of_line | |
159 | << ")"; | |
160 | } | |
161 | ||
162 | extent_len_t record_size_t::get_raw_mdlength() const | |
163 | { | |
164 | // empty record is allowed to submit | |
165 | return plain_mdlength + | |
166 | ceph::encoded_sizeof_bounded<record_header_t>(); | |
167 | } | |
168 | ||
169 | void record_size_t::account_extent(extent_len_t extent_len) | |
170 | { | |
171 | assert(extent_len); | |
172 | plain_mdlength += ceph::encoded_sizeof_bounded<extent_info_t>(); | |
173 | dlength += extent_len; | |
174 | } | |
175 | ||
176 | void record_size_t::account(const delta_info_t& delta) | |
177 | { | |
178 | assert(delta.bl.length()); | |
179 | plain_mdlength += ceph::encoded_sizeof(delta); | |
180 | } | |
181 | ||
182 | extent_len_t record_group_size_t::get_raw_mdlength() const | |
183 | { | |
184 | return plain_mdlength + | |
185 | sizeof(checksum_t) + | |
186 | ceph::encoded_sizeof_bounded<record_group_header_t>(); | |
187 | } | |
188 | ||
189 | void record_group_size_t::account( | |
190 | const record_size_t& rsize, | |
191 | extent_len_t _block_size) | |
192 | { | |
193 | // empty record is allowed to submit | |
194 | assert(_block_size > 0); | |
195 | assert(rsize.dlength % _block_size == 0); | |
196 | assert(block_size == 0 || block_size == _block_size); | |
197 | plain_mdlength += rsize.get_raw_mdlength(); | |
198 | dlength += rsize.dlength; | |
199 | block_size = _block_size; | |
200 | } | |
201 | ||
202 | ceph::bufferlist encode_record( | |
203 | record_t&& record, | |
204 | extent_len_t block_size, | |
205 | const journal_seq_t& committed_to, | |
206 | segment_nonce_t current_segment_nonce) | |
207 | { | |
208 | record_group_t record_group(std::move(record), block_size); | |
209 | return encode_records( | |
210 | record_group, | |
211 | committed_to, | |
212 | current_segment_nonce); | |
213 | } | |
214 | ||
215 | ceph::bufferlist encode_records( | |
216 | record_group_t& record_group, | |
217 | const journal_seq_t& committed_to, | |
218 | segment_nonce_t current_segment_nonce) | |
219 | { | |
220 | assert(record_group.size.block_size > 0); | |
221 | assert(record_group.records.size() > 0); | |
222 | ||
223 | bufferlist data_bl; | |
224 | for (auto& r: record_group.records) { | |
225 | for (auto& i: r.extents) { | |
226 | assert(i.bl.length()); | |
227 | data_bl.append(i.bl); | |
228 | } | |
229 | } | |
230 | ||
231 | bufferlist bl; | |
232 | record_group_header_t header{ | |
233 | static_cast<extent_len_t>(record_group.records.size()), | |
234 | record_group.size.get_mdlength(), | |
235 | record_group.size.dlength, | |
236 | current_segment_nonce, | |
237 | committed_to, | |
238 | data_bl.crc32c(-1) | |
239 | }; | |
240 | encode(header, bl); | |
241 | ||
242 | auto metadata_crc_filler = bl.append_hole(sizeof(checksum_t)); | |
243 | ||
244 | for (auto& r: record_group.records) { | |
245 | record_header_t rheader{ | |
246 | (extent_len_t)r.deltas.size(), | |
247 | (extent_len_t)r.extents.size(), | |
248 | }; | |
249 | encode(rheader, bl); | |
250 | } | |
251 | for (auto& r: record_group.records) { | |
252 | for (const auto& i: r.extents) { | |
253 | encode(extent_info_t(i), bl); | |
254 | } | |
255 | } | |
256 | for (auto& r: record_group.records) { | |
257 | for (const auto& i: r.deltas) { | |
258 | encode(i, bl); | |
259 | } | |
260 | } | |
261 | ceph_assert(bl.length() == record_group.size.get_raw_mdlength()); | |
262 | ||
263 | auto aligned_mdlength = record_group.size.get_mdlength(); | |
264 | if (bl.length() != aligned_mdlength) { | |
265 | assert(bl.length() < aligned_mdlength); | |
266 | bl.append_zero(aligned_mdlength - bl.length()); | |
267 | } | |
268 | ||
269 | auto bliter = bl.cbegin(); | |
270 | auto metadata_crc = bliter.crc32c( | |
271 | ceph::encoded_sizeof_bounded<record_group_header_t>(), | |
272 | -1); | |
273 | bliter += sizeof(checksum_t); /* metadata crc hole */ | |
274 | metadata_crc = bliter.crc32c( | |
275 | bliter.get_remaining(), | |
276 | metadata_crc); | |
277 | ceph_le32 metadata_crc_le; | |
278 | metadata_crc_le = metadata_crc; | |
279 | metadata_crc_filler.copy_in( | |
280 | sizeof(checksum_t), | |
281 | reinterpret_cast<const char *>(&metadata_crc_le)); | |
282 | ||
283 | bl.claim_append(data_bl); | |
284 | ceph_assert(bl.length() == record_group.size.get_encoded_length()); | |
285 | ||
286 | record_group.clear(); | |
287 | return bl; | |
288 | } | |
289 | ||
290 | std::optional<record_group_header_t> | |
291 | try_decode_records_header( | |
292 | const ceph::bufferlist& header_bl, | |
293 | segment_nonce_t expected_nonce) | |
294 | { | |
295 | auto bp = header_bl.cbegin(); | |
296 | record_group_header_t header; | |
297 | try { | |
298 | decode(header, bp); | |
299 | } catch (ceph::buffer::error &e) { | |
300 | logger().debug( | |
301 | "try_decode_records_header: failed, " | |
302 | "cannot decode record_group_header_t, got {}.", | |
303 | e); | |
304 | return std::nullopt; | |
305 | } | |
306 | if (header.segment_nonce != expected_nonce) { | |
307 | logger().debug( | |
308 | "try_decode_records_header: failed, record_group_header nonce mismatch, " | |
309 | "read {}, expected {}!", | |
310 | header.segment_nonce, | |
311 | expected_nonce); | |
312 | return std::nullopt; | |
313 | } | |
314 | return header; | |
315 | } | |
316 | ||
317 | bool validate_records_metadata( | |
318 | const ceph::bufferlist& md_bl) | |
319 | { | |
320 | auto bliter = md_bl.cbegin(); | |
321 | auto test_crc = bliter.crc32c( | |
322 | ceph::encoded_sizeof_bounded<record_group_header_t>(), | |
323 | -1); | |
324 | ceph_le32 recorded_crc_le; | |
325 | decode(recorded_crc_le, bliter); | |
326 | uint32_t recorded_crc = recorded_crc_le; | |
327 | test_crc = bliter.crc32c( | |
328 | bliter.get_remaining(), | |
329 | test_crc); | |
330 | bool success = (test_crc == recorded_crc); | |
331 | if (!success) { | |
332 | logger().debug("validate_records_metadata: failed, metadata crc mismatch."); | |
333 | } | |
334 | return success; | |
335 | } | |
336 | ||
337 | bool validate_records_data( | |
338 | const record_group_header_t& header, | |
339 | const ceph::bufferlist& data_bl) | |
340 | { | |
341 | bool success = (data_bl.crc32c(-1) == header.data_crc); | |
342 | if (!success) { | |
343 | logger().debug("validate_records_data: failed, data crc mismatch!"); | |
344 | } | |
345 | return success; | |
346 | } | |
347 | ||
348 | namespace { | |
349 | ||
350 | std::optional<std::vector<record_header_t>> | |
351 | try_decode_record_headers( | |
352 | const record_group_header_t& header, | |
353 | const ceph::bufferlist& md_bl) | |
354 | { | |
355 | auto bliter = md_bl.cbegin(); | |
356 | bliter += ceph::encoded_sizeof_bounded<record_group_header_t>(); | |
357 | bliter += sizeof(checksum_t); /* metadata crc hole */ | |
358 | std::vector<record_header_t> record_headers(header.records); | |
359 | for (auto &&i: record_headers) { | |
360 | try { | |
361 | decode(i, bliter); | |
362 | } catch (ceph::buffer::error &e) { | |
363 | logger().debug( | |
364 | "try_decode_record_headers: failed, " | |
365 | "cannot decode record_header_t, got {}.", | |
366 | e); | |
367 | return std::nullopt; | |
368 | } | |
369 | } | |
370 | return record_headers; | |
371 | } | |
372 | ||
373 | } | |
374 | ||
375 | std::optional<std::vector<record_extent_infos_t> > | |
376 | try_decode_extent_infos( | |
377 | const record_group_header_t& header, | |
378 | const ceph::bufferlist& md_bl) | |
379 | { | |
380 | auto maybe_headers = try_decode_record_headers(header, md_bl); | |
381 | if (!maybe_headers) { | |
382 | logger().debug( | |
383 | "try_decode_extent_infos: failed, cannot decode record headers."); | |
384 | return std::nullopt; | |
385 | } | |
386 | ||
387 | auto bliter = md_bl.cbegin(); | |
388 | bliter += ceph::encoded_sizeof_bounded<record_group_header_t>(); | |
389 | bliter += sizeof(checksum_t); /* metadata crc hole */ | |
390 | bliter += (ceph::encoded_sizeof_bounded<record_header_t>() * | |
391 | maybe_headers->size()); | |
392 | ||
393 | std::vector<record_extent_infos_t> record_extent_infos( | |
394 | maybe_headers->size()); | |
395 | auto result_iter = record_extent_infos.begin(); | |
396 | for (auto& h: *maybe_headers) { | |
397 | result_iter->header = h; | |
398 | result_iter->extent_infos.resize(h.extents); | |
399 | for (auto& i: result_iter->extent_infos) { | |
400 | try { | |
401 | decode(i, bliter); | |
402 | } catch (ceph::buffer::error &e) { | |
403 | logger().debug( | |
404 | "try_decode_extent_infos: failed, " | |
405 | "cannot decode extent_info_t, got {}.", | |
406 | e); | |
407 | return std::nullopt; | |
408 | } | |
409 | } | |
410 | ++result_iter; | |
411 | } | |
412 | return record_extent_infos; | |
413 | } | |
414 | ||
415 | std::optional<std::vector<record_deltas_t> > | |
416 | try_decode_deltas( | |
417 | const record_group_header_t& header, | |
418 | const ceph::bufferlist& md_bl, | |
419 | paddr_t record_block_base) | |
420 | { | |
421 | auto maybe_record_extent_infos = try_decode_extent_infos(header, md_bl); | |
422 | if (!maybe_record_extent_infos) { | |
423 | logger().debug( | |
424 | "try_decode_deltas: failed, cannot decode extent_infos."); | |
425 | return std::nullopt; | |
426 | } | |
427 | ||
428 | auto bliter = md_bl.cbegin(); | |
429 | bliter += ceph::encoded_sizeof_bounded<record_group_header_t>(); | |
430 | bliter += sizeof(checksum_t); /* metadata crc hole */ | |
431 | bliter += (ceph::encoded_sizeof_bounded<record_header_t>() * | |
432 | maybe_record_extent_infos->size()); | |
433 | for (auto& r: *maybe_record_extent_infos) { | |
434 | bliter += (ceph::encoded_sizeof_bounded<extent_info_t>() * | |
435 | r.extent_infos.size()); | |
436 | } | |
437 | ||
438 | std::vector<record_deltas_t> record_deltas( | |
439 | maybe_record_extent_infos->size()); | |
440 | auto result_iter = record_deltas.begin(); | |
441 | for (auto& r: *maybe_record_extent_infos) { | |
442 | result_iter->record_block_base = record_block_base; | |
443 | result_iter->deltas.resize(r.header.deltas); | |
444 | for (auto& i: result_iter->deltas) { | |
445 | try { | |
446 | decode(i, bliter); | |
447 | } catch (ceph::buffer::error &e) { | |
448 | logger().debug( | |
449 | "try_decode_deltas: failed, " | |
450 | "cannot decode delta_info_t, got {}.", | |
451 | e); | |
452 | return std::nullopt; | |
453 | } | |
454 | } | |
455 | for (auto& i: r.extent_infos) { | |
456 | auto& seg_addr = record_block_base.as_seg_paddr(); | |
457 | seg_addr.set_segment_off(seg_addr.get_segment_off() + i.len); | |
458 | } | |
459 | ++result_iter; | |
460 | } | |
461 | return record_deltas; | |
462 | } | |
463 | ||
464 | bool can_delay_allocation(device_type_t type) { | |
465 | // Some types of device may not support delayed allocation, for example PMEM. | |
466 | return type <= device_type_t::RANDOM_BLOCK; | |
467 | } | |
468 | ||
469 | device_type_t string_to_device_type(std::string type) { | |
470 | if (type == "segmented") { | |
471 | return device_type_t::SEGMENTED; | |
472 | } | |
473 | if (type == "random_block") { | |
474 | return device_type_t::RANDOM_BLOCK; | |
475 | } | |
476 | if (type == "pmem") { | |
477 | return device_type_t::PMEM; | |
478 | } | |
479 | return device_type_t::NONE; | |
480 | } | |
481 | ||
482 | std::ostream& operator<<(std::ostream& out, device_type_t t) | |
483 | { | |
484 | switch (t) { | |
485 | case device_type_t::NONE: | |
486 | return out << "NONE"; | |
487 | case device_type_t::SEGMENTED: | |
488 | return out << "SEGMENTED"; | |
489 | case device_type_t::RANDOM_BLOCK: | |
490 | return out << "RANDOM_BLOCK"; | |
491 | case device_type_t::PMEM: | |
492 | return out << "PMEM"; | |
493 | default: | |
494 | return out << "INVALID_DEVICE_TYPE!"; | |
495 | } | |
496 | } | |
497 | ||
498 | paddr_t convert_blk_paddr_to_paddr(blk_paddr_t addr, size_t block_size, | |
499 | uint32_t blocks_per_segment, device_id_t d_id) | |
500 | { | |
501 | segment_id_t id = segment_id_t { | |
502 | d_id, | |
503 | (device_segment_id_t)(addr / (block_size * blocks_per_segment)) | |
504 | }; | |
505 | segment_off_t off = addr % (block_size * blocks_per_segment); | |
506 | return paddr_t::make_seg_paddr(id, off); | |
507 | } | |
508 | ||
509 | blk_paddr_t convert_paddr_to_blk_paddr(paddr_t addr, size_t block_size, | |
510 | uint32_t blocks_per_segment) | |
511 | { | |
512 | seg_paddr_t& s = addr.as_seg_paddr(); | |
513 | return (blk_paddr_t)(s.get_segment_id().device_segment_id() * | |
514 | (block_size * blocks_per_segment) + s.get_segment_off()); | |
515 | } | |
516 | ||
517 | ||
f67539c2 | 518 | } |