]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "crimson/os/seastore/seastore_types.h" | |
20effc67 TL |
5 | #include "crimson/common/log.h" |
6 | ||
7 | namespace { | |
8 | ||
1e59de90 TL |
9 | seastar::logger& journal_logger() { |
10 | return crimson::get_logger(ceph_subsys_seastore_journal); | |
20effc67 TL |
11 | } |
12 | ||
13 | } | |
f67539c2 TL |
14 | |
15 | namespace crimson::os::seastore { | |
16 | ||
1e59de90 TL |
// Returns true when `offset` is an exact multiple of `alignment`.
//
// `alignment` must be non-zero: the modulo below is undefined otherwise, so
// the precondition is asserted like the other preconditions in this file.
bool is_aligned(uint64_t offset, uint64_t alignment)
{
  assert(alignment != 0);
  return (offset % alignment) == 0;
}
21 | ||
22 | std::ostream& operator<<(std::ostream &out, const omap_root_t &root) | |
23 | { | |
24 | return out << "omap_root{addr=" << root.addr | |
25 | << ", depth=" << root.depth | |
26 | << ", hint=" << root.hint | |
27 | << ", mutated=" << root.mutated | |
28 | << "}"; | |
29 | } | |
30 | ||
20effc67 TL |
31 | std::ostream& operator<<(std::ostream& out, const seastore_meta_t& meta) |
32 | { | |
33 | return out << meta.seastore_id; | |
34 | } | |
35 | ||
1e59de90 | 36 | std::ostream &operator<<(std::ostream &out, const device_id_printer_t &id) |
f67539c2 | 37 | { |
1e59de90 TL |
38 | auto _id = id.id; |
39 | if (_id == DEVICE_ID_NULL) { | |
40 | return out << "Dev(NULL)"; | |
41 | } else if (_id == DEVICE_ID_RECORD_RELATIVE) { | |
42 | return out << "Dev(RR)"; | |
43 | } else if (_id == DEVICE_ID_BLOCK_RELATIVE) { | |
44 | return out << "Dev(BR)"; | |
45 | } else if (_id == DEVICE_ID_DELAYED) { | |
46 | return out << "Dev(DELAYED)"; | |
47 | } else if (_id == DEVICE_ID_FAKE) { | |
48 | return out << "Dev(FAKE)"; | |
49 | } else if (_id == DEVICE_ID_ZERO) { | |
50 | return out << "Dev(ZERO)"; | |
51 | } else if (_id == DEVICE_ID_ROOT) { | |
52 | return out << "Dev(ROOT)"; | |
53 | } else { | |
54 | return out << "Dev(" << (unsigned)_id << ")"; | |
55 | } | |
56 | } | |
57 | ||
58 | std::ostream &operator<<(std::ostream &out, const segment_id_t &segment) | |
59 | { | |
60 | if (segment == NULL_SEG_ID) { | |
61 | return out << "Seg[NULL]"; | |
62 | } else { | |
63 | return out << "Seg[" << device_id_printer_t{segment.device_id()} | |
64 | << "," << segment.device_segment_id() | |
65 | << "]"; | |
66 | } | |
f67539c2 TL |
67 | } |
68 | ||
1e59de90 | 69 | std::ostream& operator<<(std::ostream& out, segment_type_t t) |
f67539c2 | 70 | { |
1e59de90 TL |
71 | switch(t) { |
72 | case segment_type_t::JOURNAL: | |
73 | return out << "JOURNAL"; | |
74 | case segment_type_t::OOL: | |
75 | return out << "OOL"; | |
76 | case segment_type_t::NULL_SEG: | |
77 | return out << "NULL_SEG"; | |
78 | default: | |
79 | return out << "INVALID_SEGMENT_TYPE!"; | |
80 | } | |
f67539c2 TL |
81 | } |
82 | ||
1e59de90 | 83 | std::ostream& operator<<(std::ostream& out, segment_seq_printer_t seq) |
20effc67 | 84 | { |
1e59de90 TL |
85 | if (seq.seq == NULL_SEG_SEQ) { |
86 | return out << "sseq(NULL)"; | |
87 | } else { | |
88 | return out << "sseq(" << seq.seq << ")"; | |
89 | } | |
20effc67 TL |
90 | } |
91 | ||
f67539c2 TL |
92 | std::ostream &operator<<(std::ostream &out, const paddr_t &rhs) |
93 | { | |
1e59de90 TL |
94 | auto id = rhs.get_device_id(); |
95 | out << "paddr<"; | |
20effc67 | 96 | if (rhs == P_ADDR_NULL) { |
1e59de90 | 97 | out << "NULL"; |
20effc67 | 98 | } else if (rhs == P_ADDR_MIN) { |
1e59de90 TL |
99 | out << "MIN"; |
100 | } else if (rhs == P_ADDR_ZERO) { | |
101 | out << "ZERO"; | |
102 | } else if (has_device_off(id)) { | |
103 | auto &s = rhs.as_res_paddr(); | |
104 | out << device_id_printer_t{id} | |
105 | << "," | |
106 | << s.get_device_off(); | |
107 | } else if (rhs.get_addr_type() == paddr_types_t::SEGMENT) { | |
108 | auto &s = rhs.as_seg_paddr(); | |
109 | out << s.get_segment_id() | |
110 | << "," | |
111 | << s.get_segment_off(); | |
112 | } else if (rhs.get_addr_type() == paddr_types_t::RANDOM_BLOCK) { | |
113 | auto &s = rhs.as_blk_paddr(); | |
114 | out << device_id_printer_t{s.get_device_id()} | |
115 | << "," | |
116 | << s.get_device_off(); | |
20effc67 | 117 | } else { |
1e59de90 | 118 | out << "INVALID!"; |
20effc67 | 119 | } |
f67539c2 TL |
120 | return out << ">"; |
121 | } | |
122 | ||
1e59de90 TL |
123 | journal_seq_t journal_seq_t::add_offset( |
124 | journal_type_t type, | |
125 | device_off_t off, | |
126 | device_off_t roll_start, | |
127 | device_off_t roll_size) const | |
128 | { | |
129 | assert(offset.is_absolute()); | |
130 | assert(off <= DEVICE_OFF_MAX && off >= DEVICE_OFF_MIN); | |
131 | assert(roll_start >= 0); | |
132 | assert(roll_size > 0); | |
133 | ||
134 | segment_seq_t jseq = segment_seq; | |
135 | device_off_t joff; | |
136 | if (type == journal_type_t::SEGMENTED) { | |
137 | joff = offset.as_seg_paddr().get_segment_off(); | |
138 | } else { | |
139 | assert(type == journal_type_t::RANDOM_BLOCK); | |
140 | auto boff = offset.as_blk_paddr().get_device_off(); | |
141 | joff = boff; | |
142 | } | |
143 | auto roll_end = roll_start + roll_size; | |
144 | assert(joff >= roll_start); | |
145 | assert(joff <= roll_end); | |
146 | ||
147 | if (off >= 0) { | |
148 | device_off_t new_jseq = jseq + (off / roll_size); | |
149 | joff += (off % roll_size); | |
150 | if (joff >= roll_end) { | |
151 | ++new_jseq; | |
152 | joff -= roll_size; | |
153 | } | |
154 | assert(new_jseq < MAX_SEG_SEQ); | |
155 | jseq = static_cast<segment_seq_t>(new_jseq); | |
156 | } else { | |
157 | device_off_t mod = (-off) / roll_size; | |
158 | joff -= ((-off) % roll_size); | |
159 | if (joff < roll_start) { | |
160 | ++mod; | |
161 | joff += roll_size; | |
162 | } | |
163 | if (jseq >= mod) { | |
164 | jseq -= mod; | |
165 | } else { | |
166 | return JOURNAL_SEQ_MIN; | |
167 | } | |
168 | } | |
169 | assert(joff >= roll_start); | |
170 | assert(joff < roll_end); | |
171 | return journal_seq_t{jseq, make_block_relative_paddr(joff)}; | |
172 | } | |
173 | ||
174 | device_off_t journal_seq_t::relative_to( | |
175 | journal_type_t type, | |
176 | const journal_seq_t& r, | |
177 | device_off_t roll_start, | |
178 | device_off_t roll_size) const | |
179 | { | |
180 | assert(offset.is_absolute()); | |
181 | assert(r.offset.is_absolute()); | |
182 | assert(roll_start >= 0); | |
183 | assert(roll_size > 0); | |
184 | ||
185 | device_off_t ret = static_cast<device_off_t>(segment_seq) - r.segment_seq; | |
186 | ret *= roll_size; | |
187 | if (type == journal_type_t::SEGMENTED) { | |
188 | ret += (static_cast<device_off_t>(offset.as_seg_paddr().get_segment_off()) - | |
189 | static_cast<device_off_t>(r.offset.as_seg_paddr().get_segment_off())); | |
190 | } else { | |
191 | assert(type == journal_type_t::RANDOM_BLOCK); | |
192 | ret += offset.as_blk_paddr().get_device_off() - | |
193 | r.offset.as_blk_paddr().get_device_off(); | |
194 | } | |
195 | assert(ret <= DEVICE_OFF_MAX && ret >= DEVICE_OFF_MIN); | |
196 | return ret; | |
197 | } | |
198 | ||
f67539c2 TL |
199 | std::ostream &operator<<(std::ostream &out, const journal_seq_t &seq) |
200 | { | |
1e59de90 TL |
201 | if (seq == JOURNAL_SEQ_NULL) { |
202 | return out << "JOURNAL_SEQ_NULL"; | |
203 | } else if (seq == JOURNAL_SEQ_MIN) { | |
204 | return out << "JOURNAL_SEQ_MIN"; | |
205 | } else { | |
206 | return out << "jseq(" | |
207 | << segment_seq_printer_t{seq.segment_seq} | |
208 | << ", " << seq.offset | |
209 | << ")"; | |
210 | } | |
f67539c2 TL |
211 | } |
212 | ||
213 | std::ostream &operator<<(std::ostream &out, extent_types_t t) | |
214 | { | |
215 | switch (t) { | |
216 | case extent_types_t::ROOT: | |
217 | return out << "ROOT"; | |
218 | case extent_types_t::LADDR_INTERNAL: | |
219 | return out << "LADDR_INTERNAL"; | |
220 | case extent_types_t::LADDR_LEAF: | |
221 | return out << "LADDR_LEAF"; | |
1e59de90 TL |
222 | case extent_types_t::DINK_LADDR_LEAF: |
223 | return out << "LADDR_LEAF"; | |
f67539c2 TL |
224 | case extent_types_t::ONODE_BLOCK_STAGED: |
225 | return out << "ONODE_BLOCK_STAGED"; | |
20effc67 TL |
226 | case extent_types_t::OMAP_INNER: |
227 | return out << "OMAP_INNER"; | |
228 | case extent_types_t::OMAP_LEAF: | |
229 | return out << "OMAP_LEAF"; | |
230 | case extent_types_t::COLL_BLOCK: | |
231 | return out << "COLL_BLOCK"; | |
232 | case extent_types_t::OBJECT_DATA_BLOCK: | |
233 | return out << "OBJECT_DATA_BLOCK"; | |
234 | case extent_types_t::RETIRED_PLACEHOLDER: | |
235 | return out << "RETIRED_PLACEHOLDER"; | |
f67539c2 TL |
236 | case extent_types_t::TEST_BLOCK: |
237 | return out << "TEST_BLOCK"; | |
238 | case extent_types_t::TEST_BLOCK_PHYSICAL: | |
239 | return out << "TEST_BLOCK_PHYSICAL"; | |
1e59de90 TL |
240 | case extent_types_t::BACKREF_INTERNAL: |
241 | return out << "BACKREF_INTERNAL"; | |
242 | case extent_types_t::BACKREF_LEAF: | |
243 | return out << "BACKREF_LEAF"; | |
f67539c2 TL |
244 | case extent_types_t::NONE: |
245 | return out << "NONE"; | |
246 | default: | |
247 | return out << "UNKNOWN"; | |
248 | } | |
249 | } | |
250 | ||
1e59de90 TL |
251 | std::ostream &operator<<(std::ostream &out, rewrite_gen_printer_t gen) |
252 | { | |
253 | if (gen.gen == NULL_GENERATION) { | |
254 | return out << "GEN_NULL"; | |
255 | } else if (gen.gen == INIT_GENERATION) { | |
256 | return out << "GEN_INIT"; | |
257 | } else if (gen.gen == INLINE_GENERATION) { | |
258 | return out << "GEN_INL"; | |
259 | } else if (gen.gen == OOL_GENERATION) { | |
260 | return out << "GEN_OOL"; | |
261 | } else if (gen.gen > REWRITE_GENERATIONS) { | |
262 | return out << "GEN_INVALID(" << (unsigned)gen.gen << ")!"; | |
263 | } else { | |
264 | return out << "GEN(" << (unsigned)gen.gen << ")"; | |
265 | } | |
266 | } | |
267 | ||
268 | std::ostream &operator<<(std::ostream &out, data_category_t c) | |
269 | { | |
270 | switch (c) { | |
271 | case data_category_t::METADATA: | |
272 | return out << "MD"; | |
273 | case data_category_t::DATA: | |
274 | return out << "DATA"; | |
275 | default: | |
276 | return out << "INVALID_CATEGORY!"; | |
277 | } | |
278 | } | |
279 | ||
280 | std::ostream &operator<<(std::ostream &out, sea_time_point_printer_t tp) | |
281 | { | |
282 | if (tp.tp == NULL_TIME) { | |
283 | return out << "tp(NULL)"; | |
284 | } | |
285 | auto time = seastar::lowres_system_clock::to_time_t(tp.tp); | |
286 | char buf[32]; | |
287 | std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&time)); | |
288 | return out << "tp(" << buf << ")"; | |
289 | } | |
290 | ||
291 | std::ostream &operator<<(std::ostream &out, mod_time_point_printer_t tp) { | |
292 | auto time = mod_to_timepoint(tp.tp); | |
293 | return out << "mod_" << sea_time_point_printer_t{time}; | |
294 | } | |
295 | ||
f67539c2 TL |
296 | std::ostream &operator<<(std::ostream &out, const laddr_list_t &rhs) |
297 | { | |
298 | bool first = false; | |
299 | for (auto &i: rhs) { | |
300 | out << (first ? '[' : ',') << '(' << i.first << ',' << i.second << ')'; | |
301 | first = true; | |
302 | } | |
303 | return out << ']'; | |
304 | } | |
305 | std::ostream &operator<<(std::ostream &out, const paddr_list_t &rhs) | |
306 | { | |
307 | bool first = false; | |
308 | for (auto &i: rhs) { | |
309 | out << (first ? '[' : ',') << '(' << i.first << ',' << i.second << ')'; | |
310 | first = true; | |
311 | } | |
312 | return out << ']'; | |
313 | } | |
314 | ||
1e59de90 | 315 | std::ostream &operator<<(std::ostream &out, const delta_info_t &delta) |
f67539c2 | 316 | { |
1e59de90 TL |
317 | return out << "delta_info_t(" |
318 | << "type: " << delta.type | |
319 | << ", paddr: " << delta.paddr | |
320 | << ", laddr: " << delta.laddr | |
321 | << ", prev_crc: " << delta.prev_crc | |
322 | << ", final_crc: " << delta.final_crc | |
323 | << ", length: " << delta.length | |
324 | << ", pversion: " << delta.pversion | |
325 | << ", ext_seq: " << delta.ext_seq | |
326 | << ", seg_type: " << delta.seg_type | |
f67539c2 TL |
327 | << ")"; |
328 | } | |
329 | ||
1e59de90 TL |
330 | std::ostream &operator<<(std::ostream &out, const journal_tail_delta_t &delta) |
331 | { | |
332 | return out << "journal_tail_delta_t(" | |
333 | << "alloc_tail=" << delta.alloc_tail | |
334 | << ", dirty_tail=" << delta.dirty_tail | |
335 | << ")"; | |
336 | } | |
337 | ||
20effc67 TL |
338 | std::ostream &operator<<(std::ostream &out, const extent_info_t &info) |
339 | { | |
340 | return out << "extent_info_t(" | |
341 | << "type: " << info.type | |
342 | << ", addr: " << info.addr | |
343 | << ", len: " << info.len | |
344 | << ")"; | |
345 | } | |
346 | ||
347 | std::ostream &operator<<(std::ostream &out, const segment_header_t &header) | |
348 | { | |
349 | return out << "segment_header_t(" | |
1e59de90 TL |
350 | << header.physical_segment_id |
351 | << " " << header.type | |
352 | << " " << segment_seq_printer_t{header.segment_seq} | |
353 | << " " << header.category | |
354 | << " " << rewrite_gen_printer_t{header.generation} | |
355 | << ", dirty_tail=" << header.dirty_tail | |
356 | << ", alloc_tail=" << header.alloc_tail | |
357 | << ", segment_nonce=" << header.segment_nonce | |
358 | << ")"; | |
359 | } | |
360 | ||
361 | std::ostream &operator<<(std::ostream &out, const segment_tail_t &tail) | |
362 | { | |
363 | return out << "segment_tail_t(" | |
364 | << tail.physical_segment_id | |
365 | << " " << tail.type | |
366 | << " " << segment_seq_printer_t{tail.segment_seq} | |
367 | << ", segment_nonce=" << tail.segment_nonce | |
368 | << ", modify_time=" << mod_time_point_printer_t{tail.modify_time} | |
369 | << ", num_extents=" << tail.num_extents | |
370 | << ")"; | |
20effc67 TL |
371 | } |
372 | ||
373 | extent_len_t record_size_t::get_raw_mdlength() const | |
374 | { | |
375 | // empty record is allowed to submit | |
376 | return plain_mdlength + | |
377 | ceph::encoded_sizeof_bounded<record_header_t>(); | |
378 | } | |
379 | ||
380 | void record_size_t::account_extent(extent_len_t extent_len) | |
381 | { | |
382 | assert(extent_len); | |
383 | plain_mdlength += ceph::encoded_sizeof_bounded<extent_info_t>(); | |
384 | dlength += extent_len; | |
385 | } | |
386 | ||
387 | void record_size_t::account(const delta_info_t& delta) | |
388 | { | |
389 | assert(delta.bl.length()); | |
390 | plain_mdlength += ceph::encoded_sizeof(delta); | |
391 | } | |
392 | ||
1e59de90 TL |
393 | std::ostream &operator<<(std::ostream &os, transaction_type_t type) |
394 | { | |
395 | switch (type) { | |
396 | case transaction_type_t::MUTATE: | |
397 | return os << "MUTATE"; | |
398 | case transaction_type_t::READ: | |
399 | return os << "READ"; | |
400 | case transaction_type_t::TRIM_DIRTY: | |
401 | return os << "TRIM_DIRTY"; | |
402 | case transaction_type_t::TRIM_ALLOC: | |
403 | return os << "TRIM_ALLOC"; | |
404 | case transaction_type_t::CLEANER_MAIN: | |
405 | return os << "CLEANER_MAIN"; | |
406 | case transaction_type_t::CLEANER_COLD: | |
407 | return os << "CLEANER_COLD"; | |
408 | case transaction_type_t::MAX: | |
409 | return os << "TRANS_TYPE_NULL"; | |
410 | default: | |
411 | return os << "INVALID_TRANS_TYPE(" | |
412 | << static_cast<std::size_t>(type) | |
413 | << ")"; | |
414 | } | |
415 | } | |
416 | ||
417 | std::ostream &operator<<(std::ostream& out, const record_size_t& rsize) | |
418 | { | |
419 | return out << "record_size_t(" | |
420 | << "raw_md=" << rsize.get_raw_mdlength() | |
421 | << ", data=" << rsize.dlength | |
422 | << ")"; | |
423 | } | |
424 | ||
425 | std::ostream &operator<<(std::ostream& out, const record_t& r) | |
426 | { | |
427 | return out << "record_t(" | |
428 | << "type=" << r.type | |
429 | << ", num_extents=" << r.extents.size() | |
430 | << ", num_deltas=" << r.deltas.size() | |
431 | << ", modify_time=" << sea_time_point_printer_t{r.modify_time} | |
432 | << ")"; | |
433 | } | |
434 | ||
435 | std::ostream &operator<<(std::ostream& out, const record_header_t& r) | |
436 | { | |
437 | return out << "record_header_t(" | |
438 | << "type=" << r.type | |
439 | << ", num_extents=" << r.extents | |
440 | << ", num_deltas=" << r.deltas | |
441 | << ", modify_time=" << mod_time_point_printer_t{r.modify_time} | |
442 | << ")"; | |
443 | } | |
444 | ||
445 | std::ostream& operator<<(std::ostream& out, const record_group_header_t& h) | |
446 | { | |
447 | return out << "record_group_header_t(" | |
448 | << "num_records=" << h.records | |
449 | << ", mdlength=" << h.mdlength | |
450 | << ", dlength=" << h.dlength | |
451 | << ", nonce=" << h.segment_nonce | |
452 | << ", committed_to=" << h.committed_to | |
453 | << ", data_crc=" << h.data_crc | |
454 | << ")"; | |
455 | } | |
456 | ||
20effc67 TL |
457 | extent_len_t record_group_size_t::get_raw_mdlength() const |
458 | { | |
459 | return plain_mdlength + | |
460 | sizeof(checksum_t) + | |
461 | ceph::encoded_sizeof_bounded<record_group_header_t>(); | |
462 | } | |
463 | ||
464 | void record_group_size_t::account( | |
465 | const record_size_t& rsize, | |
466 | extent_len_t _block_size) | |
467 | { | |
468 | // empty record is allowed to submit | |
469 | assert(_block_size > 0); | |
470 | assert(rsize.dlength % _block_size == 0); | |
471 | assert(block_size == 0 || block_size == _block_size); | |
472 | plain_mdlength += rsize.get_raw_mdlength(); | |
473 | dlength += rsize.dlength; | |
474 | block_size = _block_size; | |
475 | } | |
476 | ||
1e59de90 TL |
477 | std::ostream& operator<<(std::ostream& out, const record_group_size_t& size) |
478 | { | |
479 | return out << "record_group_size_t(" | |
480 | << "raw_md=" << size.get_raw_mdlength() | |
481 | << ", data=" << size.dlength | |
482 | << ", block_size=" << size.block_size | |
483 | << ", fullness=" << size.get_fullness() | |
484 | << ")"; | |
485 | } | |
486 | ||
487 | std::ostream& operator<<(std::ostream& out, const record_group_t& rg) | |
488 | { | |
489 | return out << "record_group_t(" | |
490 | << "num_records=" << rg.records.size() | |
491 | << ", " << rg.size | |
492 | << ")"; | |
493 | } | |
494 | ||
20effc67 TL |
495 | ceph::bufferlist encode_record( |
496 | record_t&& record, | |
497 | extent_len_t block_size, | |
498 | const journal_seq_t& committed_to, | |
499 | segment_nonce_t current_segment_nonce) | |
500 | { | |
501 | record_group_t record_group(std::move(record), block_size); | |
502 | return encode_records( | |
503 | record_group, | |
504 | committed_to, | |
505 | current_segment_nonce); | |
506 | } | |
507 | ||
508 | ceph::bufferlist encode_records( | |
509 | record_group_t& record_group, | |
510 | const journal_seq_t& committed_to, | |
511 | segment_nonce_t current_segment_nonce) | |
512 | { | |
513 | assert(record_group.size.block_size > 0); | |
514 | assert(record_group.records.size() > 0); | |
515 | ||
516 | bufferlist data_bl; | |
517 | for (auto& r: record_group.records) { | |
518 | for (auto& i: r.extents) { | |
519 | assert(i.bl.length()); | |
520 | data_bl.append(i.bl); | |
521 | } | |
522 | } | |
523 | ||
524 | bufferlist bl; | |
525 | record_group_header_t header{ | |
526 | static_cast<extent_len_t>(record_group.records.size()), | |
527 | record_group.size.get_mdlength(), | |
528 | record_group.size.dlength, | |
529 | current_segment_nonce, | |
530 | committed_to, | |
531 | data_bl.crc32c(-1) | |
532 | }; | |
533 | encode(header, bl); | |
534 | ||
535 | auto metadata_crc_filler = bl.append_hole(sizeof(checksum_t)); | |
536 | ||
537 | for (auto& r: record_group.records) { | |
538 | record_header_t rheader{ | |
1e59de90 | 539 | r.type, |
20effc67 TL |
540 | (extent_len_t)r.deltas.size(), |
541 | (extent_len_t)r.extents.size(), | |
1e59de90 | 542 | timepoint_to_mod(r.modify_time) |
20effc67 TL |
543 | }; |
544 | encode(rheader, bl); | |
545 | } | |
546 | for (auto& r: record_group.records) { | |
547 | for (const auto& i: r.extents) { | |
548 | encode(extent_info_t(i), bl); | |
549 | } | |
550 | } | |
551 | for (auto& r: record_group.records) { | |
552 | for (const auto& i: r.deltas) { | |
553 | encode(i, bl); | |
554 | } | |
555 | } | |
556 | ceph_assert(bl.length() == record_group.size.get_raw_mdlength()); | |
557 | ||
558 | auto aligned_mdlength = record_group.size.get_mdlength(); | |
559 | if (bl.length() != aligned_mdlength) { | |
560 | assert(bl.length() < aligned_mdlength); | |
561 | bl.append_zero(aligned_mdlength - bl.length()); | |
562 | } | |
563 | ||
564 | auto bliter = bl.cbegin(); | |
565 | auto metadata_crc = bliter.crc32c( | |
566 | ceph::encoded_sizeof_bounded<record_group_header_t>(), | |
567 | -1); | |
568 | bliter += sizeof(checksum_t); /* metadata crc hole */ | |
569 | metadata_crc = bliter.crc32c( | |
570 | bliter.get_remaining(), | |
571 | metadata_crc); | |
572 | ceph_le32 metadata_crc_le; | |
573 | metadata_crc_le = metadata_crc; | |
574 | metadata_crc_filler.copy_in( | |
575 | sizeof(checksum_t), | |
576 | reinterpret_cast<const char *>(&metadata_crc_le)); | |
577 | ||
578 | bl.claim_append(data_bl); | |
579 | ceph_assert(bl.length() == record_group.size.get_encoded_length()); | |
580 | ||
581 | record_group.clear(); | |
582 | return bl; | |
583 | } | |
584 | ||
585 | std::optional<record_group_header_t> | |
586 | try_decode_records_header( | |
587 | const ceph::bufferlist& header_bl, | |
588 | segment_nonce_t expected_nonce) | |
589 | { | |
590 | auto bp = header_bl.cbegin(); | |
591 | record_group_header_t header; | |
592 | try { | |
593 | decode(header, bp); | |
594 | } catch (ceph::buffer::error &e) { | |
1e59de90 | 595 | journal_logger().debug( |
20effc67 TL |
596 | "try_decode_records_header: failed, " |
597 | "cannot decode record_group_header_t, got {}.", | |
1e59de90 | 598 | e.what()); |
20effc67 TL |
599 | return std::nullopt; |
600 | } | |
601 | if (header.segment_nonce != expected_nonce) { | |
1e59de90 | 602 | journal_logger().debug( |
20effc67 TL |
603 | "try_decode_records_header: failed, record_group_header nonce mismatch, " |
604 | "read {}, expected {}!", | |
605 | header.segment_nonce, | |
606 | expected_nonce); | |
607 | return std::nullopt; | |
608 | } | |
609 | return header; | |
610 | } | |
611 | ||
612 | bool validate_records_metadata( | |
613 | const ceph::bufferlist& md_bl) | |
614 | { | |
615 | auto bliter = md_bl.cbegin(); | |
616 | auto test_crc = bliter.crc32c( | |
617 | ceph::encoded_sizeof_bounded<record_group_header_t>(), | |
618 | -1); | |
619 | ceph_le32 recorded_crc_le; | |
620 | decode(recorded_crc_le, bliter); | |
621 | uint32_t recorded_crc = recorded_crc_le; | |
622 | test_crc = bliter.crc32c( | |
623 | bliter.get_remaining(), | |
624 | test_crc); | |
625 | bool success = (test_crc == recorded_crc); | |
626 | if (!success) { | |
1e59de90 TL |
627 | journal_logger().debug( |
628 | "validate_records_metadata: failed, metadata crc mismatch."); | |
20effc67 TL |
629 | } |
630 | return success; | |
631 | } | |
632 | ||
633 | bool validate_records_data( | |
634 | const record_group_header_t& header, | |
635 | const ceph::bufferlist& data_bl) | |
636 | { | |
637 | bool success = (data_bl.crc32c(-1) == header.data_crc); | |
638 | if (!success) { | |
1e59de90 TL |
639 | journal_logger().debug( |
640 | "validate_records_data: failed, data crc mismatch!"); | |
20effc67 TL |
641 | } |
642 | return success; | |
643 | } | |
644 | ||
20effc67 TL |
645 | std::optional<std::vector<record_header_t>> |
646 | try_decode_record_headers( | |
647 | const record_group_header_t& header, | |
648 | const ceph::bufferlist& md_bl) | |
649 | { | |
650 | auto bliter = md_bl.cbegin(); | |
651 | bliter += ceph::encoded_sizeof_bounded<record_group_header_t>(); | |
652 | bliter += sizeof(checksum_t); /* metadata crc hole */ | |
653 | std::vector<record_header_t> record_headers(header.records); | |
654 | for (auto &&i: record_headers) { | |
655 | try { | |
656 | decode(i, bliter); | |
657 | } catch (ceph::buffer::error &e) { | |
1e59de90 | 658 | journal_logger().debug( |
20effc67 TL |
659 | "try_decode_record_headers: failed, " |
660 | "cannot decode record_header_t, got {}.", | |
1e59de90 | 661 | e.what()); |
20effc67 TL |
662 | return std::nullopt; |
663 | } | |
664 | } | |
665 | return record_headers; | |
666 | } | |
667 | ||
20effc67 TL |
668 | std::optional<std::vector<record_extent_infos_t> > |
669 | try_decode_extent_infos( | |
670 | const record_group_header_t& header, | |
671 | const ceph::bufferlist& md_bl) | |
672 | { | |
673 | auto maybe_headers = try_decode_record_headers(header, md_bl); | |
674 | if (!maybe_headers) { | |
20effc67 TL |
675 | return std::nullopt; |
676 | } | |
677 | ||
678 | auto bliter = md_bl.cbegin(); | |
679 | bliter += ceph::encoded_sizeof_bounded<record_group_header_t>(); | |
680 | bliter += sizeof(checksum_t); /* metadata crc hole */ | |
681 | bliter += (ceph::encoded_sizeof_bounded<record_header_t>() * | |
682 | maybe_headers->size()); | |
683 | ||
684 | std::vector<record_extent_infos_t> record_extent_infos( | |
685 | maybe_headers->size()); | |
686 | auto result_iter = record_extent_infos.begin(); | |
687 | for (auto& h: *maybe_headers) { | |
688 | result_iter->header = h; | |
689 | result_iter->extent_infos.resize(h.extents); | |
690 | for (auto& i: result_iter->extent_infos) { | |
691 | try { | |
692 | decode(i, bliter); | |
693 | } catch (ceph::buffer::error &e) { | |
1e59de90 | 694 | journal_logger().debug( |
20effc67 TL |
695 | "try_decode_extent_infos: failed, " |
696 | "cannot decode extent_info_t, got {}.", | |
1e59de90 | 697 | e.what()); |
20effc67 TL |
698 | return std::nullopt; |
699 | } | |
700 | } | |
701 | ++result_iter; | |
702 | } | |
703 | return record_extent_infos; | |
704 | } | |
705 | ||
706 | std::optional<std::vector<record_deltas_t> > | |
707 | try_decode_deltas( | |
708 | const record_group_header_t& header, | |
709 | const ceph::bufferlist& md_bl, | |
710 | paddr_t record_block_base) | |
711 | { | |
712 | auto maybe_record_extent_infos = try_decode_extent_infos(header, md_bl); | |
713 | if (!maybe_record_extent_infos) { | |
20effc67 TL |
714 | return std::nullopt; |
715 | } | |
716 | ||
717 | auto bliter = md_bl.cbegin(); | |
718 | bliter += ceph::encoded_sizeof_bounded<record_group_header_t>(); | |
719 | bliter += sizeof(checksum_t); /* metadata crc hole */ | |
720 | bliter += (ceph::encoded_sizeof_bounded<record_header_t>() * | |
721 | maybe_record_extent_infos->size()); | |
722 | for (auto& r: *maybe_record_extent_infos) { | |
723 | bliter += (ceph::encoded_sizeof_bounded<extent_info_t>() * | |
724 | r.extent_infos.size()); | |
725 | } | |
726 | ||
727 | std::vector<record_deltas_t> record_deltas( | |
728 | maybe_record_extent_infos->size()); | |
729 | auto result_iter = record_deltas.begin(); | |
730 | for (auto& r: *maybe_record_extent_infos) { | |
731 | result_iter->record_block_base = record_block_base; | |
732 | result_iter->deltas.resize(r.header.deltas); | |
733 | for (auto& i: result_iter->deltas) { | |
734 | try { | |
1e59de90 TL |
735 | decode(i.second, bliter); |
736 | i.first = mod_to_timepoint(r.header.modify_time); | |
20effc67 | 737 | } catch (ceph::buffer::error &e) { |
1e59de90 | 738 | journal_logger().debug( |
20effc67 TL |
739 | "try_decode_deltas: failed, " |
740 | "cannot decode delta_info_t, got {}.", | |
1e59de90 | 741 | e.what()); |
20effc67 TL |
742 | return std::nullopt; |
743 | } | |
744 | } | |
745 | for (auto& i: r.extent_infos) { | |
1e59de90 | 746 | record_block_base = record_block_base.add_offset(i.len); |
20effc67 TL |
747 | } |
748 | ++result_iter; | |
749 | } | |
750 | return record_deltas; | |
751 | } | |
752 | ||
1e59de90 TL |
753 | std::ostream& operator<<(std::ostream& out, placement_hint_t h) |
754 | { | |
755 | switch (h) { | |
756 | case placement_hint_t::HOT: | |
757 | return out << "Hint(HOT)"; | |
758 | case placement_hint_t::COLD: | |
759 | return out << "Hint(COLD)"; | |
760 | case placement_hint_t::REWRITE: | |
761 | return out << "Hint(REWRITE)"; | |
762 | case PLACEMENT_HINT_NULL: | |
763 | return out << "Hint(NULL)"; | |
764 | default: | |
765 | return out << "INVALID_PLACEMENT_HINT_TYPE!"; | |
766 | } | |
767 | } | |
768 | ||
20effc67 TL |
769 | bool can_delay_allocation(device_type_t type) { |
770 | // Some types of device may not support delayed allocation, for example PMEM. | |
1e59de90 TL |
771 | // All types of device currently support delayed allocation. |
772 | return true; | |
20effc67 TL |
773 | } |
774 | ||
775 | device_type_t string_to_device_type(std::string type) { | |
1e59de90 TL |
776 | if (type == "HDD") { |
777 | return device_type_t::HDD; | |
20effc67 | 778 | } |
1e59de90 TL |
779 | if (type == "SSD") { |
780 | return device_type_t::SSD; | |
20effc67 | 781 | } |
1e59de90 TL |
782 | if (type == "ZNS") { |
783 | return device_type_t::ZNS; | |
784 | } | |
785 | if (type == "RANDOM_BLOCK_SSD") { | |
786 | return device_type_t::RANDOM_BLOCK_SSD; | |
20effc67 TL |
787 | } |
788 | return device_type_t::NONE; | |
789 | } | |
790 | ||
791 | std::ostream& operator<<(std::ostream& out, device_type_t t) | |
792 | { | |
793 | switch (t) { | |
794 | case device_type_t::NONE: | |
795 | return out << "NONE"; | |
1e59de90 TL |
796 | case device_type_t::HDD: |
797 | return out << "HDD"; | |
798 | case device_type_t::SSD: | |
799 | return out << "SSD"; | |
800 | case device_type_t::ZNS: | |
801 | return out << "ZNS"; | |
802 | case device_type_t::EPHEMERAL_COLD: | |
803 | return out << "EPHEMERAL_COLD"; | |
804 | case device_type_t::EPHEMERAL_MAIN: | |
805 | return out << "EPHEMERAL_MAIN"; | |
806 | case device_type_t::RANDOM_BLOCK_SSD: | |
807 | return out << "RANDOM_BLOCK_SSD"; | |
808 | case device_type_t::RANDOM_BLOCK_EPHEMERAL: | |
809 | return out << "RANDOM_BLOCK_EPHEMERAL"; | |
20effc67 TL |
810 | default: |
811 | return out << "INVALID_DEVICE_TYPE!"; | |
812 | } | |
813 | } | |
814 | ||
1e59de90 TL |
815 | std::ostream& operator<<(std::ostream& out, backend_type_t btype) { |
816 | if (btype == backend_type_t::SEGMENTED) { | |
817 | return out << "SEGMENTED"; | |
818 | } else { | |
819 | return out << "RANDOM_BLOCK"; | |
820 | } | |
821 | } | |
822 | ||
823 | std::ostream& operator<<(std::ostream& out, const write_result_t& w) | |
20effc67 | 824 | { |
1e59de90 TL |
825 | return out << "write_result_t(" |
826 | << "start=" << w.start_seq | |
827 | << ", length=" << w.length | |
828 | << ")"; | |
20effc67 TL |
829 | } |
830 | ||
1e59de90 | 831 | std::ostream& operator<<(std::ostream& out, const record_locator_t& l) |
20effc67 | 832 | { |
1e59de90 TL |
833 | return out << "record_locator_t(" |
834 | << "block_base=" << l.record_block_base | |
835 | << ", " << l.write_result | |
836 | << ")"; | |
20effc67 TL |
837 | } |
838 | ||
1e59de90 TL |
839 | void scan_valid_records_cursor::emplace_record_group( |
840 | const record_group_header_t& header, ceph::bufferlist&& md_bl) | |
841 | { | |
842 | auto new_committed_to = header.committed_to; | |
843 | ceph_assert(last_committed == JOURNAL_SEQ_NULL || | |
844 | last_committed <= new_committed_to); | |
845 | last_committed = new_committed_to; | |
846 | pending_record_groups.emplace_back( | |
847 | seq.offset, | |
848 | header, | |
849 | std::move(md_bl)); | |
850 | increment_seq(header.dlength + header.mdlength); | |
851 | ceph_assert(new_committed_to == JOURNAL_SEQ_NULL || | |
852 | new_committed_to < seq); | |
853 | } | |
854 | ||
855 | std::ostream& operator<<(std::ostream& out, const scan_valid_records_cursor& c) | |
856 | { | |
857 | return out << "cursor(last_valid_header_found=" << c.last_valid_header_found | |
858 | << ", seq=" << c.seq | |
859 | << ", last_committed=" << c.last_committed | |
860 | << ", pending_record_groups=" << c.pending_record_groups.size() | |
861 | << ", num_consumed_records=" << c.num_consumed_records | |
862 | << ")"; | |
863 | } | |
20effc67 | 864 | |
f67539c2 | 865 | } |